first commit: add entire code base

This commit is contained in:
Sai1919
2016-11-07 01:54:36 +05:30
parent 1b155e745c
commit cf1148b879
8 changed files with 1553 additions and 0 deletions

40
package.json Normal file
View File

@@ -0,0 +1,40 @@
{
"version": "0.0.1",
"name": "xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": {
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
},
"keywords": [
"xml",
"xml streaming",
"xml streamer",
"streaming",
"xml parser",
"xml parsing",
"xml2js",
"xmltojs"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/Sai1919/xml-streamer"
},
"dependencies": {
"node-expat": "2.3.15",
"lodash": "4.16.6"
},
"devDependencies": {
"mocha": "3.1.2",
"should": "11.1.1"
},
"optionalDependencies": {},
"main": "./parser",
"maintainers": [
{
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
}
]
}

155
parser.js Normal file
View File

@@ -0,0 +1,155 @@
var expat = require('node-expat')
var _ = require('lodash')
var ParserState = require('./parserState')
/**
 * Creates an expat parser wired to `xmlStream` and returns it.
 *
 * Because an object is returned explicitly, `new XmlParser(...)` evaluates to
 * the underlying expat parser, not to the XmlParser context. The context
 * (opts, parserState, parser) is kept alive only for `parse`, which is
 * scheduled on the next tick so callers can attach listeners first.
 *
 * @param {Object} xmlStream - source stream; its pause()/resume() are called
 *   from the returned parser's pause()/restart().
 * @param {Object} [opts] - parser options; defaults to an empty object.
 * @returns {Object} the expat parser, extended with pause() and restart().
 */
function XmlParser (xmlStream, opts) {
  var self = this

  self.opts = opts || {}
  self.parserState = new ParserState()
  self.parser = new expat.Parser('UTF-8')

  // Halt the source first, then the parser.
  function pauseSourceAndParser () {
    xmlStream.pause()
    self.parser.stop()
  }

  // Wake the parser first, then let the source flow again.
  function resumeParserAndSource () {
    self.parser.resume()
    xmlStream.resume()
  }

  self.parser.pause = pauseSourceAndParser
  self.parser.restart = resumeParserAndSource

  process.nextTick(function startParsing () {
    parse.call(self, xmlStream)
  })

  return self.parser
}
/**
 * Registers the expat event handlers that turn every element located at
 * `opts.resourcePath` into a plain object and emit it as a `data` event
 * (and additionally under the element's own name when
 * `opts.emitEventsOnNodeName` is set).
 *
 * Must be called with `this` bound to the XmlParser context, i.e. an object
 * carrying `opts`, `parser` and `parserState`.
 *
 * Shape of the emitted objects: attributes are stored under `$`, text under
 * `_`, and each child element name maps to an array of child objects.
 *
 * If `opts.resourcePath` is missing, an `error` event is emitted on the parser
 * and no handlers are registered.
 *
 * @param {Object} xmlStream - the source stream; `end()` is called on it when
 *   the document's root element does not match the first segment of
 *   `resourcePath`.
 */
function parse (xmlStream) {
  var scope = this
  var parser = scope.parser
  var state = scope.parserState
  var resourcePath = scope.opts.resourcePath
  var hasOwn = Object.prototype.hasOwnProperty

  if (!resourcePath) {
    parser.emit('error', new Error('resourcePath missing'))
    // Without a resource path none of the handlers below can work.
    return
  }

  // currentPath always starts with '/', so normalise the configured path the
  // same way; otherwise 'items/item' would pass root validation but never match.
  if (resourcePath[0] !== '/') resourcePath = '/' + resourcePath

  // Path of the element that contains the resource nodes ('' for a root resource).
  var parentPath = resourcePath.substring(0, resourcePath.lastIndexOf('/'))

  parser.on('startElement', function (name, attrs) {
    if (state.isRootNode) validateResourcePath(name)
    state.currentPath = state.currentPath + '/' + name
    checkForResourcePath()
    if (state.isPathfound) processStartElement(name, attrs)
  })

  parser.on('endElement', function (name) {
    state.lastEndedNode = name
    // Drop the trailing '/name' segment.
    state.currentPath = state.currentPath.substring(0, state.currentPath.lastIndexOf('/' + name))
    // isPathfound still describes the element that just closed.
    if (state.isPathfound) processEndElement(name)
    checkForResourcePath()
  })

  parser.on('text', function (text) {
    if (state.isPathfound) processText(text)
  })

  parser.on('end', function () {
    parser.emit('finish')
  })

  // Walks state.object along `tokens` (element names below the resource node),
  // creating missing child arrays, and returns the array for the last token.
  function descend (tokens) {
    var node = state.object
    for (var i = 0; i < tokens.length; i++) {
      var key = tokens[i]
      // Own-property check so names such as 'constructor' do not resolve to
      // members inherited from Object.prototype.
      if (!hasOwn.call(node, key)) node[key] = []
      node = node[key]
      // Intermediate levels continue inside the most recently opened child.
      if (Array.isArray(node) && i !== tokens.length - 1) node = node[node.length - 1]
    }
    return node
  }

  // Records a newly opened element (and its attributes) in the object being built.
  function processStartElement (name, attrs) {
    if (!name) return
    var hasAttrs = attrs && !_.isEmpty(attrs)
    var tokens = getRelativePath()
    if (!tokens) {
      // The resource node itself: attributes go on the top-level object.
      if (hasAttrs) state.object.$ = attrs
      return
    }
    var child = {}
    if (hasAttrs) child.$ = attrs
    descend(tokens).push(child)
  }

  // Emits the finished object once the resource node itself has closed.
  function processEndElement (name) {
    if (state.currentPath !== parentPath) return
    if (scope.opts.emitEventsOnNodeName) parser.emit(name, state.object)
    parser.emit('data', state.object)
    state.object = {}
  }

  // Appends non-blank text to the `_` property of the currently open element.
  function processText (text) {
    if (!text || !/\S/.test(text)) return
    var tokens = getRelativePath()
    var target = state.object
    if (tokens) {
      var siblings = descend(tokens)
      target = siblings[siblings.length - 1]
    }
    if (!target._) target._ = ''
    target._ = target._ + text
  }

  // True only for the resource node itself or one of its descendants. A bare
  // prefix test would also accept siblings such as '/items/itemfoo'.
  function checkForResourcePath () {
    var path = state.currentPath
    state.isPathfound = path === resourcePath || path.indexOf(resourcePath + '/') === 0
  }

  // Element names between the resource node and the current element, or
  // undefined when the current element is the resource node itself. Returned
  // as an array because element names may legally contain '.'.
  function getRelativePath () {
    var xpath = state.currentPath.substring(resourcePath.length)
    if (!xpath) return
    if (xpath[0] === '/') xpath = xpath.substring(1)
    return xpath.split('/')
  }

  // Runs once, on the first element: the document root must equal the first
  // segment of resourcePath (which may be its only segment).
  function validateResourcePath (name) {
    state.isRootNode = false
    var rootName = resourcePath.split('/')[1]
    if (rootName !== name) {
      // NOTE(review): assumes xmlStream exposes end(); plain Readable streams
      // do not. Confirm what callers pass in.
      xmlStream.end()
    }
  }
}
module.exports = XmlParser

12
parserState.js Normal file
View File

@@ -0,0 +1,12 @@
/**
 * Mutable bookkeeping for one parsing run.
 *
 * Every instance gets its own fresh `object` and `buffer` containers.
 * `buffer` and `paused` are initialised here but are not read by the
 * parser code visible alongside this file.
 */
function ParserState () {
  var initialValues = {
    currentPath: '', // slash-separated path of the currently open element
    lastEndedNode: '', // name of the most recently closed element
    isPathfound: false, // whether the current element lies on the resource path
    object: {}, // resource object currently being assembled
    buffer: [],
    paused: false,
    isRootNode: true // true until the first element has been seen
  }

  Object.keys(initialValues).forEach(function (key) {
    this[key] = initialValues[key]
  }, this)
}
module.exports = ParserState

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
<items >
<item id="1" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
<item id="2">
<subitem>three</subitem>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
</items>

12
test/TestFiles/item.xml Normal file
View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<subitem>three</subitem>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
</items>

1195
test/TestFiles/manyItems.xml Normal file

File diff suppressed because it is too large Load Diff

44
test/TestFiles/medium.xml Normal file
View File

@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<subitem>three</subitem>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="5" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="6" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="7" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
</items>

84
test/test.js Normal file
View File

@@ -0,0 +1,84 @@
var should = require('should')
var fs = require('fs')
var ParserFactory = require('../parser')
describe('Tests', function () {
  // Opens a fixture from test/TestFiles and builds a parser that extracts
  // the /items/item nodes from it.
  function openFixture (fileName) {
    var xmlStream = fs.createReadStream('./test/TestFiles/' + fileName)
    var parser = new ParserFactory(xmlStream, {resourcePath: '/items/item'})
    return {stream: xmlStream, parser: parser}
  }

  // Streams a fixture through the parser and checks, once 'end' fires, that
  // exactly `expectedCount` 'data' events were seen.
  function expectDataEventCount (fileName, expectedCount, done) {
    var fixture = openFixture(fileName)
    var seen = 0

    fixture.parser.on('data', function (data) {
      seen++
    })
    fixture.parser.on('error', function (err) {
      done(err)
    })
    fixture.parser.on('end', function () {
      seen.should.equal(expectedCount)
      done()
    })
    fixture.stream.pipe(fixture.parser)
  }

  describe('simple behaviour testing', function () {
    it('should properly parse a simple file.', function (done) {
      var fixture = openFixture('item.xml')
      // Attributes land under '$', text under '_', children in arrays.
      var firstItem = {
        '$': { id: '1', test: 'hello' },
        subitem: [
          { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' }
        ]
      }
      var secondItem = {
        '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
      }
      var expectedData = [firstItem, secondItem]
      var received = []
      var eventCount = 0

      fixture.parser.on('data', function (data) {
        received.push(data)
        eventCount++
      })
      fixture.parser.on('error', function (err) {
        done(err)
      })
      fixture.parser.on('end', function () {
        received.should.deepEqual(expectedData)
        eventCount.should.equal(2)
        done()
      })
      fixture.stream.pipe(fixture.parser)
    })

    it('should properly parse a medium size file.', function (done) {
      expectDataEventCount('medium.xml', 10, done)
    })

    it('should properly parse a file containing many nodes.', function (done) {
      expectDataEventCount('manyItems.xml', 296, done)
    })
  })
})