first commit: add entire code base
This commit is contained in:
40
package.json
Normal file
40
package.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"version": "0.0.1",
|
||||
"name": "xml-streamer",
|
||||
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
|
||||
"author": {
|
||||
"name": "Sai Teja",
|
||||
"email": "saitejas464@gmail.com"
|
||||
},
|
||||
"keywords": [
|
||||
"xml",
|
||||
"xml streaming",
|
||||
"xml streamer",
|
||||
"streaming",
|
||||
"xml parser",
|
||||
"xml parsing",
|
||||
"xml2js",
|
||||
"xmltojs"
|
||||
],
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Sai1919/xml-streamer"
|
||||
},
|
||||
"dependencies": {
|
||||
"node-expat": "2.3.15",
|
||||
"lodash": "4.16.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"mocha": "3.1.2",
|
||||
"should": "11.1.1"
|
||||
},
|
||||
"optionalDependencies": {},
|
||||
"main": "./parser",
|
||||
"maintainers": [
|
||||
{
|
||||
"name": "Sai Teja",
|
||||
"email": "saitejas464@gmail.com"
|
||||
}
|
||||
]
|
||||
}
|
||||
155
parser.js
Normal file
155
parser.js
Normal file
@@ -0,0 +1,155 @@
|
||||
var expat = require('node-expat')
|
||||
var _ = require('lodash')
|
||||
var ParserState = require('./parserState')
|
||||
|
||||
/**
 * Builds a streaming XML parser around a node-expat parser instance.
 *
 * Although written as a constructor, this function returns the underlying
 * expat parser object (not the XmlParser instance), so `new XmlParser(...)`
 * and `XmlParser(...)` both evaluate to that parser. The returned parser is
 * augmented with two convenience methods:
 *   - pause():   pauses `xmlStream`, then calls `stop()` on the expat parser.
 *   - restart(): calls `resume()` on the expat parser, then resumes `xmlStream`.
 *
 * @param {Object} xmlStream source stream; must expose `pause()` and `resume()`.
 * @param {Object} [opts] options, stored as-is on the instance and read later
 *   by `parse` (e.g. `resourcePath`, `emitEventsOnNodeName`).
 * @returns {Object} the expat parser instance.
 */
function XmlParser (xmlStream, opts) {
  // Guard against being called without `new`: in sloppy-mode CommonJS `this`
  // would be the global object and every parser would share (and overwrite)
  // the same `opts` / `parserState` / `parser` globals.
  if (!(this instanceof XmlParser)) return new XmlParser(xmlStream, opts)

  var scope = this
  scope.opts = opts || {}
  scope.parserState = new ParserState()
  scope.parser = new expat.Parser('UTF-8')

  scope.parser.pause = function () {
    xmlStream.pause()
    scope.parser.stop()
  }

  scope.parser.restart = function () {
    scope.parser.resume()
    xmlStream.resume()
  }

  // Handler wiring is deferred to the next tick; presumably so the caller can
  // attach its own listeners (e.g. 'error') to the returned parser first.
  process.nextTick(function () {
    parse.call(scope, xmlStream)
  })

  return scope.parser
}
|
||||
|
||||
/**
 * Attaches the SAX-style event handlers that turn expat events into objects.
 *
 * Must be called with `this` bound to an object exposing:
 *   - `opts`        : `{ resourcePath, emitEventsOnNodeName }`
 *   - `parser`      : an event emitter delivering 'startElement', 'endElement',
 *                     'text' and 'end' events
 *   - `parserState` : a ParserState-shaped object
 *
 * Every element whose path equals `opts.resourcePath` (e.g. '/items/item') is
 * assembled into a plain object and emitted on the parser as a 'data' event
 * when that element closes. Within the object:
 *   - `$` holds the element's attributes (only when it has any),
 *   - `_` holds its concatenated non-whitespace text,
 *   - each child element name maps to an array of child objects of the same
 *     shape.
 * If `opts.emitEventsOnNodeName` is truthy the same object is additionally
 * emitted under the element's own name, before the 'data' event.
 * The parser's 'end' event is re-emitted as 'finish'.
 *
 * If `opts.resourcePath` is missing, an 'error' event is emitted on the parser
 * and no handlers are attached.
 *
 * @param {Object} xmlStream source stream; `end()` is called on it when the
 *   document's root element does not match the first segment of resourcePath.
 */
function parse (xmlStream) {
  var scope = this
  var parser = scope.parser
  var state = scope.parserState
  var resourcePath = scope.opts.resourcePath
  var hasOwn = Object.prototype.hasOwnProperty

  if (!resourcePath) {
    parser.emit('error', new Error('resourcePath missing'))
    // Nothing below can work without a path; bail out instead of letting the
    // first startElement crash while indexing into `undefined`.
    return
  }

  // `state.currentPath` always starts with '/', so normalise the configured
  // path the same way; otherwise 'items/item' could never match.
  if (resourcePath[0] !== '/') resourcePath = '/' + resourcePath

  // Path of the resource element's parent; once currentPath shrinks back to
  // this value the resource element has just closed. Loop-invariant.
  var parentPath = resourcePath.substring(0, resourcePath.lastIndexOf('/'))

  parser.on('startElement', function (name, attrs) {
    if (state.isRootNode) validateResourcePath(name)
    state.currentPath = state.currentPath + '/' + name
    checkForResourcePath()
    if (state.isPathfound) processStartElement(name, attrs)
  })

  parser.on('endElement', function (name) {
    state.lastEndedNode = name
    var cut = state.currentPath.lastIndexOf('/' + name)
    state.currentPath = state.currentPath.substring(0, cut)
    // isPathfound still reflects the path *before* this element was popped.
    if (state.isPathfound) processEndElement(name)
    checkForResourcePath()
  })

  parser.on('text', function (text) {
    if (state.isPathfound) processText(text)
  })

  parser.on('end', function () {
    parser.emit('finish')
  })

  // True when `attrs` is an object carrying at least one own enumerable key.
  function hasAttributes (attrs) {
    return Boolean(attrs) && Object.keys(attrs).length > 0
  }

  // Walks state.object along `segments` (child element names relative to the
  // resource element), creating missing arrays on the way, and returns the
  // array that holds the entries for the last segment.
  function resolveSiblings (segments) {
    var cursor = state.object
    for (var i = 0; i < segments.length; i++) {
      var key = segments[i]
      // Own-property check so names like 'constructor' do not resolve to
      // members inherited from Object.prototype.
      if (!hasOwn.call(cursor, key) || !cursor[key]) cursor[key] = []
      cursor = cursor[key]
      // For intermediate segments descend into the most recently opened node.
      if (Array.isArray(cursor) && i !== segments.length - 1) cursor = cursor[cursor.length - 1]
    }
    return cursor
  }

  // Records a newly opened element (and its attributes) in state.object.
  function processStartElement (name, attrs) {
    if (!name) return
    var segments = getRelativePath()
    if (!segments) {
      // This is the resource element itself: attributes go on the root object.
      if (hasAttributes(attrs)) state.object.$ = attrs
      return
    }
    var node = {}
    if (hasAttributes(attrs)) node.$ = attrs
    resolveSiblings(segments).push(node)
  }

  // Emits the finished object once the resource element itself has closed.
  function processEndElement (name) {
    if (state.currentPath !== parentPath) return
    if (scope.opts.emitEventsOnNodeName) parser.emit(name, state.object)
    parser.emit('data', state.object)
    state.object = {}
  }

  // Appends non-whitespace text to the `_` property of the current element.
  function processText (text) {
    if (!text || !/\S/.test(text)) return
    var segments = getRelativePath()
    var target = state.object
    if (segments) {
      var siblings = resolveSiblings(segments)
      target = siblings[siblings.length - 1]
    }
    if (!target._) target._ = ''
    target._ = target._ + text
  }

  // Sets state.isPathfound: true only while the current element is the
  // resource element or one of its descendants. Matching is done on whole
  // path segments so '/items/itemX' is not mistaken for '/items/item'.
  function checkForResourcePath () {
    var current = state.currentPath
    state.isPathfound = current === resourcePath || current.indexOf(resourcePath + '/') === 0
  }

  // Returns the element names between the resource element and the current
  // element as an array, or undefined when the current element *is* the
  // resource element. Names are kept whole (XML names may contain '.').
  function getRelativePath () {
    var relative = state.currentPath.substring(resourcePath.length)
    if (!relative) return
    if (relative[0] === '/') relative = relative.substring(1)
    return relative.split('/')
  }

  // Runs once, on the first element: checks that the document root matches the
  // first segment of resourcePath and ends the source stream if it does not.
  function validateResourcePath (name) {
    state.isRootNode = false
    var trimmed = resourcePath.substring(1)
    var slash = trimmed.indexOf('/')
    // A single-segment path such as '/items' has no further '/'.
    var expectedRoot = slash === -1 ? trimmed : trimmed.substring(0, slash)
    if (expectedRoot !== name) {
      // NOTE(review): assumes xmlStream exposes end(); plain Readable streams
      // do not — confirm against the intended stream type.
      xmlStream.end()
    }
  }
}
|
||||
|
||||
module.exports = XmlParser
|
||||
|
||||
12
parserState.js
Normal file
12
parserState.js
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
/**
 * Mutable bookkeeping shared by the parser's event handlers.
 *
 * Safe to call with or without `new`; either way a fresh, independent state
 * object is returned.
 */
function ParserState () {
  // Without `new`, `this` would be the global object (or undefined in strict
  // mode); delegate to a proper construction instead.
  if (!(this instanceof ParserState)) return new ParserState()

  // Slash-separated path of the currently open elements, e.g. '/items/item'.
  this.currentPath = ''
  // Name of the element that closed most recently.
  this.lastEndedNode = ''
  // Whether the current element lies at or below the configured resource path.
  this.isPathfound = false
  // Object under construction for the resource element being parsed.
  this.object = {}
  // Not referenced by the parser code visible in this file.
  this.buffer = []
  // Not referenced by the parser code visible in this file.
  this.paused = false
  // True until the first (root) element has been seen.
  this.isRootNode = true
}
|
||||
|
||||
module.exports = ParserState
|
||||
11
test/TestFiles/corrupted.xml
Normal file
11
test/TestFiles/corrupted.xml
Normal file
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<items >
|
||||
<item id="1" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
<item id="2">
|
||||
<subitem>three</subitem>
|
||||
<subitem>four</subitem>
|
||||
<subitem>five</subitem>
|
||||
</item>
|
||||
</items>
|
||||
12
test/TestFiles/item.xml
Normal file
12
test/TestFiles/item.xml
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<items>
|
||||
<item id="1" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="2">
|
||||
<subitem>three</subitem>
|
||||
<subitem>four</subitem>
|
||||
<subitem>five</subitem>
|
||||
</item>
|
||||
</items>
|
||||
1195
test/TestFiles/manyItems.xml
Normal file
1195
test/TestFiles/manyItems.xml
Normal file
File diff suppressed because it is too large
Load Diff
44
test/TestFiles/medium.xml
Normal file
44
test/TestFiles/medium.xml
Normal file
@@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<items>
|
||||
<item id="1" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="2">
|
||||
<subitem>three</subitem>
|
||||
<subitem>four</subitem>
|
||||
<subitem>five</subitem>
|
||||
</item>
|
||||
<item id="3" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="4" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="5" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="6" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="7" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="8" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="9" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
<item id="10" test= 'hello'>
|
||||
<subitem sub= "TESTING SUB">one</subitem>
|
||||
<subitem sub= "2">two</subitem>
|
||||
</item>
|
||||
</items>
|
||||
84
test/test.js
Normal file
84
test/test.js
Normal file
@@ -0,0 +1,84 @@
|
||||
var should = require('should')
|
||||
var fs = require('fs')
|
||||
|
||||
var ParserFactory = require('../parser')
|
||||
|
||||
describe('Tests', function () {
  describe('simple behaviour testing', function () {
    /**
     * Streams `fixture` through a parser configured for '/items/item'.
     * `onData` receives every 'data' event; `verify` runs on 'end' just
     * before `done()`. A parser 'error' fails the test through `done(err)`.
     */
    function parseFixture (fixture, done, onData, verify) {
      var xmlStream = fs.createReadStream(fixture)
      var parser = new ParserFactory(xmlStream, {resourcePath: '/items/item'})

      parser.on('data', onData)

      parser.on('error', function (err) {
        done(err)
      })

      parser.on('end', function () {
        verify()
        done()
      })

      xmlStream.pipe(parser)
    }

    /** Asserts that parsing `fixture` yields exactly `expectedCount` 'data' events. */
    function expectDataEventCount (fixture, expectedCount, done) {
      var dataEventCount = 0

      parseFixture(fixture, done, function () {
        dataEventCount++
      }, function () {
        dataEventCount.should.equal(expectedCount)
      })
    }

    it('should properly parse a simple file.', function (done) {
      var expectedData = [
        {
          '$': { id: '1', test: 'hello' },
          subitem: [
            { '$': { sub: 'TESTING SUB' }, _: 'one' },
            { '$': { sub: '2' }, _: 'two' }
          ]
        },
        {
          '$': { id: '2' },
          subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
        }
      ]
      var actualData = []
      var dataEventCount = 0

      parseFixture('./test/TestFiles/item.xml', done, function (data) {
        actualData.push(data)
        dataEventCount++
      }, function () {
        actualData.should.deepEqual(expectedData)
        dataEventCount.should.equal(2)
      })
    })

    it('should properly parse a medium size file.', function (done) {
      expectDataEventCount('./test/TestFiles/medium.xml', 10, done)
    })

    it('should properly parse a file containing many nodes.', function (done) {
      expectDataEventCount('./test/TestFiles/manyItems.xml', 296, done)
    })
  })
})
|
||||
Reference in New Issue
Block a user