add support for listening on interested nodes

This commit is contained in:
Sai1919
2016-11-10 20:39:54 +05:30
parent 9e366aa489
commit 4649d3749c
8 changed files with 9023 additions and 47 deletions

114
parser.js
View File

@@ -17,11 +17,22 @@ function XmlParser (opts) {
// var transformOpts = { readableObjectMode: true }
stream.Transform.call(this)
this._readableState.objectMode = true
var scope = this
process.nextTick(function () { scope.checkForInterestedNodeListeners() })
}
util.inherits(XmlParser, stream.Transform)
/**
 * Records, on `parserState.interestedNodes`, the name of every event that
 * currently has a listener and is not a stream lifecycle event. The
 * constructor schedules this with process.nextTick.
 *
 * Stream lifecycle names are skipped because `pipe()` and ordinary stream
 * consumers attach listeners for them; treating them as node names would let
 * an XML element called e.g. `close` or `finish` fire a stream event.
 */
XmlParser.prototype.checkForInterestedNodeListeners = function () {
  var ignore = [
    'end', 'prefinish', 'data', 'error',
    'close', 'finish', 'drain', 'pipe', 'unpipe', 'readable',
    'newListener', 'removeListener'
  ]
  // `_events` is an EventEmitter internal; fall back to an empty map if unset.
  var eventNames = Object.keys(this._events || {})
  var interestedNodes = this.parserState.interestedNodes
  for (var i = 0; i < eventNames.length; i++) {
    var eventName = eventNames[i]
    if (_.includes(ignore, eventName, 0)) continue
    // Avoid duplicates if this method runs more than once.
    if (_.includes(interestedNodes, eventName, 0)) continue
    interestedNodes.push(eventName)
  }
}
XmlParser.prototype._transform = function (chunk, encoding, callback) {
if (!this.opts.resourcePath) this.emit('error', new Error('resourcePath missing'))
if (encoding !== 'buffer') this.emit('error', new Error('unsupported encoding'))
this.parse(chunk)
@@ -36,6 +47,7 @@ XmlParser.prototype.parse = function (chunk) {
var resourcePath = this.opts.resourcePath
var attrsKey = this.opts.attrsKey
var textKey = this.opts.textKey
var interestedNodes = state.interestedNodes
if (state.isRootNode) registerEvents()
@@ -87,6 +99,7 @@ XmlParser.prototype.parse = function (chunk) {
function processStartElement (name, attrs) {
if (!name) return
var obj = {}
if (attrs && !_.isEmpty(attrs)) obj[attrsKey] = attrs
var tempObj = state.object
@@ -110,16 +123,46 @@ XmlParser.prototype.parse = function (chunk) {
}
function processEndElement (name) {
var index = resourcePath.lastIndexOf('/')
var rpath = resourcePath.substring(0, index)
if (resourcePath) {
var index = resourcePath.lastIndexOf('/')
var rpath = resourcePath.substring(0, index)
if (rpath === state.currentPath) {
if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
scope.push(state.object)
state.object = {}
if (rpath === state.currentPath) {
if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
scope.push(state.object)
state.object = {}
}
} else {
if (_.includes(interestedNodes, name, 0)) {
emitInterestedNode(name)
if (state.firstFoundNode === name) {
state.object = {}
state.firstFoundNode = ''
state.isPathfound = false
}
}
}
}
/**
 * Emits the object built for the node that has just ended, both as an event
 * named after the node and as a chunk on the readable side.
 *
 * The node is located by walking `state.object` along the path segments that
 * follow `state.firstFoundNode` in `state.currentPath` + `name`.
 *
 * @param {string} name - name of the element that just ended
 */
function emitInterestedNode (name) {
  var pathTokens = state.currentPath.substring(1).split('/')
  pathTokens.push(name)
  // Keep only the segments below the first interested node.
  var index = pathTokens.indexOf(state.firstFoundNode)
  pathTokens = pathTokens.slice(index + 1)

  var node = state.object
  for (var i = 0; i < pathTokens.length; i++) {
    // A repeated element is held as an array; descend into its last entry
    // before reading the next segment.
    if (Array.isArray(node)) node = node[node.length - 1]
    // Path not present in the object: nothing to emit.
    if (node === null || node === undefined) return
    node = node[pathTokens[i]]
  }
  if (Array.isArray(node)) node = node[node.length - 1]
  // Never emit or push an absent value (pushing null would end the stream).
  if (node === null || node === undefined) return

  scope.emit(name, node)
  scope.push(node)
}
function processText (text) {
if (!text || !/\S/.test(text)) {
return
@@ -147,20 +190,41 @@ XmlParser.prototype.parse = function (chunk) {
}
function checkForResourcePath (name) {
if (state.currentPath.indexOf(resourcePath) === 0) {
state.isPathfound = true
if (resourcePath) {
if (state.currentPath.indexOf(resourcePath) === 0) {
state.isPathfound = true
} else {
state.isPathfound = false
}
} else {
state.isPathfound = false
if (_.includes(interestedNodes, name, 0)) {
state.isPathfound = true
if (!state.firstFoundNode) {
state.firstFoundNode = name
}
}
}
}
function getRelativePath () {
var xpath = state.currentPath.substring(resourcePath.length)
var tokens
var jsonPath
var index
if (!xpath) return
if (xpath[0] === '/') xpath = xpath.substring(1)
var tokens = xpath.split('/')
var jsonPath = tokens.join('.')
if (resourcePath) {
var xpath = state.currentPath.substring(resourcePath.length)
if (!xpath) return
if (xpath[0] === '/') xpath = xpath.substring(1)
tokens = xpath.split('/')
jsonPath = tokens.join('.')
} else {
xpath = state.currentPath.substring(1)
tokens = xpath.split('/')
index = tokens.indexOf(state.firstFoundNode)
tokens = _.drop(tokens, index + 1)
jsonPath = tokens.join('.')
}
return jsonPath
}
@@ -170,15 +234,17 @@ XmlParser.prototype.parse = function (chunk) {
state.isRootNode = false
if (resourcePath[0] === '/') {
temp = resourcePath.substring(1, resourcePath.length)
} else {
temp = resourcePath
}
index = temp.indexOf('/')
if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) {
scope.end()
if (resourcePath) {
if (resourcePath[0] === '/') {
temp = resourcePath.substring(1, resourcePath.length)
} else {
temp = resourcePath
}
index = temp.indexOf('/')
if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) {
scope.end()
}
}
}
}

View File

@@ -4,9 +4,10 @@ function ParserState () {
this.lastEndedNode = ''
this.isPathfound = false
this.object = {}
this.buffer = []
this.paused = false
this.isRootNode = true
this.firstFoundNode = ''
this.interestedNodes = []
}
module.exports = ParserState

View File

@@ -0,0 +1 @@
</items>

View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<items>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<item>three</item>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
<item id="5" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<item sub= "2">two</item>
</item>
<item id="6" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="7" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
</items>

View File

@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<item>three</item>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
<item id="5" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<item sub= "2">two</item>
</item>
<item id="6" test= 'hello'>
<item id= "6a">
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
</item>
<item id="7" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
</items>

View File

@@ -1,6 +1,7 @@
var should = require('should')
var fs = require('fs')
var zlib = require('zlib')
var stream = require('stream')
var ParserFactory = require('../parser')
@@ -114,28 +115,6 @@ describe('Tests', function () {
xmlStream.pipe(parser)
})
it('should properly parse a file containing many nodes.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
var parser = new ParserFactory({resourcePath: '/items/item'})
// console.log(parser)
var dataEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
dataEventCount.should.equal(296)
done()
})
xmlStream.pipe(parser)
})
it('should properly parse a huge file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
var parser = new ParserFactory({resourcePath: '/items/item'})
@@ -755,4 +734,497 @@ describe('Tests', function () {
xmlStream.pipe(parser)
})
})
describe('interested Nodes', function () {
// Parses item.xml without a resourcePath while listening on the 'item' and
// 'subitem' node events, and checks the per-node payloads as well as the
// combined 'data' output.
it('should properly parse a simple file.', function (done) {
  var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
  var parser = new ParserFactory()
  // Expected 'data' order: the subitems of an item, then the item itself.
  var expectedData =
    [
      { '$': { sub: 'TESTING SUB' }, _: 'one' },
      { '$': { sub: '2' }, _: 'two' },
      { '$': { id: '1', test: 'hello' },
        subitem: [ { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' } ]
      },
      { _: 'three' },
      { _: 'four' },
      { _: 'five' },
      { '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] }
    ]
  var actualData = []
  var dataEventCount = 0
  var expectedItems = [
    { '$': { id: '1', test: 'hello' },
      subitem:
        [ { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' } ] },
    { '$': { id: '2' },
      subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] } ]
  var actualItems = []
  var actualSubitems = []
  var expectedSubitems = [
    { '$': { sub: 'TESTING SUB' }, _: 'one' },
    { '$': { sub: '2' }, _: 'two' },
    { _: 'three' },
    { _: 'four' },
    { _: 'five' }
  ]

  parser.on('data', function (data) {
    actualData.push(data)
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('item', function (item) {
    actualItems.push(item)
  })

  parser.on('subitem', function (subitem) {
    actualSubitems.push(subitem)
  })

  parser.on('end', function () {
    actualData.should.deepEqual(expectedData)
    actualItems.should.deepEqual(expectedItems)
    actualSubitems.should.deepEqual(expectedSubitems)
    actualSubitems.length.should.equal(5)
    actualItems.length.should.equal(2)
    dataEventCount.should.equal(7)
    done()
  })
  xmlStream.pipe(parser)
})
// Counts the 'data', 'item' and 'subitem' events produced for medium.xml
// when the parser is created without options and the test listens on node
// names.
it('should properly parse a medium size file.', function (done) {
  var source = fs.createReadStream('./test/TestFiles/medium.xml')
  var parser = new ParserFactory()
  var seen = { data: 0, item: 0, subitem: 0 }

  // Builds a listener that increments the tally stored under `key`.
  function tally (key) {
    return function () {
      seen[key]++
    }
  }

  parser.on('data', tally('data'))
  parser.on('error', function (err) {
    done(err)
  })
  parser.on('item', tally('item'))
  parser.on('subitem', tally('subitem'))
  parser.on('end', function () {
    seen.data.should.equal(31)
    seen.item.should.equal(10)
    seen.subitem.should.equal(21)
    done()
  })

  source.pipe(parser)
})
// Counts the 'item', 'subitem' and 'data' events produced for manyItems.xml
// when the test listens on node names.
it('should properly parse a file containing many nodes.', function (done) {
  var source = fs.createReadStream('./test/TestFiles/manyItems.xml')
  var parser = new ParserFactory()
  var seen = { data: 0, item: 0, subitem: 0 }

  // Builds a listener that increments the tally stored under `key`.
  function tally (key) {
    return function () {
      seen[key]++
    }
  }

  parser.on('data', tally('data'))
  parser.on('error', function (err) {
    done(err)
  })
  parser.on('item', tally('item'))
  parser.on('subitem', tally('subitem'))
  parser.on('end', function () {
    seen.item.should.equal(296)
    seen.subitem.should.equal(600)
    seen.data.should.equal(896)
    done()
  })

  source.pipe(parser)
})
// Parses randomText.xml while listening on 'item' and 'subitem', and checks
// the payloads and counts of the node events and of the 'data' stream.
it('should properly parse a xml simple file in which nodes contain text values randomly.', function (done) {
  var source = fs.createReadStream('./test/TestFiles/randomText.xml')
  var parser = new ParserFactory()

  var expectedSubitems = [
    { '$': { sub: 'TESTING SUB' }, _: 'one' },
    { '$': { sub: '2' }, _: 'two' },
    { _: 'three' },
    { _: 'four' },
    { _: 'five' }
  ]
  var expectedItems = [
    {
      '$': { id: '1', test: 'hello' },
      _: ' item one two',
      subitem: [
        { '$': { sub: 'TESTING SUB' }, _: 'one' },
        { '$': { sub: '2' }, _: 'two' }
      ]
    },
    {
      '$': { id: '2' },
      '_': ' item one two three four',
      subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
    }
  ]
  // Expected 'data' order: the subitems of an item, then the item itself.
  var expectedData = [
    expectedSubitems[0],
    expectedSubitems[1],
    expectedItems[0],
    expectedSubitems[2],
    expectedSubitems[3],
    expectedSubitems[4],
    expectedItems[1]
  ]

  var received = { data: [], item: [], subitem: [] }
  var seen = { data: 0, item: 0, subitem: 0 }

  // Builds a listener that stores the payload and bumps the tally for `key`.
  function collect (key) {
    return function (payload) {
      if (key === 'data') {
        received[key].push(payload)
        seen[key]++
      } else {
        seen[key]++
        received[key].push(payload)
      }
    }
  }

  parser.on('data', collect('data'))
  parser.on('error', function (err) {
    done(err)
  })
  parser.on('item', collect('item'))
  parser.on('subitem', collect('subitem'))
  parser.on('end', function () {
    received.data.should.deepEqual(expectedData)
    received.item.should.deepEqual(expectedItems)
    received.subitem.should.deepEqual(expectedSubitems)
    seen.data.should.equal(7)
    seen.item.should.equal(2)
    seen.subitem.should.equal(5)
    done()
  })

  source.pipe(parser)
})
// Counts the 'data', 'item' and 'subitem' events produced for hugeFile.xml
// when the test listens on node names.
it('should properly parse a huge file.', function (done) {
  var source = fs.createReadStream('./test/TestFiles/hugeFile.xml')
  var parser = new ParserFactory()
  var seen = { data: 0, item: 0, subitem: 0 }

  // Builds a listener that increments the tally stored under `key`.
  function tally (key) {
    return function () {
      seen[key]++
    }
  }

  parser.on('data', tally('data'))
  parser.on('error', function (err) {
    done(err)
  })
  parser.on('item', tally('item'))
  parser.on('subitem', tally('subitem'))
  parser.on('end', function () {
    seen.data.should.equal(6272)
    seen.item.should.equal(2072)
    seen.subitem.should.equal(4200)
    done()
  })

  source.pipe(parser)
})
// Listens only on the root node name ('items') of item.xml and expects the
// whole document to arrive as a single object, once as an 'items' event and
// once as 'data'.
it('should properly parse a simple file and return when root element when listening on it.', function (done) {
  var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
  var parser = new ParserFactory()
  var expectedData =
    [{ 'item': [{ '$': { 'id': '1', 'test': 'hello' },
      'subitem': [{ '$': { 'sub': 'TESTING SUB' }, '_': 'one' },
        { '$': { 'sub': '2' }, '_': 'two' }]
    },
    { '$': { 'id': '2' }, 'subitem': [{ '_': 'three' }, { '_': 'four' },
      { '_': 'five' }]
    }]
    }]

  var actualData = []
  var dataEventCount = 0
  var itemsEventCount = 0

  parser.on('data', function (data) {
    actualData.push(data)
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('items', function (item) {
    itemsEventCount++
  })

  parser.on('end', function () {
    actualData.should.deepEqual(expectedData)
    itemsEventCount.should.equal(1)
    dataEventCount.should.equal(1)
    done()
  })
  xmlStream.pipe(parser)
})
})
describe('performance testing', function () {
// Feeds the parser a synthetic document made of firstChunk.xml, 2200 copies
// of repetitiveChunk.xml and endingChunk.xml, then checks the number of
// 'data' events and that parsing finished within 300000 ms.
it('should properly parse more than 500 MB of file.', function (done) {
  var firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  var dataEventCount = 0
  var startTime = Date.now()
  var xmlStream = new stream.Readable()
  // Data is pushed manually below, so _read has nothing to do.
  xmlStream._read = function noop () {}
  this.timeout(900000)

  xmlStream.push(firstChunk)
  // The repeated chunk never changes: read it once and push the same buffer
  // instead of re-reading the file on every iteration.
  var dataChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
  for (var i = 0; i < 2200; i++) {
    xmlStream.push(dataChunk)
  }
  var endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
  xmlStream.push(endingChunk)
  xmlStream.push(null)

  parser.on('data', function (data) {
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('end', function () {
    var TimeTaken = Date.now() - startTime
    TimeTaken.should.be.belowOrEqual(300000)
    dataEventCount.should.equal(4558400)
    done()
  })
  xmlStream.pipe(parser)
})
// Feeds the parser a synthetic document made of firstChunk.xml, 4400 copies
// of repetitiveChunk.xml and endingChunk.xml, then checks the number of
// 'data' events and that parsing finished within 700000 ms.
it('should properly parse more than 1 GB of file.', function (done) {
  var firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  var dataEventCount = 0
  var startTime = Date.now()
  var xmlStream = new stream.Readable()
  // Data is pushed manually below, so _read has nothing to do.
  xmlStream._read = function noop () {}
  this.timeout(900000)

  xmlStream.push(firstChunk)
  // The repeated chunk never changes: read it once and push the same buffer
  // instead of re-reading the file on every iteration.
  var dataChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
  for (var i = 0; i < 4400; i++) {
    xmlStream.push(dataChunk)
  }
  var endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
  xmlStream.push(endingChunk)
  xmlStream.push(null)

  parser.on('data', function (data) {
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('end', function () {
    var TimeTaken = Date.now() - startTime
    TimeTaken.should.be.belowOrEqual(700000)
    dataEventCount.should.equal(9116800)
    done()
  })
  xmlStream.pipe(parser)
})
})
describe('nodes with same names', function () {
// Listens on 'item' for nodesWithSameNames.xml and checks how many 'item'
// and 'data' events are produced.
it('should properly parse a simple file containing nodes with same names.', function (done) {
  var xmlStream = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
  var parser = new ParserFactory()
  var actualData = []
  var actualItems = []
  var dataEventCount = 0

  parser.on('data', function (data) {
    actualData.push(data)
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('item', function (item) {
    actualItems.push(item)
  })

  parser.on('end', function () {
    actualItems.length.should.equal(18)
    dataEventCount.should.equal(18)
    done()
  })
  xmlStream.pipe(parser)
})
// Listens on both 'item' and 'subitem' for nodesWithSameNames.xml and checks
// the number of events of each kind.
it('should properly parse a simple file containing nodes with same names and emit events on multiple nodes.', function (done) {
  var xmlStream = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
  var parser = new ParserFactory()
  var dataEventCount = 0
  var itemEventCount = 0
  var subitemEventCount = 0

  parser.on('data', function (data) {
    dataEventCount++
  })

  // Hand the error straight to mocha; asserting on it here would throw
  // before done(err) is reached and hide the real failure.
  parser.on('error', function (err) {
    done(err)
  })

  parser.on('item', function (item) {
    itemEventCount++
  })

  parser.on('subitem', function (subitem) {
    subitemEventCount++
  })

  parser.on('end', function () {
    itemEventCount.should.equal(18)
    subitemEventCount.should.equal(13)
    dataEventCount.should.equal(31)
    done()
  })
  xmlStream.pipe(parser)
})
// Counts the 'data', 'item' and 'subitem' events produced for
// nodesWithSameNamesRandomly.xml when the test listens on node names.
it('should properly parse a medium size file with same names randomly.', function (done) {
  var source = fs.createReadStream('./test/TestFiles/nodesWithSameNamesRandomly.xml')
  var parser = new ParserFactory()
  var seen = { data: 0, item: 0, subitem: 0 }

  // Builds a listener that increments the tally stored under `key`.
  function tally (key) {
    return function () {
      seen[key]++
    }
  }

  parser.on('data', tally('data'))
  parser.on('error', function (err) {
    done(err)
  })
  parser.on('item', tally('item'))
  parser.on('subitem', tally('subitem'))
  parser.on('end', function () {
    seen.data.should.equal(32)
    seen.item.should.equal(19)
    seen.subitem.should.equal(13)
    done()
  })

  source.pipe(parser)
})
})
})