attrsKey, textKey, and many test cases

This commit is contained in:
Sai1919
2016-11-08 15:56:49 +05:30
parent b52144ba5e
commit 3df34ea4ef
5 changed files with 10234 additions and 16 deletions

View File

@@ -4,9 +4,14 @@ var util = require('util')
var stream = require('stream') var stream = require('stream')
var ParserState = require('./parserState') var ParserState = require('./parserState')
// Fallback option values; XmlParser merges them into the caller's options
// with _.defaults, so only options the caller left unset are filled in.
var defaults = {
// Path of the nodes to extract; the tests pass values such as '/items/item'.
resourcePath: '',
// When truthy, each finished object is also emitted as an event named after its node.
emitOnNodeName: false,
// Property name under which a node's attributes are stored.
attrsKey: '$',
// Property name under which a node's text content is accumulated.
textKey: '_'
}
function XmlParser (opts) { function XmlParser (opts) {
this.opts = opts || {} this.opts = _.defaults(opts, defaults)
this.parserState = new ParserState() this.parserState = new ParserState()
this.parser = new expat.Parser('UTF-8') this.parser = new expat.Parser('UTF-8')
// var transformOpts = { readableObjectMode: true } // var transformOpts = { readableObjectMode: true }
@@ -29,13 +34,15 @@ XmlParser.prototype.parse = function (chunk) {
var state = this.parserState var state = this.parserState
var lastIndex var lastIndex
var resourcePath = this.opts.resourcePath var resourcePath = this.opts.resourcePath
var attrsKey = this.opts.attrsKey
var textKey = this.opts.textKey
if (state.isRootNode) registerEvents() if (state.isRootNode) registerEvents()
if (typeof chunk === 'string') { if (typeof chunk === 'string') {
parser.parse('', true) if (!parser.parse('', true)) processError()
} else { } else {
parser.parse(chunk.toString()) if (!parser.parse(chunk.toString())) processError()
} }
function registerEvents () { function registerEvents () {
@@ -59,21 +66,33 @@ XmlParser.prototype.parse = function (chunk) {
}) })
parser.on('error', function (err) { parser.on('error', function (err) {
scope.emit('error', new Error(err + 'at line no:' + parser.getCurrentLineNumber() + ' on column no:' + parser.getCurrentColumnNumber())) processError(err)
}) })
parser.on('end', function () { parser.on('end', function () {
scope.emit('end')
}) })
} }
// Emits an 'error' event on the stream. The message is the supplied error
// (or, when none is supplied, whatever parser.getError() reports) followed by
// the parser's current line number.
function processError (err) {
  var reason = err || parser.getError()
  var message = reason + ' at line no: ' + parser.getCurrentLineNumber()
  scope.emit('error', new Error(message))
}
function processStartElement (name, attrs) { function processStartElement (name, attrs) {
if (!name) return if (!name) return
var obj = {} var obj = {}
if (attrs && !_.isEmpty(attrs)) obj.$ = attrs if (attrs && !_.isEmpty(attrs)) obj[attrsKey] = attrs
var tempObj = state.object var tempObj = state.object
var path = getRelativePath(name) var path = getRelativePath(name)
if (!path) { if (!path) {
if (attrs && !_.isEmpty(attrs)) state.object.$ = attrs if (attrs && !_.isEmpty(attrs)) state.object[attrsKey] = attrs
return return
} }
var tokens = path.split('.') var tokens = path.split('.')
@@ -95,7 +114,7 @@ XmlParser.prototype.parse = function (chunk) {
var rpath = resourcePath.substring(0, index) var rpath = resourcePath.substring(0, index)
if (rpath === state.currentPath) { if (rpath === state.currentPath) {
if (scope.opts.emitEventsOnNodeName) scope.emit(name, state.object) if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
scope.push(state.object) scope.push(state.object)
state.object = {} state.object = {}
} }
@@ -108,8 +127,8 @@ XmlParser.prototype.parse = function (chunk) {
var path = getRelativePath() var path = getRelativePath()
var tempObj = state.object var tempObj = state.object
if (!path) { if (!path) {
if (!state.object._) state.object._ = '' if (!state.object[textKey]) state.object[textKey] = ''
state.object._ = state.object._ + text state.object[textKey] = state.object[textKey] + text
return return
} }
var tokens = path.split('.') var tokens = path.split('.')
@@ -123,8 +142,8 @@ XmlParser.prototype.parse = function (chunk) {
if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1] if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1]
} }
var obj = tempObj[tempObj.length - 1] var obj = tempObj[tempObj.length - 1]
if (!obj._) obj._ = '' if (!obj[textKey]) obj[textKey] = ''
obj._ = obj._ + text obj[textKey] = obj[textKey] + text
} }
function checkForResourcePath (name) { function checkForResourcePath (name) {
@@ -157,10 +176,9 @@ XmlParser.prototype.parse = function (chunk) {
temp = resourcePath temp = resourcePath
} }
index = temp.indexOf('/') index = temp.indexOf('/')
temp = temp.substring(0, index) if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) { if (temp !== name) {
this.end() scope.end()
} }
} }
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="utf-8"?>
<details>
<item id="1" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<subitem>three</subitem>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="5" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<product id="6" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</product>
<product id="7" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</product>
<product id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</product>
<product id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</product>
<product id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</product>
</details>

View File

@@ -1,10 +1,11 @@
var should = require('should') var should = require('should')
var fs = require('fs') var fs = require('fs')
var zlib = require('zlib')
var ParserFactory = require('../parser') var ParserFactory = require('../parser')
describe('Tests', function () { describe('Tests', function () {
describe('simple behaviour testing', function () { describe('Basic behaviour', function () {
it('should properly parse a simple file.', function (done) { it('should properly parse a simple file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/item.xml') var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
var parser = new ParserFactory({resourcePath: '/items/item'}) var parser = new ParserFactory({resourcePath: '/items/item'})
@@ -155,4 +156,602 @@ describe('Tests', function () {
xmlStream.pipe(parser) xmlStream.pipe(parser)
}) })
}) })
describe('pause and resume', function () {
  // Pauses `parser` inside every 'data' handler and resumes it `delayMs`
  // later, asserting that no object is delivered while the stream is paused.
  // `beforePause`, when given, receives each object first. Returns a stats
  // object whose `events` field counts the handled 'data' events.
  function pauseOnEveryItem (parser, delayMs, beforePause) {
    var stats = { events: 0 }
    var resumedSinceLastItem = true
    parser.on('data', function (item) {
      if (beforePause) beforePause(item)
      parser.pause()
      resumedSinceLastItem.should.equal(true)
      setTimeout(function () {
        parser.resume()
        resumedSinceLastItem = true
      }, delayMs)
      stats.events += 1
      resumedSinceLastItem = false
    })
    return stats
  }

  it('should properly parse a simple file.', function (done) {
    this.timeout(4000)
    var expectedData = [
      {
        '$': { id: '1', test: 'hello' },
        subitem: [
          { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' }
        ]
      },
      {
        '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
      }
    ]
    var source = fs.createReadStream('./test/TestFiles/item.xml')
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var received = []
    var stats = pauseOnEveryItem(parser, 3000, function (item) {
      received.push(item)
    })
    parser.on('error', done)
    parser.on('end', function () {
      received.should.deepEqual(expectedData)
      stats.events.should.equal(2)
      done()
    })
    source.pipe(parser)
  })

  it('should emit data events with 1sec interval between each using pause and resume.', function (done) {
    this.timeout(20000)
    var source = fs.createReadStream('./test/TestFiles/medium.xml')
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var stats = pauseOnEveryItem(parser, 2000)
    parser.on('error', done)
    parser.on('end', function () {
      stats.events.should.equal(10)
      done()
    })
    source.pipe(parser)
  })
})
describe('should respect the options passed', function () {
  // Parses item.xml with the given parser options, gathers every emitted
  // object and compares the result (two objects) with `expectedData`.
  function assertParsedItems (parserOptions, expectedData, done) {
    var source = fs.createReadStream('./test/TestFiles/item.xml')
    var parser = new ParserFactory(parserOptions)
    var collected = []
    var events = 0
    parser.on('data', function (item) {
      collected.push(item)
      events += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      collected.should.deepEqual(expectedData)
      events.should.equal(2)
      done()
    })
    source.pipe(parser)
  }

  it('should properly generate objects with $ as key for attrs and _ as key for text value of node.', function (done) {
    assertParsedItems({resourcePath: '/items/item'}, [
      {
        '$': { id: '1', test: 'hello' },
        subitem: [
          { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' }
        ]
      },
      {
        '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
      }
    ], done)
  })

  it('should properly generate objects with passed attrs and text keys in the options.', function (done) {
    assertParsedItems({resourcePath: '/items/item', attrsKey: 'attrs', textKey: 'text'}, [
      {
        'attrs': { id: '1', test: 'hello' },
        subitem: [
          { 'attrs': { sub: 'TESTING SUB' }, text: 'one' },
          { 'attrs': { sub: '2' }, text: 'two' }
        ]
      },
      {
        'attrs': { id: '2' },
        subitem: [ { text: 'three' }, { text: 'four' }, { text: 'five' } ]
      }
    ], done)
  })

  it('should properly generate objects when special symbols are passed as attrs and text keys in the options.', function (done) {
    assertParsedItems({resourcePath: '/items/item', attrsKey: '!', textKey: '%'}, [
      {
        '!': { id: '1', test: 'hello' },
        subitem: [
          { '!': { sub: 'TESTING SUB' }, '%': 'one' },
          { '!': { sub: '2' }, '%': 'two' }
        ]
      },
      {
        '!': { id: '2' },
        subitem: [ { '%': 'three' }, { '%': 'four' }, { '%': 'five' } ]
      }
    ], done)
  })
})
describe('should properly handle uncompressed files', function () {
  // Returns medium.xml as a stream that has been gzipped and gunzipped again
  // before it reaches the parser.
  function roundTrippedSource () {
    var raw = fs.createReadStream('./test/TestFiles/medium.xml')
    var compress = zlib.createGzip()
    var decompress = zlib.createGunzip()
    return raw.pipe(compress).pipe(decompress)
  }

  it('should properly parse a uncompressed xml file.', function (done) {
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var events = 0
    parser.on('data', function () {
      events += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      events.should.equal(10)
      done()
    })
    roundTrippedSource().pipe(parser)
  })

  it('should properly parse uncompressed file and go fine with pause and resume.', function (done) {
    this.timeout(20000)
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var events = 0
    // false between a pause() and the timer that resumes the parser
    var resumedSinceLastItem = true

    function resumeParser () {
      parser.resume()
      resumedSinceLastItem = true
    }

    parser.on('data', function () {
      parser.pause()
      resumedSinceLastItem.should.equal(true)
      setTimeout(resumeParser, 2000)
      events += 1
      resumedSinceLastItem = false
    })
    parser.on('error', done)
    parser.on('end', function () {
      events.should.equal(10)
      done()
    })
    roundTrippedSource().pipe(parser)
  })
})
describe('read method', function () {
  // Counts the objects obtained from `fileName` through parser.read() and
  // checks the total against `expectedCount` once the stream ends.
  function countObjectsViaRead (fileName, expectedCount, done) {
    var xmlStream = fs.createReadStream(fileName)
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var objCount = 0
    var endEventOccurred = false
    var pollTimer = null

    function read () {
      while (parser.read()) objCount++
      // Keep at most one pending poll: every 'readable' event calls read(),
      // and without this each call would start another timer chain.
      clearTimeout(pollTimer)
      if (!endEventOccurred) pollTimer = setTimeout(read, 50)
    }

    // Stops polling so no timer outlives the test.
    function stopPolling () {
      endEventOccurred = true
      clearTimeout(pollTimer)
    }

    parser.on('readable', read)
    parser.on('error', function (err) {
      stopPolling()
      done(err)
    })
    parser.on('end', function () {
      stopPolling()
      objCount.should.deepEqual(expectedCount)
      done()
    })
    xmlStream.pipe(parser)
  }

  it('should properly parse a simple file.', function (done) {
    var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var expectedData = [
      {
        '$': { id: '1', test: 'hello' },
        subitem: [
          { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' }
        ]
      },
      {
        '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
      }
    ]
    var actualData = []
    var pollTimer = null

    parser.on('readable', function () {
      // 'readable' can fire more than once; start the poller only the first
      // time so that the single handle cleared below covers every interval.
      if (pollTimer) return
      pollTimer = setInterval(function () {
        var obj = parser.read()
        if (obj) actualData.push(obj)
      }, 50)
    })

    parser.on('error', function (err) {
      clearInterval(pollTimer)
      done(err)
    })

    parser.on('end', function () {
      clearInterval(pollTimer)
      actualData.should.deepEqual(expectedData)
      done()
    })
    xmlStream.pipe(parser)
  })

  it('should properly parse a file containing many nodes.', function (done) {
    countObjectsViaRead('./test/TestFiles/manyItems.xml', 296, done)
  })

  it('should properly parse a huge file.', function (done) {
    countObjectsViaRead('./test/TestFiles/hugeFile.xml', 2072, done)
  })
})
describe('Error Handling', function () {
  // Pipes `fileName` through a parser and expects the emitted 'error' to
  // carry exactly `expectedMessage`.
  function expectParseError (fileName, expectedMessage, done) {
    var source = fs.createReadStream(fileName)
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var seen = 0
    // The counter is never asserted; the 'data' listener itself is kept
    // because the original test registers one.
    parser.on('data', function () {
      seen += 1
    })
    parser.on('error', function (failure) {
      failure.message.should.equal(expectedMessage)
      done()
    })
    source.pipe(parser)
  }

  it('should properly return error if the xml file is corrupted.', function (done) {
    expectParseError('./test/TestFiles/corrupted.xml', 'mismatched tag at line no: 11', done)
  })

  it('should properly return error if the large xml file is corrupted.', function (done) {
    expectParseError('./test/TestFiles/largeCorruptedFile.xml', 'mismatched tag at line no: 8346', done)
  })
})
describe('CData and comments in xml', function () {
  // CData-comments.xml is expected to yield 296 objects.
  it('should properly parse a simple file.', function (done) {
    var source = fs.createReadStream('./test/TestFiles/CData-comments.xml')
    var parser = new ParserFactory({resourcePath: '/items/item'})
    var events = 0

    function verify () {
      events.should.equal(296)
      done()
    }

    parser.on('data', function () {
      events += 1
    })
    parser.on('error', done)
    parser.on('end', verify)
    source.pipe(parser)
  })
})
describe('emitOnNodeName', function () {
  // Parses `fileName` with emitOnNodeName enabled and checks that both the
  // 'data' and the 'item' events fired `expectedCount` times.
  function expectMatchingCounts (fileName, expectedCount, done) {
    var source = fs.createReadStream(fileName)
    var parser = new ParserFactory({resourcePath: '/items/item', emitOnNodeName: true})
    var dataEvents = 0
    var itemEvents = 0
    parser.on('data', function () {
      dataEvents += 1
    })
    parser.on('item', function () {
      itemEvents += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      dataEvents.should.equal(expectedCount)
      itemEvents.should.equal(expectedCount)
      done()
    })
    source.pipe(parser)
  }

  it('should properly emit events on node names.', function (done) {
    var expectedData = [
      {
        '$': { id: '1', test: 'hello' },
        subitem: [
          { '$': { sub: 'TESTING SUB' }, _: 'one' },
          { '$': { sub: '2' }, _: 'two' }
        ]
      },
      {
        '$': { id: '2' },
        subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ]
      }
    ]
    var source = fs.createReadStream('./test/TestFiles/item.xml')
    var parser = new ParserFactory({resourcePath: '/items/item', emitOnNodeName: true})
    var fromData = []
    var fromItem = []
    var dataEvents = 0
    var itemEvents = 0

    parser.on('data', function (obj) {
      fromData.push(obj)
      dataEvents += 1
    })
    parser.on('item', function (obj) {
      fromItem.push(obj)
      itemEvents += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      fromData.should.deepEqual(expectedData)
      dataEvents.should.equal(2)
      fromItem.should.deepEqual(expectedData)
      itemEvents.should.equal(2)
      done()
    })
    source.pipe(parser)
  })

  it('should properly emit events on node names while parsing a medium size file.', function (done) {
    expectMatchingCounts('./test/TestFiles/medium.xml', 10, done)
  })

  it('should properly parse a file containing many nodes.', function (done) {
    expectMatchingCounts('./test/TestFiles/manyItems.xml', 296, done)
  })

  it('should properly parse a huge file.', function (done) {
    expectMatchingCounts('./test/TestFiles/hugeFile.xml', 2072, done)
  })
})
describe('wrong resourcePath', function () {
  // Parses `fileName` against a resourcePath that matches nothing and checks
  // that neither 'data' nor 'item' events were emitted before 'end'.
  function expectNoObjects (fileName, resourcePath, done) {
    var source = fs.createReadStream(fileName)
    var parser = new ParserFactory({resourcePath: resourcePath, emitOnNodeName: true})
    var dataEvents = 0
    var itemEvents = 0
    parser.on('data', function () {
      dataEvents += 1
    })
    parser.on('item', function () {
      itemEvents += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      dataEvents.should.equal(0)
      itemEvents.should.equal(0)
      done()
    })
    source.pipe(parser)
  }

  it('should be able to detect the wrong resourcePath at root level.', function (done) {
    var source = fs.createReadStream('./test/TestFiles/item.xml')
    var parser = new ParserFactory({resourcePath: '/wrong/noNodes', emitOnNodeName: true})
    var fromData = []
    var fromItem = []
    var dataEvents = 0
    var itemEvents = 0

    parser.on('data', function (obj) {
      fromData.push(obj)
      dataEvents += 1
    })
    parser.on('item', function (obj) {
      fromItem.push(obj)
      itemEvents += 1
    })
    parser.on('error', done)
    parser.on('end', function () {
      fromData.length.should.equal(0)
      dataEvents.should.equal(0)
      fromItem.length.should.equal(0)
      itemEvents.should.equal(0)
      done()
    })
    source.pipe(parser)
  })

  it('should be able to detect wrong resourcePath while parsing xml', function (done) {
    expectNoObjects('./test/TestFiles/manyItems.xml', '/wrong/noNodes', done)
  })

  it('should properly parse a huge file.', function (done) {
    expectNoObjects('./test/TestFiles/hugeFile.xml', '/wrong/path', done)
  })
})
}) })