40 Commits

Author SHA1 Message Date
Sai1919
c742d92e32 bump version to 0.2.3 from 0.2.1 2018-12-03 16:25:05 +05:30
Sai1919
7fe1a03d98 upgrade lodash to fix vunerability 2018-12-03 15:53:10 +05:30
Sai Teja
8b6cf41277 update readme 2017-04-01 12:00:04 -07:00
Sai Teja
5da96b5528 0.2.1 2017-04-01 11:51:45 -07:00
Sai Teja
daa7301de0 changes to readme 2017-04-01 11:50:07 -07:00
Sai Teja
3bdb46828a 0.2.0 2017-04-01 11:30:56 -07:00
Sai Teja
42985ae630 add explicitArray option to constructor 2017-04-01 11:26:17 -07:00
Sai1919
2f1c20eff6 change to README.md 2016-11-17 15:31:40 +05:30
Sai1919
b218abc3d3 0.1.7 2016-11-15 18:11:47 +05:30
Sai1919
6e29f884e4 add parse method to handle strings and buffers 2016-11-15 18:09:07 +05:30
Sai1919
46500c11ca 0.1.6 2016-11-12 22:01:13 +05:30
Sai1919
011a9ea813 update README.md 2016-11-12 22:00:43 +05:30
Sai1919
34b6e9767e 0.1.5 2016-11-12 21:56:20 +05:30
Sai1919
91b1472eaa update README.md 2016-11-12 21:55:18 +05:30
Sai1919
b440296aca 0.1.4 2016-11-11 16:36:38 +05:30
Sai1919
dc765da9f3 add keywors to package.json 2016-11-11 16:36:02 +05:30
Sai1919
5da901626d 0.1.3 2016-11-11 16:16:28 +05:30
Sai1919
0b5af2ba60 minor update to .npmignore 2016-11-11 16:16:06 +05:30
Sai1919
418a002f81 0.1.2 2016-11-11 16:12:05 +05:30
Sai1919
e73de0bbb9 minor update to README.md 2016-11-11 16:11:13 +05:30
Sai1919
ce56469497 0.1.1 2016-11-11 16:09:00 +05:30
Sai1919
d12d6abd1c 0.1.0 2016-11-11 16:04:30 +05:30
Sai1919
ae6b805329 0.0.2 2016-11-11 16:04:06 +05:30
Sai1919
77012921de Merge pull request #4 from Sai1919/intrestedNodes
add supported for listening on interested nodes
2016-11-11 15:43:47 +05:30
Sai1919
51b7639f2a final commit for this PR 2016-11-11 15:39:04 +05:30
Sai1919
398e707aef finalize 2016-11-11 15:02:57 +05:30
Sai1919
3f9104bf5c try fix 2016-11-11 14:58:49 +05:30
Sai1919
3c6f8c53c7 debug 2016-11-11 14:43:52 +05:30
Sai1919
731b9963de debugging 2016-11-11 14:38:42 +05:30
Sai1919
3bc6c9f542 skip performance tests 2016-11-11 13:28:43 +05:30
Sai1919
456379c234 use old version of mocha as new version throws processNexTick error 2016-11-11 13:08:37 +05:30
Sai1919
0af4ef8ca3 finalize 2016-11-11 12:36:42 +05:30
Sai1919
b12059e8f7 update 2016-11-11 12:17:38 +05:30
Sai1919
a9cf386642 update 2016-11-11 12:14:03 +05:30
Sai1919
e3bb00fccb update 2016-11-11 12:07:33 +05:30
Sai1919
e2578ccdc9 debugging 2016-11-11 12:01:35 +05:30
Sai1919
a797e90b16 update 2016-11-11 11:52:47 +05:30
Sai1919
95b8cfd6bb skip large tests 2016-11-11 11:48:13 +05:30
Sai1919
66104578bf update 2016-11-10 23:24:55 +05:30
Sai1919
4649d3749c add supported for listening on interested nodes 2016-11-10 20:39:54 +05:30
11 changed files with 9564 additions and 115 deletions

3
.npmignore Normal file
View File

@@ -0,0 +1,3 @@
# Dependency directories
test
.travis.yml

123
README.md
View File

@@ -3,7 +3,7 @@
[![Build Status](https://travis-ci.org/Sai1919/xml-streamer.svg?branch=master)](https://travis-ci.org/Sai1919/xml-streamer)
## Motivation
You use [Node.js](https://nodejs.org) for speed? You process XML streams? Then you want the fastest XML to JS parser: xml-streamer
You use [Node.js](https://nodejs.org) for speed? You process XML streams? Then you want the fastest XML to JS parser: `xml-streamer`. It is based on [node-expat](https://github.com/astro/node-expat) and implements the Node.js `stream.Transform` API.
## Install
@@ -13,7 +13,73 @@ npm install xml-streamer
## Basic Usage
`xml-streamer can be used in four ways`
```javascript
// 1. By passing the resourcePath and reading data by calling the `read` method instead of listening for data events.
(function () {
"use strict";
var Parser = require('xml-streamer')
var opts = {resourcePath: '/items/item'}
var parser = new Parser(opts)
parser.on('end', function () {
// parsing ended no more data events will be raised
})
parser.on('error', function (error) {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
// after the readable event has occurred you can call the read method to get data.
parser.read() // will return one object at a time.
}())
// 2. By listening for interested nodes.
(function () {
"use strict";
var Parser = require('xml-streamer')
var opts = {} // see `Available Constructor Options` section below.
var parser = new Parser(opts)
parser.on('item', function (item) {
// consume the item object here
})
parser.on('end', function () {
// parsing ended no more data events will be raised
})
parser.on('error', function (error) {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
// 3. By passing a resource path.
(function () {
"use strict";
@@ -32,7 +98,7 @@ npm install xml-streamer
parser.on('error', function (error) {
// error occurred
// NOTE: when error emit emitted no end event will be emitted
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
@@ -43,6 +109,22 @@ npm install xml-streamer
})
}())
// 4. By passing a string or buffer to parse function
(function () {
"use strict";
var Parser = require('xml-streamer')
var opts = {resourcePath: '/items/item'} // resourcePath is mandatory when using the parse method
var parser = new Parser(opts)
parser.parse(stringOrBuffer, function (err, data) {
// consume data here
})
}())
```
## API
@@ -55,9 +137,10 @@ npm install xml-streamer
* `#resume()` resumes
* `#read()` returns object if stream is readable
## Available Constructor Options
* `resourcePath`: `Type: String` Manditory field. Used to extract the XML nodes that you are interested in.
* `resourcePath`: `Type: String` Optional field. Used to extract the XML nodes that you are interested in.
// Ex: let the XML be
```xml
@@ -78,6 +161,8 @@ npm install xml-streamer
if you are interested in `subitem` nodes then resourcePath would be: `/items/item/subitem`
if you are interested in `items` nodes then resourcePath would be: `/items`
* `emitOnNodeName`: `Type: Boolean` Optional field. Set this to true if you want to listen on node names instead of data event. `default: false`
// Ex: consider the above XML snippet
@@ -96,7 +181,9 @@ npm install xml-streamer
```
`NOTE:` when you set `emitOnNodeName:true` "data" events are emitted normally. So make sure you don't listen for both the events.
* `attrsKey`: `Type: String` Optional field. pass the value with which you want to reference attributes of a node in its object form. `default: '$'`
* `textKey`: `Type: String` Optional field. pass the value with which you want to reference node value in its object form. `default: '_'`
@@ -120,11 +207,33 @@ npm install xml-streamer
// Then set `attrsKey= "attrs"` and `textKey= "text"`
* `explicitArray`: `Type: Boolean` Optional field. `Default value is true`. All children nodes will come in an array when this option is true.
// Ex: For example let the XML be
```xml
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<subitem sub= "2">two</subitem>
</item>
</items>
```
// if explicitArray is false and resourcePath is /items/item.
// Output for above xml will be
```javascript
[
{ '$': { id: '1', test: 'hello' },
subitem: { '$': { sub: '2' }, _: 'two' } },
]
```
`caution:` When `explicitArray` is set to false and there are multiple child nodes with the same name, the last node will override all preceding nodes.
## upcoming features
1. `allowing to listen on interested nodes instead of passing resourcePath in options`
2. `handling of compressed streams`
3. `handling of different encodings`
1. `handling of compressed streams`
2. `handling of different encodings`
3. `Filtering of objects extracted from resourcePath based on xpaths and json paths`
## Namespace handling

View File

@@ -1,5 +1,5 @@
{
"version": "0.0.1",
"version": "0.2.3",
"name": "xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": {
@@ -14,7 +14,9 @@
"xml parser",
"xml parsing",
"xml2js",
"xmltojs"
"xmltojs",
"node-expat",
"expat"
],
"license": "MIT",
"repository": {
@@ -23,10 +25,10 @@
},
"dependencies": {
"node-expat": "2.3.15",
"lodash": "4.16.6"
"lodash": "4.17.5"
},
"devDependencies": {
"mocha": "3.1.2",
"mocha": "^1.21.4",
"should": "11.1.1",
"standard": "8.5.0"
},
@@ -42,6 +44,9 @@
}
],
"standard": {
"globals": [ "describe", "it" ]
"globals": [
"describe",
"it"
]
}
}

267
parser.js
View File

@@ -8,27 +8,78 @@ var defaults = {
resourcePath: '',
emitOnNodeName: false,
attrsKey: '$',
textKey: '_'
textKey: '_',
explicitArray: true
}
function XmlParser (opts) {
this.opts = _.defaults(opts, defaults)
this.parserState = new ParserState()
this.parser = new expat.Parser('UTF-8')
// var transformOpts = { readableObjectMode: true }
stream.Transform.call(this)
this._readableState.objectMode = true
}
util.inherits(XmlParser, stream.Transform)
/**
 * Collects the names of the events that currently have listeners on this
 * parser and records them in `parserState.interestedNodes`.
 *
 * The event names 'end', 'prefinish', 'data' and 'error' are skipped; every
 * other registered event name is appended as-is.
 */
XmlParser.prototype.checkForInterestedNodeListeners = function () {
  var reservedEvents = [ 'end', 'prefinish', 'data', 'error' ]
  var state = this.parserState

  Object.keys(this._events).forEach(function (eventName) {
    var isReserved = _.includes(reservedEvents, eventName, 0)
    if (!isReserved) state.interestedNodes.push(eventName)
  })
}
XmlParser.prototype._transform = function (chunk, encoding, callback) {
if (!this.opts.resourcePath) this.emit('error', new Error('resourcePath missing'))
if (encoding !== 'buffer') this.emit('error', new Error('unsupported encoding'))
this.parse(chunk)
this.processChunk(chunk)
callback()
}
XmlParser.prototype.parse = function (chunk) {
/**
 * Feeds one chunk to the underlying parser held in `this.parser`.
 *
 * On the first call (while `parserState.isRootNode` is still set) the
 * interested-node list is built and the parser event handlers are registered.
 * A string chunk is treated as the end-of-input signal: its content is not
 * forwarded, an empty final chunk is parsed instead. Any other chunk is
 * converted with `toString()` and parsed. A falsy parse result is reported
 * through `processError`.
 *
 * @param {Buffer|string} chunk data to parse, or a string to finish parsing
 */
XmlParser.prototype.processChunk = function (chunk) {
  var xmlParser = this.parser
  var parserState = this.parserState

  if (parserState.isRootNode) {
    this.checkForInterestedNodeListeners()
    registerEvents.call(this)
  }

  var parsedOk = typeof chunk === 'string'
    ? xmlParser.parse('', true)
    : xmlParser.parse(chunk.toString())

  if (!parsedOk) processError.call(this)
}
/**
 * Parses XML supplied in one piece (string or Buffer) and reports the outcome
 * through a node-style callback.
 *
 * On the first call (while `parserState.isRootNode` is still set) the
 * interested-node list is built and the parser event handlers are registered.
 *
 * @param {string|Buffer} chunk XML to parse; a Buffer is converted with `toString()`
 * @param {function(Error=, *=)} cb called with the error if one was emitted or
 *   returned by `processError`, otherwise with `null` and
 *   `this._readableState.buffer` (the stream's internal readable buffer)
 * @returns {*} whatever `cb` returns
 */
XmlParser.prototype.parse = function (chunk, cb) {
  var parser = this.parser
  var state = this.parserState
  var error

  // Remembers an 'error' emitted on this stream while parsing is in progress.
  function captureError (err) {
    error = err
  }

  if (state.isRootNode) {
    this.checkForInterestedNodeListeners()
    registerEvents.call(this)
  }

  // `typeof` always yields a string, so comparing it with the Buffer
  // constructor could never match; Buffer.isBuffer is the correct check.
  if (Buffer.isBuffer(chunk)) chunk = chunk.toString()

  this.on('error', captureError)

  if (!parser.parse(chunk)) {
    error = processError.call(this)
  }

  // Detach the temporary listener so repeated calls do not accumulate handlers.
  this.removeListener('error', captureError)

  if (error) return cb(error)
  return cb(null, this._readableState.buffer)
}
function registerEvents () {
var scope = this
var parser = this.parser
var state = this.parserState
@@ -36,57 +87,35 @@ XmlParser.prototype.parse = function (chunk) {
var resourcePath = this.opts.resourcePath
var attrsKey = this.opts.attrsKey
var textKey = this.opts.textKey
var interestedNodes = state.interestedNodes
var explicitArray = this.opts.explicitArray
if (state.isRootNode) registerEvents()
parser.on('startElement', function (name, attrs) {
if (state.isRootNode) validateResourcePath(name)
state.currentPath = state.currentPath + '/' + name
checkForResourcePath(name)
if (state.isPathfound) processStartElement(name, attrs)
})
if (typeof chunk === 'string') {
if (!parser.parse('', true)) processError()
} else {
if (!parser.parse(chunk.toString())) processError()
}
parser.on('endElement', function (name) {
state.lastEndedNode = name
lastIndex = state.currentPath.lastIndexOf('/' + name)
state.currentPath = state.currentPath.substring(0, lastIndex)
if (state.isPathfound) processEndElement(name)
checkForResourcePath(name)
})
function registerEvents () {
parser.on('startElement', function (name, attrs) {
if (state.isRootNode) validateResourcePath(name)
state.currentPath = state.currentPath + '/' + name
checkForResourcePath(name)
if (state.isPathfound) processStartElement(name, attrs)
})
parser.on('text', function (text) {
if (state.isPathfound) processText(text)
})
parser.on('endElement', function (name) {
state.lastEndedNode = name
lastIndex = state.currentPath.lastIndexOf('/' + name)
state.currentPath = state.currentPath.substring(0, lastIndex)
if (state.isPathfound) processEndElement(name)
checkForResourcePath(name)
})
parser.on('text', function (text) {
if (state.isPathfound) processText(text)
})
parser.on('error', function (err) {
processError(err)
})
parser.on('end', function () {
scope.emit('end')
})
}
function processError (err) {
var error = ''
if (err) {
error = err
} else {
error = parser.getError()
}
scope.emit('error', new Error(error + ' at line no: ' + parser.getCurrentLineNumber()))
}
parser.on('error', function (err) {
processError.call(this, err)
})
function processStartElement (name, attrs) {
if (!name) return
var obj = {}
if (attrs && !_.isEmpty(attrs)) obj[attrsKey] = attrs
var tempObj = state.object
@@ -98,28 +127,63 @@ XmlParser.prototype.parse = function (chunk) {
var tokens = path.split('.')
for (var i = 0; i < tokens.length; i++) {
if (tempObj[tokens[i]]) {
if (tempObj[tokens[i]] && !(explicitArray === false && i === tokens.length - 1)) {
tempObj = tempObj[tokens[i]]
} else {
tempObj[tokens[i]] = []
// if explicitArray is true then create each node as array
// irrespective of how many nodes are there with same name.
tempObj[tokens[i]] = explicitArray ? [] : obj
tempObj = tempObj[tokens[i]]
}
if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1]
}
tempObj.push(obj)
if (Array.isArray(tempObj)) {
tempObj.push(obj)
}
}
function processEndElement (name) {
var index = resourcePath.lastIndexOf('/')
var rpath = resourcePath.substring(0, index)
if (resourcePath) {
var index = resourcePath.lastIndexOf('/')
var rpath = resourcePath.substring(0, index)
if (rpath === state.currentPath) {
if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
scope.push(state.object)
state.object = {}
if (rpath === state.currentPath) {
scope.push(state.object)
if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
state.object = {}
}
} else {
if (_.includes(interestedNodes, name, 0)) {
emitInterestedNode(name)
if (state.firstFoundNode === name) {
state.object = {}
state.firstFoundNode = ''
state.isPathfound = false
}
}
}
}
/**
 * Emits the object built for an interested node that has just ended.
 *
 * The path of the node is `state.currentPath` plus `name`. Everything up to
 * and including the first occurrence of `state.firstFoundNode` is dropped,
 * and the remaining segments are followed key by key from `state.object`.
 * If the value reached is an array, its last element is used. The result is
 * emitted as an event called `name` and also pushed onto the stream.
 *
 * @param {string} name name of the element that just ended
 */
function emitInterestedNode (name) {
  var segments = state.currentPath.substring(1).split('/')
  segments.push(name)

  var anchorIndex = segments.indexOf(state.firstFoundNode)
  var relativeSegments = _.drop(segments, anchorIndex + 1)

  var node = relativeSegments.reduce(function (current, segment) {
    return current[segment]
  }, state.object)

  if (Array.isArray(node)) node = node[node.length - 1]

  scope.emit(name, node)
  scope.push(node)
}
function processText (text) {
if (!text || !/\S/.test(text)) {
return
@@ -136,31 +200,58 @@ XmlParser.prototype.parse = function (chunk) {
if (tempObj[tokens[i]]) {
tempObj = tempObj[tokens[i]]
} else {
tempObj[tokens[i]] = []
tempObj[tokens[i]] = explicitArray ? [] : {}
tempObj = tempObj[tokens[i]]
}
if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1]
}
var obj = tempObj[tempObj.length - 1]
if (!obj[textKey]) obj[textKey] = ''
obj[textKey] = obj[textKey] + text
if (Array.isArray(tempObj)) {
var obj = tempObj[tempObj.length - 1]
if (!obj[textKey]) obj[textKey] = ''
obj[textKey] = obj[textKey] + text
} else {
if (!tempObj[textKey]) tempObj[textKey] = ''
tempObj[textKey] = tempObj[textKey] + text
}
}
function checkForResourcePath (name) {
if (state.currentPath.indexOf(resourcePath) === 0) {
state.isPathfound = true
if (resourcePath) {
if (state.currentPath.indexOf(resourcePath) === 0) {
state.isPathfound = true
} else {
state.isPathfound = false
}
} else {
state.isPathfound = false
if (_.includes(interestedNodes, name, 0)) {
state.isPathfound = true
if (!state.firstFoundNode) {
state.firstFoundNode = name
}
}
}
}
function getRelativePath () {
var xpath = state.currentPath.substring(resourcePath.length)
var tokens
var jsonPath
var index
if (!xpath) return
if (xpath[0] === '/') xpath = xpath.substring(1)
var tokens = xpath.split('/')
var jsonPath = tokens.join('.')
if (resourcePath) {
var xpath = state.currentPath.substring(resourcePath.length)
if (!xpath) return
if (xpath[0] === '/') xpath = xpath.substring(1)
tokens = xpath.split('/')
jsonPath = tokens.join('.')
} else {
xpath = state.currentPath.substring(1)
tokens = xpath.split('/')
index = tokens.indexOf(state.firstFoundNode)
tokens = _.drop(tokens, index + 1)
jsonPath = tokens.join('.')
}
return jsonPath
}
@@ -170,21 +261,37 @@ XmlParser.prototype.parse = function (chunk) {
state.isRootNode = false
if (resourcePath[0] === '/') {
temp = resourcePath.substring(1, resourcePath.length)
} else {
temp = resourcePath
}
index = temp.indexOf('/')
if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) {
scope.end()
if (resourcePath) {
if (resourcePath[0] === '/') {
temp = resourcePath.substring(1, resourcePath.length)
} else {
temp = resourcePath
}
index = temp.indexOf('/')
if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) {
scope.end()
}
}
}
}
/**
 * Builds an Error describing a parse failure, emits it as an 'error' event on
 * `this`, and returns it. Must be invoked with `this` bound to an object that
 * has a `parser` property and an `emit` method.
 *
 * @param {*} [err] failure reason; when falsy, `this.parser.getError()` is used
 * @returns {Error} error whose message is the reason followed by
 *   ' at line no: ' and `this.parser.getCurrentLineNumber()`
 */
function processError (err) {
  var xmlParser = this.parser
  var reason = err || xmlParser.getError()
  var wrapped = new Error(reason + ' at line no: ' + xmlParser.getCurrentLineNumber())

  this.emit('error', wrapped)
  return wrapped
}
XmlParser.prototype._flush = function (callback) {
this.parse('')
this.processChunk('')
callback()
}

View File

@@ -4,9 +4,10 @@ function ParserState () {
this.lastEndedNode = ''
this.isPathfound = false
this.object = {}
this.buffer = []
this.paused = false
this.isRootNode = true
this.firstFoundNode = ''
this.interestedNodes = []
}
module.exports = ParserState

View File

@@ -0,0 +1 @@
</items>

View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<items>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<item>three</item>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
<item id="5" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<item sub= "2">two</item>
</item>
<item id="6" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="7" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
</items>

View File

@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="2">
<item>three</item>
<subitem>four</subitem>
<subitem>five</subitem>
</item>
<item id="3" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="4" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
<item id="5" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<item sub= "2">two</item>
</item>
<item id="6" test= 'hello'>
<item id= "6a">
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
</item>
<item id="7" test= 'hello'>
<item sub= "TESTING SUB">one</item>
<subitem sub= "2">two</subitem>
</item>
<item id="8" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="9" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
</item>
<item id="10" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<item sub= "2">two</item>
</item>
</items>

View File

@@ -1,6 +1,7 @@
var should = require('should')
var fs = require('fs')
var zlib = require('zlib')
var stream = require('stream')
var ParserFactory = require('../parser')
@@ -114,28 +115,6 @@ describe('Tests', function () {
xmlStream.pipe(parser)
})
it('should properly parse a file containing many nodes.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
var parser = new ParserFactory({resourcePath: '/items/item'})
// console.log(parser)
var dataEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
dataEventCount.should.equal(296)
done()
})
xmlStream.pipe(parser)
})
it('should properly parse a huge file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
var parser = new ParserFactory({resourcePath: '/items/item'})
@@ -755,4 +734,812 @@ describe('Tests', function () {
xmlStream.pipe(parser)
})
})
describe('interested Nodes', function () {
// Streams item.xml through a parser created without options and listens on
// 'data', 'item' and 'subitem'. Checks the exact objects delivered on each
// event as well as the event counts.
it('should properly parse a simple file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
var parser = new ParserFactory()
// Expected 'data' payloads, in emission order.
var expectedData =
[
{ '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' },
{ '$': { id: '1', test: 'hello' },
subitem: [ { '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' } ]
},
{ _: 'three' },
{ _: 'four' },
{ _: 'five' },
{ '$': { id: '2' },
subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] }
]
var actualData = []
var dataEventCount = 0
// Expected payloads of the 'item' events.
var expectedItems = [
{ '$': { id: '1', test: 'hello' },
subitem:
[ { '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' } ] },
{ '$': { id: '2' },
subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] } ]
var actualItems = []
var actualSubitems = []
// Expected payloads of the 'subitem' events.
var expectedSubitems = [
{ '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' },
{ _: 'three' },
{ _: 'four' },
{ _: 'five' }
]
parser.on('data', function (data) {
actualData.push(data)
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('item', function (item) {
actualItems.push(item)
})
parser.on('subitem', function (subitem) {
actualSubitems.push(subitem)
})
parser.on('end', function () {
// console.log('actualData=', JSON.stringify(actualData, null, 1))
// console.log('dataEventCount=', dataEventCount)
actualData.should.deepEqual(expectedData)
actualItems.should.deepEqual(expectedItems)
actualSubitems.should.deepEqual(expectedSubitems)
actualSubitems.length.should.equal(5)
actualItems.length.should.equal(2)
// 7 = the 5 subitem events plus the 2 item events asserted above
dataEventCount.should.equal(7)
done()
})
xmlStream.pipe(parser)
})
// Streams medium.xml through a parser created without options and only counts
// how many 'data', 'item' and 'subitem' events are raised.
it('should properly parse a medium size file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
var parser = new ParserFactory()
var dataEventCount = 0
var itemEventCount = 0
var subitemEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('item', function (item) {
itemEventCount++
})
parser.on('subitem', function (subitem) {
subitemEventCount++
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
// console.log('itemEventCount=', itemEventCount)
// console.log('subitemEventCount=', subitemEventCount)
// 31 data events = 10 item events + 21 subitem events
dataEventCount.should.equal(31)
itemEventCount.should.equal(10)
subitemEventCount.should.equal(21)
done()
})
xmlStream.pipe(parser)
})
// Streams manyItems.xml through a parser created without options and only
// counts how many 'data', 'item' and 'subitem' events are raised.
it('should properly parse a file containing many nodes.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
var parser = new ParserFactory()
var dataEventCount = 0
var itemEventCount = 0
var subitemEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('item', function (item) {
itemEventCount++
})
parser.on('subitem', function (subitem) {
subitemEventCount++
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
// console.log('itemEventCount=', itemEventCount)
// console.log('subitemEventCount=', subitemEventCount)
itemEventCount.should.equal(296)
subitemEventCount.should.equal(600)
// 896 data events = 296 item events + 600 subitem events
dataEventCount.should.equal(896)
done()
})
xmlStream.pipe(parser)
})
// Streams randomText.xml through a parser created without options. Compared
// with the simple-file expectations, the item objects here also carry a text
// value under '_'. Checks payloads and counts of the 'data', 'item' and
// 'subitem' events.
it('should properly parse a xml simple file in which nodes contain text values randomly.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/randomText.xml')
var parser = new ParserFactory()
// Expected 'data' payloads, in emission order.
var expectedData =
[
{ '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' },
{ '$': { id: '1', test: 'hello' }, _: ' item one two',
subitem: [ { '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' } ]
},
{ _: 'three' },
{ _: 'four' },
{ _: 'five' },
{ '$': { id: '2' }, '_': ' item one two three four',
subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] }
]
// Expected payloads of the 'item' events.
var expectedItems = [
{ '$': { id: '1', test: 'hello' }, _: ' item one two',
subitem:
[ { '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' } ] },
{ '$': { id: '2' }, '_': ' item one two three four',
subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] } ]
var actualItems = []
var actualSubitems = []
// Expected payloads of the 'subitem' events.
var expectedSubitems = [
{ '$': { sub: 'TESTING SUB' }, _: 'one' },
{ '$': { sub: '2' }, _: 'two' },
{ _: 'three' },
{ _: 'four' },
{ _: 'five' }
]
var actualData = []
var dataEventCount = 0
var itemEventCount = 0
var subitemEventCount = 0
parser.on('data', function (data) {
actualData.push(data)
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('item', function (item) {
itemEventCount++
actualItems.push(item)
})
parser.on('subitem', function (subitem) {
subitemEventCount++
actualSubitems.push(subitem)
})
parser.on('end', function () {
// console.log('actualData=', JSON.stringify(actualData, null, 1))
// console.log('dataEventCount=', dataEventCount)
// console.log('itemEventCount=', itemEventCount)
// console.log('subitemEventCount=', subitemEventCount)
actualData.should.deepEqual(expectedData)
actualItems.should.deepEqual(expectedItems)
actualSubitems.should.deepEqual(expectedSubitems)
// 7 data events = 2 item events + 5 subitem events
dataEventCount.should.equal(7)
itemEventCount.should.equal(2)
subitemEventCount.should.equal(5)
done()
})
xmlStream.pipe(parser)
})
// Streams hugeFile.xml through a parser created without options and only
// counts how many 'data', 'item' and 'subitem' events are raised.
it('should properly parse a huge file.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
var parser = new ParserFactory()
var dataEventCount = 0
var itemEventCount = 0
var subitemEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
done(err)
})
parser.on('item', function (item) {
itemEventCount++
})
parser.on('subitem', function (subitem) {
subitemEventCount++
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
// console.log('itemEventCount=', itemEventCount)
// console.log('subitemEventCount=', subitemEventCount)
// 6272 data events = 2072 item events + 4200 subitem events
dataEventCount.should.equal(6272)
itemEventCount.should.equal(2072)
subitemEventCount.should.equal(4200)
done()
})
xmlStream.pipe(parser)
})
// Streams item.xml through a parser created without options and listens on
// the 'items' event only (plus 'data'). Expects a single object that nests
// both item entries under the key 'item'.
it('should properly parse a simple file and return when root element when listening on it.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
var parser = new ParserFactory()
var expectedData =
[{ 'item': [{ '$': { 'id': '1', 'test': 'hello' },
'subitem': [{ '$': { 'sub': 'TESTING SUB' }, '_': 'one' },
{ '$': { 'sub': '2' }, '_': 'two' }]
},
{ '$': { 'id': '2' }, 'subitem': [{ '_': 'three' }, { '_': 'four' },
{ '_': 'five' }]
}]
}]
var actualData = []
var dataEventCount = 0
var itemsEventCount = 0
parser.on('data', function (data) {
actualData.push(data)
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('items', function (item) {
itemsEventCount++
})
parser.on('end', function () {
// console.log('actualData=', JSON.stringify(actualData, null, 1))
// console.log('dataEventCount=', dataEventCount)
// console.log('itemEventCount=', itemsEventCount)
actualData.should.deepEqual(expectedData)
// exactly one 'items' event and one matching 'data' event are expected
itemsEventCount.should.equal(1)
dataEventCount.should.equal(1)
done()
})
xmlStream.pipe(parser)
})
})
describe.skip('performance testing', function () {
// Performance check with resourcePath '/items/item'. The input is assembled in
// memory: firstChunk.xml, then repetitiveChunk.xml pushed 2200 times, then
// endingChunk.xml. Asserts the number of 'data' events and an upper bound on
// the elapsed time.
it('should properly parse more than 500 MB of file.', function (done) {
var parser = new ParserFactory({resourcePath: '/items/item'})
// var wsStream = fs.createWriteStream('./test/TestFiles/MB_and_GB_size_files/MBFile.xml')
// var rsStream = fs.createReadStream('./test/TestFiles/MB_and_GB_size_files/MBFile.xml')
var dataEventCount = 0
// var maxRSSMemoryTaken = 0
// var rss
var startTime = Date.now()
// Readable with a no-op _read: all data is pushed manually below.
var xmlStream = new stream.Readable()
xmlStream._read = function noop () {}
var dataChunk
// test timeout of 900000 ms (15 minutes)
this.timeout(900000)
var firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
xmlStream.push(firstChunk)
// NOTE(review): the same file is re-read from disk on every iteration.
for (var i = 0; i < 2200; i++) {
dataChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
xmlStream.push(dataChunk)
}
var endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
xmlStream.push(endingChunk)
// null marks the end of the readable stream
xmlStream.push(null)
parser.on('data', function (data) {
// rss = process.memoryUsage().rss
// if (rss > maxRSSMemoryTaken) maxRSSMemoryTaken = rss
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
// console.log('RSS memory=', rss)
var TimeTaken = Date.now() - startTime
// console.log('time taken=', TimeTaken)
// must finish within 300000 ms (5 minutes)
TimeTaken.should.be.belowOrEqual(300000)
dataEventCount.should.equal(4558400)
done()
})
xmlStream.pipe(parser)
})
// Performance check with resourcePath '/items/item'. The input is assembled in
// memory: firstChunk.xml, then repetitiveChunk.xml pushed 4400 times, then
// endingChunk.xml. Asserts the number of 'data' events and an upper bound on
// the elapsed time.
it('should properly parse more than 1 GB of file.', function (done) {
var parser = new ParserFactory({resourcePath: '/items/item'})
// var wsStream = fs.createWriteStream('./test/TestFiles/MB_and_GB_size_files/MBFile.xml')
// var rsStream = fs.createReadStream('./test/TestFiles/MB_and_GB_size_files/MBFile.xml')
var dataEventCount = 0
// var maxRSSMemoryTaken = 0
// var rss
var startTime = Date.now()
// Readable with a no-op _read: all data is pushed manually below.
var xmlStream = new stream.Readable()
xmlStream._read = function noop () {}
var dataChunk
// test timeout of 900000 ms (15 minutes)
this.timeout(900000)
var firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
xmlStream.push(firstChunk)
// NOTE(review): the same file is re-read from disk on every iteration.
for (var i = 0; i < 4400; i++) {
dataChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
xmlStream.push(dataChunk)
}
var endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
xmlStream.push(endingChunk)
// null marks the end of the readable stream
xmlStream.push(null)
parser.on('data', function (data) {
// rss = process.memoryUsage().rss
// if (rss > maxRSSMemoryTaken) maxRSSMemoryTaken = rss
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('end', function () {
// console.log('dataEventCount=', dataEventCount)
// console.log('RSS memory=', rss)
var TimeTaken = Date.now() - startTime
// console.log('time taken=', TimeTaken)
// must finish within 700000 ms
TimeTaken.should.be.belowOrEqual(700000)
// twice the count asserted by the 2200-iteration test (4558400)
dataEventCount.should.equal(9116800)
done()
})
xmlStream.pipe(parser)
})
})
describe('nodes with same names', function () {
// Streams nodesWithSameNames.xml through a parser created without options and
// listens on 'item' only (plus 'data'). Only the number of events is checked,
// not their payloads.
it('should properly parse a simple file containing nodes with same names.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
var parser = new ParserFactory()
var actualData = []
var actualItems = []
var dataEventCount = 0
parser.on('data', function (data) {
actualData.push(data)
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('item', function (item) {
actualItems.push(item)
})
parser.on('end', function () {
// every 'item' event is expected to have a matching 'data' event
actualItems.length.should.equal(18)
dataEventCount.should.equal(18)
done()
})
xmlStream.pipe(parser)
})
// Streams nodesWithSameNames.xml through a parser created without options and
// listens on both 'item' and 'subitem'. Only event counts are checked.
it('should properly parse a simple file containing nodes with same names and emit events on multiple nodes.', function (done) {
var xmlStream = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
var parser = new ParserFactory()
var dataEventCount = 0
var itemEventCount = 0
var subitemEventCount = 0
parser.on('data', function (data) {
dataEventCount++
})
parser.on('error', function (err) {
should(err).not.be.ok()
done(err)
})
parser.on('item', function (item) {
itemEventCount++
})
parser.on('subitem', function (subitem) {
subitemEventCount++
})
parser.on('end', function () {
itemEventCount.should.equal(18)
subitemEventCount.should.equal(13)
// 31 data events = 18 item events + 13 subitem events
dataEventCount.should.equal(31)
done()
})
xmlStream.pipe(parser)
})
it.skip('should properly parse a medium size file with same names randomly.', function (done) {
  // Currently skipped. Tallies the 'data', 'item' and 'subitem' events emitted
  // while streaming nodesWithSameNamesRandomly.xml and checks the totals at 'end'.
  var xmlStream = fs.createReadStream('./test/TestFiles/nodesWithSameNamesRandomly.xml')
  var parser = new ParserFactory()
  var seen = { data: 0, item: 0, subitem: 0 }
  var trackedEvents = ['data', 'item', 'subitem']

  // One counting listener per tracked event name.
  trackedEvents.forEach(function (eventName) {
    parser.on(eventName, function () {
      seen[eventName] += 1
    })
  })

  parser.on('error', function (parseError) {
    done(parseError)
  })

  parser.on('end', function () {
    seen.data.should.equal(32)
    seen.item.should.equal(19)
    seen.subitem.should.equal(13)
    done()
  })

  xmlStream.pipe(parser)
})
})
describe('Parse funtion should work properly', function () {
it('should properly parse a simple file.', function (done) {
  // Parses item.xml via parser.parse() and compares the complete result.
  // The input is converted with toString(), so parse() receives a string here.
  var xml = fs.readFileSync('./test/TestFiles/item.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  var expectedData = [
    { '$': { id: '1', test: 'hello' },
      subitem:
      [ { '$': { sub: 'TESTING SUB' }, _: 'one' },
        { '$': { sub: '2' }, _: 'two' } ] },
    { '$': { id: '2' },
      subitem: [ { _: 'three' }, { _: 'four' }, { _: 'five' } ] } ]
  parser.parse(xml.toString(), function (err, data) {
    // Stop on failure: without the return the assertions below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.should.deepEqual(expectedData)
    done()
  })
})
it('should properly parse a medium size file.', function (done) {
  // Parses medium.xml (passed as returned by readFileSync) and expects 10 items.
  var xml = fs.readFileSync('./test/TestFiles/medium.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertion below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.length.should.equal(10)
    done()
  })
})
it('should properly parse a file containing many nodes.', function (done) {
  // Parses manyItems.xml and expects 296 items under /items/item.
  var xml = fs.readFileSync('./test/TestFiles/manyItems.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertion below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.length.should.equal(296)
    done()
  })
})
it('should properly parse a xml simple file in which nodes contain text values randomly.', function (done) {
  // Parses randomText.xml and compares the complete result, including the
  // text collected under '_' on each item.
  var xml = fs.readFileSync('./test/TestFiles/randomText.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  var expectedData = [ { '$': { 'id': '1', 'test': 'hello' }, '_': ' item one two',
    'subitem': [ { '$': { 'sub': 'TESTING SUB' }, '_': 'one' },
      { '$': { 'sub': '2' }, '_': 'two' } ] },
  { '$': { 'id': '2' }, '_': ' item one two three four',
    'subitem': [ { '_': 'three' }, { '_': 'four' }, { '_': 'five' } ] }
  ]
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertions below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.should.deepEqual(expectedData)
    data.length.should.equal(2)
    done()
  })
})
it('should properly parse a huge file.', function (done) {
  // Parses hugeFile.xml and expects 2072 items under /items/item.
  var xml = fs.readFileSync('./test/TestFiles/hugeFile.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertion below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.length.should.equal(2072)
    done()
  })
})
it('should properly return error if the xml file is corrupted.', function (done) {
  // Parses corrupted.xml and expects the callback to receive an error with a
  // specific message and no usable data.
  var xml = fs.readFileSync('./test/TestFiles/corrupted.xml')
  var parser = new ParserFactory({resourcePath: '/items/item'})
  parser.parse(xml, function (err, data) {
    // Check that an error was produced before reading its message, so an
    // unexpected success fails with an assertion rather than a TypeError.
    should(err).be.ok()
    err.message.should.equal('mismatched tag at line no: 11')
    should(data).not.be.ok()
    done()
  })
})
})
describe('should respect explicitArray constructor option', function () {
it('should properly parse a simple file with explicitArray set to false.', function (done) {
  // Parses item.xml (as a string) with explicitArray: false. Each `subitem`
  // is expected as a single object holding the last subitem of the item.
  var xml = fs.readFileSync('./test/TestFiles/item.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})
  var expectedData = [
    { '$': { id: '1', test: 'hello' },
      subitem: { '$': { sub: '2' }, _: 'two' } },
    { '$': { id: '2' },
      subitem: { _: 'five' } } ]
  parser.parse(xml.toString(), function (err, data) {
    // Stop on failure: without the return the assertions below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.should.deepEqual(expectedData)
    done()
  })
})
it('should properly parse a medium size file with explicitArray set to false.', function (done) {
  // Parses medium.xml with explicitArray: false and compares all 10 items.
  var xml = fs.readFileSync('./test/TestFiles/medium.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})

  // Build the expected items instead of spelling out ten near-identical
  // literals: every item except id 2 shares one shape and differs only in id.
  var expectedData = []
  for (var id = 1; id <= 10; id++) {
    if (id === 2) {
      // Item 2 has no `test` attribute and its subitem carries text only.
      expectedData.push({ '$': { id: '2' }, subitem: { _: 'five' } })
    } else {
      expectedData.push({
        '$': { id: String(id), test: 'hello' },
        subitem: { '$': { sub: '2' }, _: 'two' }
      })
    }
  }

  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertions below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.should.deepEqual(expectedData)
    data.length.should.equal(10)
    done()
  })
})
it('should properly parse a file containing many nodes when explicitArray set to false.', function (done) {
  // Parses manyItems.xml with explicitArray: false and expects 296 items.
  var xml = fs.readFileSync('./test/TestFiles/manyItems.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertion below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.length.should.equal(296)
    done()
  })
})
it('should properly parse a xml simple file in which nodes contain text values randomly when explicitArray set to false.', function (done) {
  // Parses randomText.xml with explicitArray: false and compares the complete
  // result; each `subitem` is expected as a single object.
  var xml = fs.readFileSync('./test/TestFiles/randomText.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})
  var expectedData = [ { '$': { 'id': '1', 'test': 'hello' }, '_': ' item one two',
    'subitem': { '$': { 'sub': '2' }, '_': 'two' } },
  { '$': { 'id': '2' }, '_': ' item one two three four',
    'subitem': { '_': 'five' } }
  ]
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertions below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.should.deepEqual(expectedData)
    data.length.should.equal(2)
    done()
  })
})
it('should properly parse a huge file with explicitArray set to false.', function (done) {
  // Parses hugeFile.xml with explicitArray: false and expects 2072 items.
  var xml = fs.readFileSync('./test/TestFiles/hugeFile.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})
  parser.parse(xml, function (err, data) {
    // Stop on failure: without the return the assertion below would still
    // execute after the test has already been reported as failed.
    if (err) return done(err)
    data.length.should.equal(2072)
    done()
  })
})
it('should properly return error if the xml file is corrupted.', function (done) {
  // Parses corrupted.xml with explicitArray: false and expects the callback to
  // receive an error with a specific message and no usable data.
  var xml = fs.readFileSync('./test/TestFiles/corrupted.xml')
  var parser = new ParserFactory({resourcePath: '/items/item', explicitArray: false})
  parser.parse(xml, function (err, data) {
    // Check that an error was produced before reading its message, so an
    // unexpected success fails with an assertion rather than a TypeError.
    should(err).be.ok()
    err.message.should.equal('mismatched tag at line no: 11')
    should(data).not.be.ok()
    done()
  })
})
it('should properly generate objects when special symbols are passed as attrs and text keys and explicitArray is false in the options.', function (done) {
  // Streams item.xml through a parser configured with '!' as the attributes key,
  // '%' as the text key and explicitArray: false, then compares every emitted
  // 'data' payload (and the number of emissions) once 'end' fires.
  var xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
  var parserOptions = {
    resourcePath: '/items/item',
    attrsKey: '!',
    textKey: '%',
    explicitArray: false
  }
  var parser = new ParserFactory(parserOptions)

  var firstItem = {
    '!': { id: '1', test: 'hello' },
    subitem: { '!': { sub: '2' }, '%': 'two' }
  }
  var secondItem = {
    '!': { id: '2' },
    subitem: { '%': 'five' }
  }
  var expectedData = [firstItem, secondItem]

  var received = []
  var emissions = 0

  parser.on('data', function (payload) {
    received.push(payload)
    emissions += 1
  })

  parser.on('error', function (parseError) {
    done(parseError)
  })

  parser.on('end', function () {
    received.should.deepEqual(expectedData)
    emissions.should.equal(2)
    done()
  })

  xmlStream.pipe(parser)
})
})
})