var expat = require('node-expat') var _ = require('lodash') var util = require('util') var stream = require('stream') var ParserState = require('./parserState') var defaults = { resourcePath: '', emitOnNodeName: false, attrsKey: '$', textKey: '_' } function XmlParser (opts) { this.opts = _.defaults(opts, defaults) this.parserState = new ParserState() this.parser = new expat.Parser('UTF-8') stream.Transform.call(this) this._readableState.objectMode = true } util.inherits(XmlParser, stream.Transform) XmlParser.prototype.checkForInterestedNodeListeners = function () { var ignore = [ 'end', 'prefinish', 'data', 'error' ] var eventNames = Object.keys(this._events) for (var i = 0; i < eventNames.length; i++) { if (_.includes(ignore, eventNames[i], 0)) continue this.parserState.interestedNodes.push(eventNames[i]) } } XmlParser.prototype._transform = function (chunk, encoding, callback) { if (encoding !== 'buffer') this.emit('error', new Error('unsupported encoding')) this.processChunk(chunk) callback() } XmlParser.prototype.processChunk = function (chunk) { var parser = this.parser var state = this.parserState if (state.isRootNode) { this.checkForInterestedNodeListeners() registerEvents.call(this) } if (typeof chunk === 'string') { if (!parser.parse('', true)) processError.call(this) } else { if (!parser.parse(chunk.toString())) processError.call(this) } } XmlParser.prototype.parse = function (chunk, cb) { var parser = this.parser var state = this.parserState var error if (state.isRootNode) { this.checkForInterestedNodeListeners() registerEvents.call(this) } if (typeof chunk === Buffer) chunk = chunk.toString() this.on('error', function (err) { error = err }) if (!parser.parse(chunk)) { error = processError.call(this) } if (error) return cb(error) return cb(null, this._readableState.buffer) } function registerEvents () { var scope = this var parser = this.parser var state = this.parserState var lastIndex var resourcePath = this.opts.resourcePath var attrsKey = this.opts.attrsKey var textKey = this.opts.textKey var interestedNodes = state.interestedNodes parser.on('startElement', function (name, attrs) { if (state.isRootNode) validateResourcePath(name) state.currentPath = state.currentPath + '/' + name checkForResourcePath(name) if (state.isPathfound) processStartElement(name, attrs) }) parser.on('endElement', function (name) { state.lastEndedNode = name lastIndex = state.currentPath.lastIndexOf('/' + name) state.currentPath = state.currentPath.substring(0, lastIndex) if (state.isPathfound) processEndElement(name) checkForResourcePath(name) }) parser.on('text', function (text) { if (state.isPathfound) processText(text) }) parser.on('error', function (err) { processError.call(this, err) }) function processStartElement (name, attrs) { if (!name) return var obj = {} if (attrs && !_.isEmpty(attrs)) obj[attrsKey] = attrs var tempObj = state.object var path = getRelativePath(name) if (!path) { if (attrs && !_.isEmpty(attrs)) state.object[attrsKey] = attrs return } var tokens = path.split('.') for (var i = 0; i < tokens.length; i++) { if (tempObj[tokens[i]]) { tempObj = tempObj[tokens[i]] } else { tempObj[tokens[i]] = [] tempObj = tempObj[tokens[i]] } if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1] } tempObj.push(obj) } function processEndElement (name) { if (resourcePath) { var index = resourcePath.lastIndexOf('/') var rpath = resourcePath.substring(0, index) if (rpath === state.currentPath) { scope.push(state.object) if (scope.opts.emitOnNodeName) scope.emit(name, state.object) state.object = {} } } else { if (_.includes(interestedNodes, name, 0)) { emitInterestedNode(name) if (state.firstFoundNode === name) { state.object = {} state.firstFoundNode = '' state.isPathfound = false } } } } function emitInterestedNode (name) { var index var xpath var pathTokens xpath = state.currentPath.substring(1) pathTokens = xpath.split('/') pathTokens.push(name) index = pathTokens.indexOf(state.firstFoundNode) pathTokens = _.drop(pathTokens, index + 1) var tempObj = state.object for (var i = 0; i < pathTokens.length; i++) { tempObj = tempObj[pathTokens[i]] } if (Array.isArray(tempObj)) tempObj = tempObj[tempObj.length - 1] scope.emit(name, tempObj) scope.push(tempObj) } function processText (text) { if (!text || !/\S/.test(text)) { return } var path = getRelativePath() var tempObj = state.object if (!path) { if (!state.object[textKey]) state.object[textKey] = '' state.object[textKey] = state.object[textKey] + text return } var tokens = path.split('.') for (var i = 0; i < tokens.length; i++) { if (tempObj[tokens[i]]) { tempObj = tempObj[tokens[i]] } else { tempObj[tokens[i]] = [] tempObj = tempObj[tokens[i]] } if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1] } var obj = tempObj[tempObj.length - 1] if (!obj[textKey]) obj[textKey] = '' obj[textKey] = obj[textKey] + text } function checkForResourcePath (name) { if (resourcePath) { if (state.currentPath.indexOf(resourcePath) === 0) { state.isPathfound = true } else { state.isPathfound = false } } else { if (_.includes(interestedNodes, name, 0)) { state.isPathfound = true if (!state.firstFoundNode) { state.firstFoundNode = name } } } } function getRelativePath () { var tokens var jsonPath var index if (resourcePath) { var xpath = state.currentPath.substring(resourcePath.length) if (!xpath) return if (xpath[0] === '/') xpath = xpath.substring(1) tokens = xpath.split('/') jsonPath = tokens.join('.') } else { xpath = state.currentPath.substring(1) tokens = xpath.split('/') index = tokens.indexOf(state.firstFoundNode) tokens = _.drop(tokens, index + 1) jsonPath = tokens.join('.') } return jsonPath } function validateResourcePath (name) { var temp var index state.isRootNode = false if (resourcePath) { if (resourcePath[0] === '/') { temp = resourcePath.substring(1, resourcePath.length) } else { temp = resourcePath } index = temp.indexOf('/') if (index !== -1) temp = temp.substring(0, index) if (temp !== name) { scope.end() } } } } function processError (err) { var parser = this.parser var error = '' if (err) { error = err } else { error = parser.getError() } error = new Error(error + ' at line no: ' + parser.getCurrentLineNumber()) this.emit('error', error) return error } XmlParser.prototype._flush = function (callback) { this.processChunk('') callback() } module.exports = XmlParser