46 Commits

Author SHA1 Message Date
T. R. Bernstein
29e1c16f30 Make explicitArray=false the default 2025-07-02 14:43:20 +02:00
T. R. Bernstein
60173e3e65 Handle repeating elements w explicitArray=false 2025-07-02 00:01:28 +02:00
T. R. Bernstein
38f7230ce1 Reformat files using prettier 2025-07-01 23:21:56 +02:00
T. R. Bernstein
b1d36437ea Change package name to @tabshift/xml-streamer 2025-07-01 22:30:53 +02:00
Dror Gluska
07fd1db503 0.3.1 2019-07-05 23:32:58 +03:00
Dror Gluska
593c24b5ba remove install 2019-07-05 23:32:53 +03:00
Dror Gluska
63817068a8 add build to prepublish 2019-07-05 00:31:51 +03:00
Dror Gluska
14247dcbb8 exclude examples 2019-07-05 00:29:54 +03:00
Dror Gluska
15bf91d44c 0.3.0 2019-07-05 00:26:46 +03:00
Dror Gluska
e4051d0f45 modify package name 2019-07-05 00:24:41 +03:00
Dror Gluska
fa375b2d5a cleanup documentation, examples and coverage 2019-07-05 00:23:29 +03:00
Dror Gluska
c3a177244b update readme 2019-05-31 23:17:50 +03:00
Dror Gluska
dec131431d cleanup node-expat 2019-05-31 22:57:03 +03:00
Dror Gluska
46885d9ede replace node-expat with SaxLtx due to reliability issues with errors about invalid elements where there is no obvious reason for the error. 2019-05-31 22:52:23 +03:00
Dror Gluska
cfebc962f0 typescriptify and cleanup 2019-05-31 11:27:48 +03:00
Sai Teja
28006be580 Merge pull request #11 from Sai1919/release/0.2.1
Upgrade lodash to fix vunerability
2018-12-03 16:15:01 +05:30
Sai1919
7fe1a03d98 upgrade lodash to fix vunerability 2018-12-03 15:53:10 +05:30
Sai Teja
7da2235dab Merge pull request #8 from pludov/parse_all_input
Consume content in all cases but error
2018-11-28 20:28:00 +05:30
Sai Teja
afc63586e3 Merge pull request #7 from pludov/preserve_text
Add option verbatimText option to preserve blanks in text
2018-10-26 18:46:48 +05:30
Ludovic POLLET
e7c9808107 Add option verbatimText option to preserve blanks in text 2018-09-06 16:16:32 +02:00
Ludovic POLLET
32aef91cde Consume content in all cases but error 2018-09-05 16:56:16 +02:00
Sai Teja
8b6cf41277 update readme 2017-04-01 12:00:04 -07:00
Sai Teja
5da96b5528 0.2.1 2017-04-01 11:51:45 -07:00
Sai Teja
daa7301de0 changes to readme 2017-04-01 11:50:07 -07:00
Sai Teja
3bdb46828a 0.2.0 2017-04-01 11:30:56 -07:00
Sai Teja
42985ae630 add explicitArray option to constructor 2017-04-01 11:26:17 -07:00
Sai1919
2f1c20eff6 change to README.md 2016-11-17 15:31:40 +05:30
Sai1919
b218abc3d3 0.1.7 2016-11-15 18:11:47 +05:30
Sai1919
6e29f884e4 add parse method to handle strings and buffers 2016-11-15 18:09:07 +05:30
Sai1919
46500c11ca 0.1.6 2016-11-12 22:01:13 +05:30
Sai1919
011a9ea813 update README.md 2016-11-12 22:00:43 +05:30
Sai1919
34b6e9767e 0.1.5 2016-11-12 21:56:20 +05:30
Sai1919
91b1472eaa update README.md 2016-11-12 21:55:18 +05:30
Sai1919
b440296aca 0.1.4 2016-11-11 16:36:38 +05:30
Sai1919
dc765da9f3 add keywors to package.json 2016-11-11 16:36:02 +05:30
Sai1919
5da901626d 0.1.3 2016-11-11 16:16:28 +05:30
Sai1919
0b5af2ba60 minor update to .npmignore 2016-11-11 16:16:06 +05:30
Sai1919
418a002f81 0.1.2 2016-11-11 16:12:05 +05:30
Sai1919
e73de0bbb9 minor update to README.md 2016-11-11 16:11:13 +05:30
Sai1919
ce56469497 0.1.1 2016-11-11 16:09:00 +05:30
Sai1919
d12d6abd1c 0.1.0 2016-11-11 16:04:30 +05:30
Sai1919
ae6b805329 0.0.2 2016-11-11 16:04:06 +05:30
Sai1919
77012921de Merge pull request #4 from Sai1919/intrestedNodes
add supported for listening on interested nodes
2016-11-11 15:43:47 +05:30
Sai1919
6c736d0e76 Merge pull request #3 from Sai1919/intrestedNodes
README.md
2016-11-08 18:23:39 +05:30
Sai1919
f3d514b3b0 Merge pull request #2 from Sai1919/intrestedNodes
add stream.Transform API implementation
2016-11-08 16:01:31 +05:30
Sai1919
53fe8d33a2 Merge pull request #1 from Sai1919/travisBuildCheck
testing Travis build and one test case added
2016-11-07 14:08:19 +05:30
36 changed files with 3083 additions and 1643 deletions

3
.gitignore vendored
View File

@@ -35,3 +35,6 @@ jspm_packages
# Optional REPL history
.node_repl_history
/dist
/docs
drorgl-xml-streamer-*.tgz

View File

@@ -1,3 +1,13 @@
# Dependency directories
node_modules
test
test
.travis.yml
tsconfig.json
tslint.json
.nyc_output
.vscode
coverage
docs
src
drorgl-xml-streamer*
.nycrc
examples

20
.nycrc Normal file
View File

@@ -0,0 +1,20 @@
{
"extension": [
".ts"
],
"require": [
"ts-node/register"
],
"include": [
"src/**/*.ts"
],
"exclude": [
"**/*.d.ts"
],
"reporter": [
"html",
"text-summary",
"text"
],
"all": true
}

51
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,51 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
"program": "${workspaceFolder}\\node_stream_zip.js",
"preLaunchTask": "tsc: build - tsconfig.json",
"outFiles": [
"${workspaceFolder}/dist/**/*.js"
]
},
{
"type": "node",
"request": "launch",
"name": "Mocha All",
"program": "${workspaceFolder}/node_modules/mocha/bin/_mocha",
"args": [
"--timeout",
"999999",
"--colors",
"-r",
"ts-node/register",
"test/**/*.spec.ts"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen"
},
{
"type": "node",
"request": "launch",
"name": "Mocha Current File",
"program": "${workspaceFolder}/node_modules/mocha/bin/_mocha",
"args": [
"--timeout",
"999999",
"--colors",
"-r",
"ts-node/register",
"${file}"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen"
}
]
}

15
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,15 @@
{
"cSpell.words": [
"Dror",
"Gluska",
"Teja",
"apos",
"drorgl",
"dryrun",
"gmail",
"prefinish",
"saitejas",
"typedoc",
"xmltojs"
]
}

301
README.md
View File

@@ -1,113 +1,162 @@
# xml-streamer
[![Build Status](https://travis-ci.org/Sai1919/xml-streamer.svg?branch=master)](https://travis-ci.org/Sai1919/xml-streamer)
## Motivation
## IMPORTANT
You use [Node.js](https://nodejs.org) for speed? You process XML streams? Then you want the fastest XML to JS parser: `xml-streamer`, based on [node-expat](https://github.com/astro/node-expat)
This is a modified version of xml-streamer, the parser + tests stayed mostly the same but the core xml parser was replaced with [SaxLtx xml parser](https://github.com/xmppjs/ltx) due to reliability/stability issues with node-expat, both this library and ltx were converted to typescript.
Please note that ltx parser is about 20% slower than node-expat.
## Install
```
npm install xml-streamer
npm install @drorgl/xml-streamer
```
## Basic Usage
`xml-streamer can be used in three
ways`
`xml-streamer can be used in four ways`
```javascript
```typescript
// 1. By passing the resourcePath and reading data by calling the `read` method instead of listening for data events.
// 1. By listening for interested nodes.
import {XmlParser} from "@drorgl/xml-streamer";
(function () {
"use strict";
const opts = { resourcePath: "/items/item" };
var Parser = require('xml-streamer')
var opts = {} // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
var parser = new Parser(opts)
parser.on('item', function (item) {
// consume the item object here
})
parser.on('end', function () {
// parsing ended no more data events will be raised
})
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on('error', function (error) {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
// for readable event and consume data by calling parser.read()
});
// after the readable event occurred you can call the read method and get data.
parser.read(); // will return one object at a time.
// 2. By passing a resource path.
(function () {
"use strict";
// 2. By listening for interested nodes.
var Parser = require('xml-streamer')
var opts = {resourcePath: '/items/item'}
var parser = new Parser(opts)
parser.on('data', function (data) {
// consume the data object here
})
parser.on('end', function () {
// parsing ended no more data events will be raised
})
import { XmlParser } from "@drorgl/xml-streamer";
parser.on('error', function (error) {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
const opts = {}; // see `Available Constructor Options` section below.
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
const parser = new XmlParser(opts);
// 2. By passing the resourcePath as shown in method 2 and reading data by calling the `read` method instead of listening for data events.
parser.on("item", (item) => {
// consume the item object here
});
(function () {
"use strict";
parser.on("end", () => {
// parsing ended no more data events will be raised
});
var Parser = require('xml-streamer')
var opts = {resourcePath: '/items/item'}
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
var parser = new Parser(opts)
parser.on('end', function () {
// parsing ended no more data events will be raised
})
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
//for readable event and consume data by calling parser.read()
});
parser.on('error', function (error) {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
// 3. By passing a resource path.
import { XmlParser } from "@drorgl/xml-streamer";
const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);
parser.on("data", (data) => {
// consume the data object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you
// can wait for readable event and consume data by calling parser.read()
});
// 4. By passing a string or buffer to parse function
import { XmlParser } from "@drorgl/xml-streamer";
const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using the parse method
const parser = new XmlParser(opts);
parser.parse(stringOrBuffer, (err, data) => {
// consume data here
});
// 5. Compressed Stream Parsing
import { XmlParser } from "@drorgl/xml-streamer";
import { StreamZip } from "node-stream-zip";
const zip = new StreamZip({
file: archiveName,
storeEntries: true
});
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});
zip.on("ready", () => {
zip.stream('path/inside/zip.xml', (err, stm) => {
stm.pipe(parser);
stm.on('end', () => zip.close());
});
});
```
@@ -121,6 +170,7 @@ npm install xml-streamer
* `#resume()` resumes
* `#read()` returns object if stream is readable
## Available Constructor Options
* `resourcePath`: `Type: String` Optional field. Used to extract the XML nodes that you are interested in.
@@ -144,6 +194,8 @@ npm install xml-streamer
if you are interested in `subitem` nodes then resourcePath would be: `/items/item/subitem`
if you are interested in `items` nodes then resourcePath would be: `/items`
* `emitOnNodeName`: `Type: Boolean` Optional field. Set this to true if you want to listen on node names instead of data event. `default: false`
// Ex: consider the above XML snippet
@@ -162,7 +214,9 @@ npm install xml-streamer
```
`NOTE:` when you set `emitOnNodeName:true` "data" events are emitted normally. So make sure you don't listen for both the events.
* `attrsKey`: `Type: String` Optional field. pass the value with which you want to reference attributes of a node in its object form. `default: '$'`
* `textKey`: `Type: String` Optional field. pass the value with which you want to reference node value in its object form. `default: '_'`
@@ -186,10 +240,54 @@ npm install xml-streamer
// Then set `attrsKey= "attrs"` and `textKey= "text"`
## upcoming features
* `explicitArray`: `Type: Boolean` Optional field. `Default value is true`. All children nodes will come in an array when this option is true.
1. `handling of compressed streams`
2. `handling of different encodings`
// Ex: For example let the XML be
```xml
<?xml version="1.0" encoding="utf-8"?>
<items>
<item id="1" test= 'hello'>
<subitem sub= "2">two</subitem>
</item>
</items>
```
// if explicitArray is true and resourcePath is /items/item.
// Output for above xml will be
```javascript
[
{ '$': { id: '1', test: 'hello' },
subitem: { '$': { sub: '2' }, _: 'two' } },
]
```
`caution:` When explicitArray set to false and if there are multiple children nodes with same name then last node will override all preceding nodes.
* `verbatimText`: `Type: Boolean` Optional field. `Default value is false`. When set, text attribute will include all blanks found in xml. When unset, blanks are removed as long as they come in one expat single block (blank lines, newlines and entities).
// Ex: For example let the XML be
```xml
<?xml version="1.0" encoding="utf-8"?>
<items>
<item>
This is
a test
</item>
</items>
```
// if verbatimText is true and resourcePath is /items/item.
// Output for above xml will be
```javascript
[
{ '_' : "\nThis is\na test\n "}
]
```
// if verbatimText is false and resourcePath is /items/item.
// Output for above xml will be
```javascript
[
{ '_' : "This isa test"}
]
```
## Namespace handling
@@ -197,24 +295,25 @@ npm install xml-streamer
A word about special parsing of *xmlns:* Note that "resourcePath" in the options is not an XPATH.
So the value given to the resourcePath is treated as simple value and no expression evaluations are done.
## Benchmark
`xml-streamer` internally uses `node-expat`
`npm run benchmark`
| module | ops/sec | native | XML compliant | stream |
|---------------------------------------------------------------------------------------|--------:|:------:|:-------------:|:--------------:|
| [sax-js](https://github.com/isaacs/sax-js) | 99,412 | ☐ | ☑ | ☑ |
| [node-xml](https://github.com/dylang/node-xml) | 130,631 | ☐ | ☑ | ☑ |
| [libxmljs](https://github.com/polotek/libxmljs) | 276,136 | ☑ | ☑ | ☐ |
| **node-expat** | 322,769 | ☑ | ☑ | ☑ |
Higher is better.
## Testing
```
npm install -g standard
npm test
```
## Coverage
```
npm coverage
=============================== Coverage summary ===============================
Statements : 90.91% ( 340/374 )
Branches : 81.66% ( 187/229 )
Functions : 78.13% ( 25/32 )
Lines : 90.86% ( 318/350 )
================================================================================
```
## Documentation
```
npm doc
```

View File

@@ -0,0 +1,41 @@
// Compressed Stream Parsing
import { XmlParser } from "../";
import { StreamZip } from "node-stream-zip";
const zip = new StreamZip({
file: archiveName,
storeEntries: true
});
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});
zip.on("ready", () => {
zip.stream('path/inside/zip.xml', (err, stm) => {
stm.pipe(parser);
stm.on('end', () => zip.close());
});
});

View File

@@ -0,0 +1,27 @@
// By listening for interested nodes.
import { XmlParser } from "../";
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});

11
examples/parse.ts Normal file
View File

@@ -0,0 +1,11 @@
// By passing a string or buffer to parse function
import { XmlParser } from "../";
const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using the parse method
const parser = new XmlParser(opts);
parser.parse(stringOrBuffer, (err, data) => {
// consume data here
});

View File

@@ -0,0 +1,27 @@
// By passing the resourcePath and reading data by calling the
// `read` method instead of listening for data events.
import {XmlParser} from "../";
const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
// for readable event and consume data by calling parser.read()
});
// after the readable event occurred you can call the read method and get data.
parser.read(); // will return one object at a time.

28
examples/resourcePath.ts Normal file
View File

@@ -0,0 +1,28 @@
// By passing a resource path.
import { XmlParser } from "../";
const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);
parser.on("data", (data) => {
// consume the data object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you
// can wait for readable event and consume data by calling parser.read()
});

View File

@@ -1,47 +1,78 @@
{
"version": "0.0.1",
"name": "xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": {
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
},
"keywords": [
"xml",
"xml streaming",
"xml streamer",
"streaming",
"xml parser",
"xml parsing",
"xml2js",
"xmltojs"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/Sai1919/xml-streamer"
},
"dependencies": {
"node-expat": "2.3.15",
"lodash": "4.16.6"
},
"devDependencies": {
"mocha": "^1.21.4",
"should": "11.1.1",
"standard": "8.5.0"
},
"optionalDependencies": {},
"main": "./parser",
"scripts": {
"test": "mocha && standard"
},
"maintainers": [
{
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
}
],
"standard": {
"globals": [ "describe", "it" ]
}
"version": "0.3.1",
"name": "@tabshift/xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": {
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
},
"keywords": [
"xml",
"xml streaming",
"xml streamer",
"streaming",
"xml parser",
"xml parsing",
"xml2js",
"xmltojs",
"node-expat",
"expat"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/drorgl/xml-streamer"
},
"dependencies": {
"lodash": "4.17.11"
},
"devDependencies": {
"mocha": "^6.1.4",
"should": "^13.2.3",
"@types/lodash": "^4.14.133",
"@types/mocha": "^5.2.7",
"@types/node": "^12.0.4",
"@types/should": "^13.0.0",
"ts-node": "^8.2.0",
"tslint": "^5.17.0",
"typescript": "^3.5.1",
"rimraf": "^2.6.3",
"source-map-support": "^0.5.12",
"typedoc": "^0.14.2",
"nyc": "^14.1.1"
},
"optionalDependencies": {},
"main": "dist/parser.js",
"types": "dist/parser.d.ts",
"scripts": {
"performance-test": "node --prof node_modules/mocha/bin/_mocha -r ts-node/register test/**/*.spec.ts",
"performance-process": "node --prof-process isolate...",
"test-one": "mocha -r ts-node/register",
"test": "mocha -r ts-node/register test/**/*.spec.ts",
"lint": "tslint --project .",
"dryrun": "tsc -noEmit",
"build": "tsc",
"prepublish": "rimraf ./dist && npm run lint && npm run dryrun && npm run test && npm run build",
"coverage": "rimraf ./.nyc_output && rimraf ./coverage && nyc mocha -r ts-node/register -r source-map-support/register --ui bdd test/**/*.spec.{ts,tsx}\"",
"doc": "rimraf ./docs && typedoc",
"publish-now": "npm publish --access public"
},
"contributors": [
{
"name": "Dror Gluska",
"email": "drorgl@gmail.com"
}
],
"maintainers": [
{
"name": "Sai Teja",
"email": "saitejas464@gmail.com"
}
],
"standard": {
"globals": [
"describe",
"it"
]
}
}

252
parser.js
View File

@@ -1,252 +0,0 @@
var expat = require('node-expat')
var _ = require('lodash')
var util = require('util')
var stream = require('stream')
var ParserState = require('./parserState')
var defaults = {
resourcePath: '',
emitOnNodeName: false,
attrsKey: '$',
textKey: '_'
}
function XmlParser (opts) {
this.opts = _.defaults(opts, defaults)
this.parserState = new ParserState()
this.parser = new expat.Parser('UTF-8')
stream.Transform.call(this)
this._readableState.objectMode = true
}
util.inherits(XmlParser, stream.Transform)
XmlParser.prototype.checkForInterestedNodeListeners = function () {
var ignore = [ 'end', 'prefinish', 'data', 'error' ]
var eventNames = Object.keys(this._events)
for (var i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) continue
this.parserState.interestedNodes.push(eventNames[i])
}
}
XmlParser.prototype._transform = function (chunk, encoding, callback) {
if (encoding !== 'buffer') this.emit('error', new Error('unsupported encoding'))
if (this.parserState.isRootNode) this.checkForInterestedNodeListeners()
this.parse(chunk)
callback()
}
XmlParser.prototype.parse = function (chunk) {
var scope = this
var parser = this.parser
var state = this.parserState
var lastIndex
var resourcePath = this.opts.resourcePath
var attrsKey = this.opts.attrsKey
var textKey = this.opts.textKey
var interestedNodes = state.interestedNodes
if (state.isRootNode) registerEvents()
if (typeof chunk === 'string') {
if (!parser.parse('', true)) processError()
} else {
if (!parser.parse(chunk.toString())) processError()
}
function registerEvents () {
parser.on('startElement', function (name, attrs) {
if (state.isRootNode) validateResourcePath(name)
state.currentPath = state.currentPath + '/' + name
checkForResourcePath(name)
if (state.isPathfound) processStartElement(name, attrs)
})
parser.on('endElement', function (name) {
state.lastEndedNode = name
lastIndex = state.currentPath.lastIndexOf('/' + name)
state.currentPath = state.currentPath.substring(0, lastIndex)
if (state.isPathfound) processEndElement(name)
checkForResourcePath(name)
})
parser.on('text', function (text) {
if (state.isPathfound) processText(text)
})
parser.on('error', function (err) {
processError(err)
})
}
function processError (err) {
var error = ''
if (err) {
error = err
} else {
error = parser.getError()
}
scope.emit('error', new Error(error + ' at line no: ' + parser.getCurrentLineNumber()))
}
function processStartElement (name, attrs) {
if (!name) return
var obj = {}
if (attrs && !_.isEmpty(attrs)) obj[attrsKey] = attrs
var tempObj = state.object
var path = getRelativePath(name)
if (!path) {
if (attrs && !_.isEmpty(attrs)) state.object[attrsKey] = attrs
return
}
var tokens = path.split('.')
for (var i = 0; i < tokens.length; i++) {
if (tempObj[tokens[i]]) {
tempObj = tempObj[tokens[i]]
} else {
tempObj[tokens[i]] = []
tempObj = tempObj[tokens[i]]
}
if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1]
}
tempObj.push(obj)
}
function processEndElement (name) {
if (resourcePath) {
var index = resourcePath.lastIndexOf('/')
var rpath = resourcePath.substring(0, index)
if (rpath === state.currentPath) {
scope.push(state.object)
if (scope.opts.emitOnNodeName) scope.emit(name, state.object)
state.object = {}
}
} else {
if (_.includes(interestedNodes, name, 0)) {
emitInterestedNode(name)
if (state.firstFoundNode === name) {
state.object = {}
state.firstFoundNode = ''
state.isPathfound = false
}
}
}
}
function emitInterestedNode (name) {
var index
var xpath
var pathTokens
xpath = state.currentPath.substring(1)
pathTokens = xpath.split('/')
pathTokens.push(name)
index = pathTokens.indexOf(state.firstFoundNode)
pathTokens = _.drop(pathTokens, index + 1)
var tempObj = state.object
for (var i = 0; i < pathTokens.length; i++) {
tempObj = tempObj[pathTokens[i]]
}
if (Array.isArray(tempObj)) tempObj = tempObj[tempObj.length - 1]
scope.emit(name, tempObj)
scope.push(tempObj)
}
function processText (text) {
if (!text || !/\S/.test(text)) {
return
}
var path = getRelativePath()
var tempObj = state.object
if (!path) {
if (!state.object[textKey]) state.object[textKey] = ''
state.object[textKey] = state.object[textKey] + text
return
}
var tokens = path.split('.')
for (var i = 0; i < tokens.length; i++) {
if (tempObj[tokens[i]]) {
tempObj = tempObj[tokens[i]]
} else {
tempObj[tokens[i]] = []
tempObj = tempObj[tokens[i]]
}
if (Array.isArray(tempObj) && i !== tokens.length - 1) tempObj = tempObj[tempObj.length - 1]
}
var obj = tempObj[tempObj.length - 1]
if (!obj[textKey]) obj[textKey] = ''
obj[textKey] = obj[textKey] + text
}
function checkForResourcePath (name) {
if (resourcePath) {
if (state.currentPath.indexOf(resourcePath) === 0) {
state.isPathfound = true
} else {
state.isPathfound = false
}
} else {
if (_.includes(interestedNodes, name, 0)) {
state.isPathfound = true
if (!state.firstFoundNode) {
state.firstFoundNode = name
}
}
}
}
function getRelativePath () {
var tokens
var jsonPath
var index
if (resourcePath) {
var xpath = state.currentPath.substring(resourcePath.length)
if (!xpath) return
if (xpath[0] === '/') xpath = xpath.substring(1)
tokens = xpath.split('/')
jsonPath = tokens.join('.')
} else {
xpath = state.currentPath.substring(1)
tokens = xpath.split('/')
index = tokens.indexOf(state.firstFoundNode)
tokens = _.drop(tokens, index + 1)
jsonPath = tokens.join('.')
}
return jsonPath
}
function validateResourcePath (name) {
var temp
var index
state.isRootNode = false
if (resourcePath) {
if (resourcePath[0] === '/') {
temp = resourcePath.substring(1, resourcePath.length)
} else {
temp = resourcePath
}
index = temp.indexOf('/')
if (index !== -1) temp = temp.substring(0, index)
if (temp !== name) {
scope.end()
}
}
}
}
XmlParser.prototype._flush = function (callback) {
this.parse('', true)
callback()
}
module.exports = XmlParser

View File

@@ -1,13 +0,0 @@
function ParserState () {
this.currentPath = ''
this.lastEndedNode = ''
this.isPathfound = false
this.object = {}
this.paused = false
this.isRootNode = true
this.firstFoundNode = ''
this.interestedNodes = []
}
module.exports = ParserState

228
src/ltx.ts Normal file
View File

@@ -0,0 +1,228 @@
// Source: https://github.com/xmppjs/ltx/blob/master/lib/parsers/ltx.js
import events from 'events'
import { unescapeXML } from './unescape'
const STATE_TEXT = 0
const STATE_IGNORE_COMMENT = 1
const STATE_IGNORE_INSTRUCTION = 2
const STATE_TAG_NAME = 3
const STATE_TAG = 4
const STATE_ATTR_NAME = 5
const STATE_ATTR_EQ = 6
const STATE_ATTR_QUOT = 7
const STATE_ATTR_VALUE = 8
const STATE_CDATA = 9
const lineCounterRegExp = new RegExp('\n', 'g')
export class SaxLtx extends events.EventEmitter {
public remainder: string
public tagName: string
public attrs: any
public endTag: boolean
public selfClosing: boolean
public attrQuote: number
public attrQuoteChar: string
public recordStart = 0
public attrName: string
public state = STATE_TEXT
public currentLineNumber = 0
constructor() {
super()
}
public getCurrentLineNumber() {
return this.currentLineNumber + 1
}
public end(data?: Buffer) {
if (data) {
this.write(data)
}
this.removeAllListeners()
}
public write(data: Buffer | string) {
if (typeof data !== 'string') {
data = data.toString()
}
let pos = 0
const self = this
/* Anything from previous write()? */
if (self.remainder) {
data = self.remainder + data
pos += self.remainder.length
self.remainder = null
}
function endRecording() {
if (typeof self.recordStart === 'number') {
const recorded = (data as string).substring(self.recordStart, pos)
self.recordStart = undefined
return recorded
}
}
let prevPos = pos
for (; pos < data.length; pos++) {
if (self.state === STATE_TEXT) {
// if we're looping through text, fast-forward using indexOf to
// the next '<' character
const lt = data.indexOf('<', pos)
if (lt !== -1 && pos !== lt) {
pos = lt
}
} else if (self.state === STATE_ATTR_VALUE) {
// if we're looping through an attribute, fast-forward using
// indexOf to the next end quote character
const quot = data.indexOf(self.attrQuoteChar, pos)
if (quot !== -1) {
pos = quot
}
} else if (self.state === STATE_IGNORE_COMMENT) {
// if we're looping through a comment, fast-forward using
// indexOf to the first end-comment character
const endcomment = data.indexOf('-->', pos)
if (endcomment !== -1) {
pos = endcomment + 2 // target the '>' character
}
}
const newLines = (data.substring(prevPos, pos + 1).match(lineCounterRegExp) || []).length
self.currentLineNumber += newLines
prevPos = pos
const c = data.charCodeAt(pos)
switch (self.state) {
case STATE_TEXT:
if (c === 60 /* < */) {
const text = endRecording()
if (text) {
self.emit('text', unescapeXML(text))
}
self.state = STATE_TAG_NAME
self.recordStart = pos + 1
self.attrs = {}
}
break
case STATE_CDATA:
if (c === 93 /* ] */ && data.substr(pos + 1, 2) === ']>') {
const cData = endRecording()
if (cData) {
self.emit('text', cData)
}
self.state = STATE_IGNORE_COMMENT
}
break
case STATE_TAG_NAME:
if (c === 47 /* / */ && self.recordStart === pos) {
self.recordStart = pos + 1
self.endTag = true
} else if (c === 33 /* ! */) {
if (data.substr(pos + 1, 7) === '[CDATA[') {
self.recordStart = pos + 8
self.state = STATE_CDATA
} else if (data.substr(pos + 1, 7) === 'DOCTYPE') {
self.recordStart = pos + 8
self.state = STATE_TEXT
} else {
self.recordStart = undefined
self.state = STATE_IGNORE_COMMENT
}
} else if (c === 63 /* ? */) {
self.recordStart = undefined
self.state = STATE_IGNORE_INSTRUCTION
} else if (c <= 32 || c === 47 /* / */ || c === 62 /* > */) {
self.tagName = endRecording()
pos--
self.state = STATE_TAG
}
break
case STATE_IGNORE_COMMENT:
if (c === 62 /* > */) {
const prevFirst = data.charCodeAt(pos - 1)
const prevSecond = data.charCodeAt(pos - 2)
if (
(prevFirst === 45 /* - */ && prevSecond === 45) /* - */ ||
(prevFirst === 93 /* ] */ && prevSecond === 93) /* ] */
) {
self.state = STATE_TEXT
}
}
break
case STATE_IGNORE_INSTRUCTION:
if (c === 62 /* > */) {
const prev = data.charCodeAt(pos - 1)
if (prev === 63 /* ? */) {
self.state = STATE_TEXT
}
}
break
case STATE_TAG:
if (c === 62 /* > */) {
self._handleTagOpening(self.endTag, self.tagName, self.attrs)
self.tagName = undefined
self.attrs = undefined
self.endTag = undefined
self.selfClosing = undefined
self.state = STATE_TEXT
self.recordStart = pos + 1
} else if (c === 47 /* / */) {
self.selfClosing = true
} else if (c > 32) {
self.recordStart = pos
self.state = STATE_ATTR_NAME
}
break
case STATE_ATTR_NAME:
if (c <= 32 || c === 61 /* = */) {
self.attrName = endRecording()
pos--
self.state = STATE_ATTR_EQ
}
break
case STATE_ATTR_EQ:
if (c === 61 /* = */) {
self.state = STATE_ATTR_QUOT
}
break
case STATE_ATTR_QUOT:
if (c === 34 /* " */ || c === 39 /* ' */) {
self.attrQuote = c
self.attrQuoteChar = c === 34 ? '"' : "'"
self.state = STATE_ATTR_VALUE
self.recordStart = pos + 1
}
break
case STATE_ATTR_VALUE:
if (c === self.attrQuote) {
const value = unescapeXML(endRecording())
self.attrs[self.attrName] = value
self.attrName = undefined
self.state = STATE_TAG
}
break
}
}
if (typeof self.recordStart === 'number' && self.recordStart <= data.length) {
self.remainder = data.slice(self.recordStart)
self.recordStart = 0
}
}
private _handleTagOpening(endTag: boolean, tagName: string, attrs: string) {
if (!endTag) {
this.emit('startElement', tagName, attrs)
if (this.selfClosing) {
this.emit('endElement', tagName)
}
} else {
this.emit('endElement', tagName)
}
}
}

394
src/parser.ts Normal file
View File

@@ -0,0 +1,394 @@
import _ from 'lodash'
import stream from 'stream'
import util from 'util'
import { SaxLtx } from './ltx'
import { ParserState } from './parserState'
// Fallback option values merged into user-supplied options by the
// XmlParser constructor (via _.defaults).
const defaults = {
  resourcePath: '',
  emitOnNodeName: false,
  attrsKey: '$',
  textKey: '_',
  explicitArray: false,
  verbatimText: false,
  preserveWhitespace: false
}
export interface IXmlParserOptions {
  /**
   * Optional field. Used to extract the XML nodes that you are interested in.
   *
   * @type {string}
   * @memberof IXmlParserOptions
   */
  resourcePath?: string
  /**
   * Optional field. Set this to true if you want to listen on node names instead of data event. default: false
   *
   * @type {boolean}
   * @memberof IXmlParserOptions
   */
  emitOnNodeName?: boolean
  /**
   * Optional field. pass the value with which you want to reference attributes of a node in its object form. default: '$'
   *
   * @type {string}
   * @memberof IXmlParserOptions
   */
  attrsKey?: string
  /**
   * Optional field. pass the value with which you want to reference node value in its object form. default: '_'
   *
   * @type {string}
   * @memberof IXmlParserOptions
   */
  textKey?: string
  /**
   * Optional field. Default value is false (see `defaults`). All children
   * nodes will come in an array when this option is true; when false a
   * child stays a plain object until the element repeats.
   *
   * @type {boolean}
   * @memberof IXmlParserOptions
   */
  explicitArray?: boolean
  /**
   * Optional field. Default value is false. When set, text attribute will include all blanks found in xml.
   * When unset, blanks are removed as long as they come in one expat single block (blank lines, newlines and entities).
   *
   * @type {boolean}
   * @memberof IXmlParserOptions
   */
  verbatimText?: boolean
  /**
   * Optional field. Default value is false. When false, runs of whitespace
   * in collected text values are collapsed to single spaces and the value
   * is trimmed; when true the accumulated text is kept as-is.
   *
   * @type {boolean}
   * @memberof IXmlParserOptions
   */
  preserveWhitespace?: boolean
}
export class XmlParser extends stream.Transform {
public parserState: ParserState
private opts: IXmlParserOptions
private _readableState: { objectMode: true; buffer: any }
private parser: SaxLtx
constructor(opts?: IXmlParserOptions) {
super()
this.opts = _.defaults(opts, defaults)
this.parserState = new ParserState()
this.parser = new SaxLtx()
this._readableState.objectMode = true
}
public _flush(callback: () => void) {
this.processChunk('')
callback()
}
public _transform(chunk: Buffer | string, encoding: string, callback: () => void) {
if (encoding !== 'buffer') {
this.emit('error', new Error('unsupported encoding'))
}
this.processChunk(chunk)
callback()
}
public parse(chunk: Buffer | string, cb: (error: Error, data?: Buffer) => void) {
const parser = this.parser
const state = this.parserState
let error
if (state.isRootNode) {
this.checkForInterestedNodeListeners()
registerEvents.call(this)
}
this.on('error', (err) => {
error = err
})
if (chunk.length === 0) {
parser.end()
this.emit('end')
this.removeAllListeners()
}
parser.write(chunk)
if (error) {
return cb(error)
}
const result = []
while (this._readableState.buffer.length > 0) {
result.push(this._readableState.buffer.consume())
}
return cb(null, result as any)
}
private processChunk(chunk: string | Buffer) {
const parser = this.parser
const state = this.parserState
if (state.isRootNode) {
this.checkForInterestedNodeListeners()
registerEvents.call(this)
}
parser.write(chunk)
}
private checkForInterestedNodeListeners() {
const ignore = ['end', 'prefinish', 'data', 'error']
const eventNames = Object.keys((this as any)._events)
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) {
continue
}
this.parserState.interestedNodes.push(eventNames[i])
}
}
}
/**
 * Wire the SAX parser events to the JSON-building logic.
 * Invoked as registerEvents.call(xmlParser), so `this`/`scope` is the
 * XmlParser instance throughout.
 */
function registerEvents() {
  const scope = this
  const parser: SaxLtx = this.parser
  const state: ParserState = this.parserState
  let lastIndex
  const resourcePath = this.opts.resourcePath
  const attrsKey = this.opts.attrsKey
  const textKey = this.opts.textKey
  const interestedNodes = state.interestedNodes
  const explicitArray = this.opts.explicitArray
  const verbatimText = this.opts.verbatimText
  const preserveWhitespace = this.opts.preserveWhitespace

  parser.on('startElement', (name, attrs) => {
    if (state.isRootNode) {
      state.isRootNode = false
    }
    state.currentPath = state.currentPath + '/' + name
    checkForResourcePath(name)
    if (state.isPathfound) {
      processStartElement(name, attrs)
    }
  })

  parser.on('endElement', (name) => {
    state.lastEndedNode = name
    lastIndex = state.currentPath.lastIndexOf('/' + name)
    if (state.currentPath.substring(lastIndex + 1).indexOf('/') !== -1) {
      // The closing tag is not the innermost open element.
      processError.call(scope, `mismatched tag`)
    }
    state.currentPath = state.currentPath.substring(0, lastIndex)
    if (state.isPathfound) {
      processEndElement(name)
    }
    checkForResourcePath(name)
  })

  parser.on('text', (text) => {
    if (state.isPathfound) {
      processText(text)
    }
  })

  parser.on('error', (err) => {
    // Must run with the XmlParser as `this`. The previous `function` form
    // bound `this` to the SAX parser, so processError looked up
    // `this.parser` / `this.emit` on the wrong object and the error never
    // reached XmlParser listeners.
    processError.call(scope, err)
  })

  // Insert a freshly-opened element (and its attributes) at the position in
  // state.object addressed by the path relative to the resource root.
  function processStartElement(name: string, attrs: any) {
    if (!name) {
      return
    }
    const obj: any = {}
    if (attrs && !_.isEmpty(attrs)) {
      obj[attrsKey] = attrs
    }
    let tempObj = state.object
    const path = getRelativePath(/*name*/)
    if (!path) {
      // The element IS the resource root: only its attributes are recorded.
      if (attrs && !_.isEmpty(attrs)) {
        state.object[attrsKey] = attrs
      }
      return
    }
    const tokens = path.split('.')
    for (let i = 0; i < tokens.length; i++) {
      const token = tokens[i]
      const isLastToken = i === tokens.length - 1
      const doesTokenExist = token in tempObj
      if (!doesTokenExist) {
        tempObj[token] = explicitArray ? [] : obj
      } else {
        // Repeating element with explicitArray=false: promote to an array.
        if (!Array.isArray(tempObj[token]) && isLastToken) tempObj[token] = [tempObj[token]]
      }
      tempObj = tempObj[token]
      if (Array.isArray(tempObj) && !isLastToken) {
        // Descend into the most recently opened sibling.
        tempObj = tempObj[tempObj.length - 1]
      }
    }
    if (Array.isArray(tempObj)) {
      tempObj.push(obj)
    }
  }

  // Push/emit the assembled object when the extraction root closes.
  function processEndElement(name: string) {
    if (resourcePath) {
      const index = resourcePath.lastIndexOf('/')
      const rpath = resourcePath.substring(0, index)
      if (rpath === state.currentPath) {
        scope.push(state.object)
        if (scope.opts.emitOnNodeName) {
          scope.emit(name, state.object)
        }
        state.object = {}
      }
    } else {
      if (_.includes(interestedNodes, name, 0)) {
        emitInterestedNode(name)
        if (state.firstFoundNode === name) {
          state.object = {}
          state.firstFoundNode = ''
          state.isPathfound = false
        }
      }
    }
  }

  // Locate the just-closed interested node inside state.object and emit it.
  function emitInterestedNode(name: string) {
    let index
    let xpath
    let pathTokens
    xpath = state.currentPath.substring(1)
    pathTokens = xpath.split('/')
    pathTokens.push(name)
    index = pathTokens.indexOf(state.firstFoundNode)
    pathTokens = _.drop(pathTokens, index + 1)
    let tempObj = state.object
    // tslint:disable-next-line:prefer-for-of
    for (let i = 0; i < pathTokens.length; i++) {
      tempObj = tempObj[pathTokens[i] as any]
    }
    if (Array.isArray(tempObj)) {
      tempObj = tempObj[tempObj.length - 1]
    }
    scope.emit(name, tempObj)
    scope.push(tempObj)
  }

  // Append a text chunk to the node addressed by the current path, collapsing
  // whitespace unless preserveWhitespace is set.
  function processText(text: string) {
    // Skip pure-whitespace chunks unless verbatimText asks to keep them.
    if (!text || (!verbatimText && !/\S/.test(text))) {
      return
    }
    const path = getRelativePath()
    let tempObj = state.object
    if (!path) {
      if (!state.object[textKey]) {
        state.object[textKey] = ''
      }
      state.object[textKey] = state.object[textKey] + text
      if (!preserveWhitespace) {
        state.object[textKey] = state.object[textKey].replace(/\s+/g, ' ').trim()
      }
      return
    }
    const tokens = path.split('.')
    for (let i = 0; i < tokens.length; i++) {
      if (tempObj[tokens[i]]) {
        tempObj = tempObj[tokens[i]]
      } else {
        tempObj[tokens[i]] = explicitArray ? [] : {}
        tempObj = tempObj[tokens[i]]
      }
      if (Array.isArray(tempObj) && i !== tokens.length - 1) {
        tempObj = tempObj[tempObj.length - 1]
      }
    }
    if (Array.isArray(tempObj)) {
      const obj = tempObj[tempObj.length - 1]
      if (!obj[textKey]) {
        obj[textKey] = ''
      }
      obj[textKey] = obj[textKey] + text
      if (!preserveWhitespace) {
        obj[textKey] = obj[textKey].replace(/\s+/g, ' ').trim()
      }
    } else {
      if (!tempObj[textKey]) {
        tempObj[textKey] = ''
      }
      tempObj[textKey] = tempObj[textKey] + text
      if (!preserveWhitespace) {
        tempObj[textKey] = tempObj[textKey].replace(/\s+/g, ' ').trim()
      }
    }
  }

  // Update state.isPathfound / state.firstFoundNode for the element `name`.
  function checkForResourcePath(name: string) {
    if (resourcePath) {
      if (state.currentPath.indexOf(resourcePath) === 0) {
        state.isPathfound = true
      } else {
        state.isPathfound = false
      }
    } else {
      if (_.includes(interestedNodes, name, 0)) {
        state.isPathfound = true
        if (!state.firstFoundNode) {
          state.firstFoundNode = name
        }
      }
    }
  }

  // Dot-separated path of the current element relative to the resource root
  // (or to the first interested node); empty when we are AT the root.
  function getRelativePath() {
    let tokens
    let jsonPath
    let index
    if (resourcePath) {
      let xpath = state.currentPath.substring(resourcePath.length)
      if (!xpath) {
        return
      }
      if (xpath[0] === '/') {
        xpath = xpath.substring(1)
      }
      tokens = xpath.split('/')
      jsonPath = tokens.join('.')
    } else {
      const xpath = state.currentPath.substring(1)
      tokens = xpath.split('/')
      index = tokens.indexOf(state.firstFoundNode)
      tokens = _.drop(tokens, index + 1)
      jsonPath = tokens.join('.')
    }
    return jsonPath
  }
}
// Normalize a parse failure into an Error that carries the current line
// number, emit it on the XmlParser (`this` is bound by the caller) and
// return it. When no explicit error is given, the underlying SAX parser
// is asked for its last error.
function processError(err: Error) {
  const parser = this.parser
  const cause = err || parser.getError()
  const error = new Error(`${cause} at line no: ${parser.getCurrentLineNumber()}`)
  this.emit('error', error)
  return error
}

10
src/parserState.ts Normal file
View File

@@ -0,0 +1,10 @@
// Mutable per-parser bookkeeping shared between XmlParser and the
// registerEvents handlers.
export class ParserState {
  // True until the first start-element has been seen.
  public isRootNode = true
  // Slash-separated path of currently open elements, e.g. '/items/item'.
  public currentPath = ''
  // Name of the most recently closed element.
  public lastEndedNode = ''
  // True while the cursor is inside the configured resource path or an
  // interested node.
  public isPathfound = false
  // First interested node encountered; cleared again when it closes.
  public firstFoundNode = ''
  // Node names derived from user-registered listeners.
  public interestedNodes: string[] = []
  // The object currently being assembled from parsed nodes.
  public object: any = {}
  // NOTE(review): not consulted by the visible parsing code.
  public paused = false
}

86
src/unescape.ts Normal file
View File

@@ -0,0 +1,86 @@
const escapeXMLTable: { [char: string]: string } = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;'
}

// Replacer callback: map one special character to its named entity.
function escapeXMLReplace(match: string) {
  return escapeXMLTable[match]
}

const unescapeXMLTable: { [char: string]: string } = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&apos;': "'"
}

// Replacer callback: decode a single entity ('&amp;', '&#65;', '&#x41;', ...).
// Throws on unknown named entities and on code points outside the legal
// XML character range.
function unescapeXMLReplace(match: string) {
  if (match[1] !== '#') {
    const mapped = unescapeXMLTable[match]
    if (mapped) {
      return mapped
    }
    throw new Error('Illegal XML entity ' + match)
  }
  // Numeric character reference: hexadecimal '&#xNN;' or decimal '&#NN;'.
  const num = match[2] === 'x' ? parseInt(match.slice(3), 16) : parseInt(match.slice(2), 10)
  // https://www.w3.org/TR/xml/#NT-Char defines legal XML characters:
  // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  const isLegalChar =
    num === 0x9 ||
    num === 0xa ||
    num === 0xd ||
    (num >= 0x20 && num <= 0xd7ff) ||
    (num >= 0xe000 && num <= 0xfffd) ||
    (num >= 0x10000 && num <= 0x10ffff)
  if (isLegalChar) {
    return String.fromCodePoint(num)
  }
  throw new Error('Illegal XML character 0x' + num.toString(16))
}

// Escape all five XML special characters (attribute-safe).
export function escapeXML(s: string) {
  return s.replace(/[&<>"']/g, escapeXMLReplace)
}

// Decode every '&...;' entity in s. Returns the original string object
// untouched when no entity is present (avoids an allocation).
export function unescapeXML(s: string) {
  let out = ''
  let cursor = 0
  let amp = s.indexOf('&')
  let semi = amp === -1 ? -1 : s.indexOf(';', amp + 1)
  while (amp !== -1 && semi !== -1) {
    out = out + s.substring(cursor, amp) + unescapeXMLReplace(s.substring(amp, semi + 1))
    cursor = semi + 1
    amp = s.indexOf('&', cursor)
    semi = s.indexOf(';', amp + 1)
  }
  if (cursor === 0) {
    // No entity decoded: hand back the input itself.
    return s
  }
  return out + s.substring(cursor)
}

// Escape only the characters that are special inside text nodes.
export function escapeXMLText(s: string) {
  return s.replace(/&|<|>/g, escapeXMLReplace)
}

// Decode only the entities that escapeXMLText produces.
export function unescapeXMLText(s: string) {
  return s.replace(/&(amp|#38|lt|#60|gt|#62);/g, unescapeXMLReplace)
}

View File

@@ -3,6 +3,7 @@
<item id="1" test= 'hello'>
<subitem sub= "TESTING SUB">one</subitem>
<subitem sub= "2">two</subitem>
<subitem sub= "2"/>
<item id="2">
<subitem>three</subitem>
<subitem>four</subitem>

153
test/basic.spec.ts Normal file
View File

@@ -0,0 +1,153 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// Streaming-mode tests: pipe XML fixture files of increasing size into
// XmlParser (resourcePath mode) and verify the objects/counts emitted via
// the 'data' event.
describe('Basic behavior', () => {
  it('should properly parse a simple file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    const expectedData = [
      {
        $: { id: '1', test: 'hello' },
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      {
        $: { id: '2' },
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      should(err).not.be.ok()
      done(err)
    })
    parser.on('end', () => {
      // console.log('actualData=', actualData)
      // console.log('dataEventCount=', dataEventCount)
      should(actualData).deepEqual(expectedData)
      should(dataEventCount).equal(2)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a medium size file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(10)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a file containing many nodes.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(296)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a xml simple file in which nodes contain text values randomly.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/randomText.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    const expectedData = [
      {
        $: { id: '1', test: 'hello' },
        _: 'item one two',
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      {
        $: { id: '2' },
        _: 'item one two three four',
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('actualData=', JSON.stringify(actualData, null, 1))
      // console.log('dataEventCount=', dataEventCount)
      should(actualData).deepEqual(expectedData)
      should(dataEventCount).equal(2)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a huge file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    // console.log(parser)
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(2072)
      done()
    })
    xmlStream.pipe(parser)
  })
})

View File

@@ -0,0 +1,30 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// Verifies that CDATA sections and comments inside the fixture do not
// derail item extraction.
describe('CData and comments in xml', () => {
  it('should properly parse a simple file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/CData-comments.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      // NOTE(review): 296 equals the manyItems.xml count — confirm the
      // CData-comments.xml fixture really contains 296 items.
      should(dataEventCount).equal(296)
      done()
    })
    xmlStream.pipe(parser)
  })
})

138
test/emit.spec.ts Normal file
View File

@@ -0,0 +1,138 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// With emitOnNodeName: true the parser emits each extracted object both on
// the 'data' event and on an event named after the node ('item' here);
// these tests check the two channels stay in lockstep.
describe('emitOnNodeName', () => {
  it('should properly emit events on node names.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', emitOnNodeName: true })
    const expectedData = [
      {
        $: { id: '1', test: 'hello' },
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      {
        $: { id: '2' },
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualData: string[] = []
    const itemData: string[] = []
    let dataEventCount = 0
    let itemCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('item', (item) => {
      itemData.push(item)
      itemCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('actualData=', actualData)
      // console.log('dataEventCount=', dataEventCount)
      should(actualData).deepEqual(expectedData)
      should(dataEventCount).equal(2)
      should(itemData).deepEqual(expectedData)
      should(itemCount).equal(2)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly emit events on node names while parsing a medium size file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', emitOnNodeName: true })
    let dataEventCount = 0
    let itemCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('item', (data) => {
      itemCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(10)
      should(itemCount).equal(10)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a file containing many nodes.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', emitOnNodeName: true })
    let dataEventCount = 0
    let itemCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('item', (data) => {
      itemCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(296)
      should(itemCount).equal(296)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a huge file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', emitOnNodeName: true })
    let dataEventCount = 0
    let itemCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('item', (item) => {
      itemCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      should(dataEventCount).equal(2072)
      should(itemCount).equal(2072)
      done()
    })
    xmlStream.pipe(parser)
  })
})

44
test/error.spec.ts Normal file
View File

@@ -0,0 +1,44 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// NOTE(review): this suite is disabled via describe.skip — the expected
// "mismatched tag at line no: N" messages are not currently asserted in CI.
// Confirm whether the SaxLtx-based error reporting still matches them
// before re-enabling.
describe.skip('Error Handling', () => {
  it('should properly return error if the xml file is corrupted.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/corrupted.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      // console.log(err)
      should(err.message).equal('mismatched tag at line no: 12')
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly return error if the large xml file is corrupted.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/largeCorruptedFile.xml')
    const parser = new XmlParser({ resourcePath: '/items/item' })
    let dataEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      // console.log(err)
      should(err.message).equal('mismatched tag at line no: 8346')
      done()
    })
    xmlStream.pipe(parser)
  })
})

270
test/explicit_array.spec.ts Normal file
View File

@@ -0,0 +1,270 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// Exercises the callback-style parse() API with explicitArray: false,
// where a non-repeating child is a plain object rather than a
// one-element array.
describe('should respect explicitArray constructor option', () => {
  it('should properly parse a simple file with explicitArray set to false.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/item.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    const expectedData = [
      {
        $: { id: '1', test: 'hello' },
        subitem: { $: { sub: '2' }, _: 'two' }
      },
      {
        $: { id: '2' },
        subitem: { _: 'five' }
      }
    ]
    parser.parse(xml.toString(), (err, data) => {
      if (err) {
        done(err)
      }
      // console.log('data=', JSON.stringify(data))
      should(data).deepEqual(expectedData)
      done()
    })
  })
  it('should properly parse a medium size file with explicitArray set to false.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/medium.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    const expectedData = [
      {
        $: {
          id: '1',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '2'
        },
        subitem: {
          _: 'five'
        }
      },
      {
        $: {
          id: '3',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '4',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '5',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '6',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '7',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '8',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '9',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      },
      {
        $: {
          id: '10',
          test: 'hello'
        },
        subitem: {
          $: {
            sub: '2'
          },
          _: 'two'
        }
      }
    ]
    parser.parse(xml, (err, data) => {
      if (err) {
        done(err)
      }
      should(data).deepEqual(expectedData)
      should(data.length).equal(10)
      done()
    })
  })
  it('should properly parse a file containing many nodes when explicitArray set to false.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/manyItems.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    parser.parse(xml, (err, data) => {
      if (err) {
        done(err)
      }
      should(data.length).equal(296)
      done()
    })
  })
  it('should properly parse a xml simple file in which nodes contain text values randomly when explicitArray set to false.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/randomText.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    const expectedData = [
      {
        $: { id: '1', test: 'hello' },
        _: 'item one two',
        subitem: { $: { sub: '2' }, _: 'two' }
      },
      {
        $: { id: '2' },
        _: 'item one two three four',
        subitem: { _: 'five' }
      }
    ]
    parser.parse(xml, (err, data) => {
      if (err) {
        done(err)
      }
      should(data).deepEqual(expectedData)
      should(data.length).equal(2)
      done()
    })
  })
  it('should properly parse a huge file with explicitArray set to false.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/hugeFile.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    // console.log(parser)
    parser.parse(xml, (err, data) => {
      if (err) {
        done(err)
      }
      should(data.length).equal(2072)
      done()
    })
  })
  it('should properly return error if the xml file is corrupted.', (done) => {
    const xml = fs.readFileSync('./test/TestFiles/corrupted.xml')
    const parser = new XmlParser({ resourcePath: '/items/item', explicitArray: false })
    parser.parse(xml, (err, data) => {
      // console.log(err)
      should(err.message).equal('mismatched tag at line no: 12')
      should(data).not.be.ok()
      done()
    })
  })
  it('should properly generate objects when special symbols are passed as attrs and text keys and explicitArray is false in the options.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    const parser = new XmlParser({
      resourcePath: '/items/item',
      attrsKey: '!',
      textKey: '%',
      explicitArray: false
    })
    const expectedData = [
      {
        '!': { id: '1', test: 'hello' },
        subitem: { '!': { sub: '2' }, '%': 'two' }
      },
      {
        '!': { id: '2' },
        subitem: { '%': 'five' }
      }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('end', () => {
      // console.log('actualData=', JSON.stringify(actualData, null, 1))
      // console.log('dataEventCount=', dataEventCount)
      should(actualData).deepEqual(expectedData)
      should(dataEventCount).equal(2)
      done()
    })
    xmlStream.pipe(parser)
  })
})

336
test/interested.spec.ts Normal file
View File

@@ -0,0 +1,336 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'

// With no resourcePath, every event name the caller listens on becomes an
// "interested node": the parser emits an object per matching element and
// also pushes each of them on the 'data' stream.
describe('interested Nodes', () => {
  it('should properly parse a simple file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    const parser = new XmlParser()
    const expectedData = [
      { $: { sub: 'TESTING SUB' }, _: 'one' },
      { $: { sub: '2' }, _: 'two' },
      {
        $: { id: '1', test: 'hello' },
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      { _: 'three' },
      { _: 'four' },
      { _: 'five' },
      {
        $: { id: '2' },
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    const expectedItems = [
      {
        $: { id: '1', test: 'hello' },
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      {
        $: { id: '2' },
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualItems: string[] = []
    const actualSubitems: string[] = []
    const expectedSubitems = [
      { $: { sub: 'TESTING SUB' }, _: 'one' },
      { $: { sub: '2' }, _: 'two' },
      { _: 'three' },
      { _: 'four' },
      { _: 'five' }
    ]
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      should(err).not.be.ok()
      done(err)
    })
    parser.on('item', (item) => {
      actualItems.push(item)
    })
    parser.on('subitem', (subitem) => {
      actualSubitems.push(subitem)
    })
    parser.on('end', () => {
      // console.log('actualData=', JSON.stringify(actualData, null, 1))
      // console.log('dataEventCount=', dataEventCount)
      should(actualData).deepEqual(expectedData)
      should(actualItems).deepEqual(expectedItems)
      should(actualSubitems).deepEqual(expectedSubitems)
      should(actualSubitems.length).equal(5)
      should(actualItems.length).equal(2)
      should(dataEventCount).equal(7)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a medium size file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
    const parser = new XmlParser()
    let dataEventCount = 0
    let itemEventCount = 0
    let subitemEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('item', (item) => {
      itemEventCount++
    })
    parser.on('subitem', (subitem) => {
      subitemEventCount++
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      // console.log('itemEventCount=', itemEventCount)
      // console.log('subitemEventCount=', subitemEventCount)
      should(dataEventCount).equal(31)
      should(itemEventCount).equal(10)
      should(subitemEventCount).equal(21)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a file containing many nodes.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/manyItems.xml')
    const parser = new XmlParser()
    let dataEventCount = 0
    let itemEventCount = 0
    let subitemEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('item', (item) => {
      itemEventCount++
    })
    parser.on('subitem', (subitem) => {
      subitemEventCount++
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      // console.log('itemEventCount=', itemEventCount)
      // console.log('subitemEventCount=', subitemEventCount)
      should(itemEventCount).equal(296)
      should(subitemEventCount).equal(600)
      should(dataEventCount).equal(896)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a xml simple file in which nodes contain text values randomly.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/randomText.xml')
    const parser = new XmlParser()
    const expectedData = [
      { $: { sub: 'TESTING SUB' }, _: 'one' },
      { $: { sub: '2' }, _: 'two' },
      {
        $: { id: '1', test: 'hello' },
        _: 'item one two',
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      { _: 'three' },
      { _: 'four' },
      { _: 'five' },
      {
        $: { id: '2' },
        _: 'item one two three four',
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const expectedItems = [
      {
        $: { id: '1', test: 'hello' },
        _: 'item one two',
        subitem: [
          { $: { sub: 'TESTING SUB' }, _: 'one' },
          { $: { sub: '2' }, _: 'two' }
        ]
      },
      {
        $: { id: '2' },
        _: 'item one two three four',
        subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
      }
    ]
    const actualItems: string[] = []
    const actualSubitems: string[] = []
    const expectedSubitems = [
      { $: { sub: 'TESTING SUB' }, _: 'one' },
      { $: { sub: '2' }, _: 'two' },
      { _: 'three' },
      { _: 'four' },
      { _: 'five' }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    let itemEventCount = 0
    let subitemEventCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('item', (item) => {
      itemEventCount++
      actualItems.push(item)
    })
    parser.on('subitem', (subitem) => {
      subitemEventCount++
      actualSubitems.push(subitem)
    })
    parser.on('end', () => {
      // console.log('actualData=', JSON.stringify(actualData, null, 1))
      // console.log('dataEventCount=', dataEventCount)
      // console.log('itemEventCount=', itemEventCount)
      // console.log('subitemEventCount=', subitemEventCount)
      should(actualData).deepEqual(expectedData)
      should(actualItems).deepEqual(expectedItems)
      should(actualSubitems).deepEqual(expectedSubitems)
      should(dataEventCount).equal(7)
      should(itemEventCount).equal(2)
      should(subitemEventCount).equal(5)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a huge file.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/hugeFile.xml')
    const parser = new XmlParser()
    let dataEventCount = 0
    let itemEventCount = 0
    let subitemEventCount = 0
    parser.on('data', (data) => {
      dataEventCount++
    })
    parser.on('error', (err) => {
      done(err)
    })
    parser.on('item', (item) => {
      itemEventCount++
    })
    parser.on('subitem', (subitem) => {
      subitemEventCount++
    })
    parser.on('end', () => {
      // console.log('dataEventCount=', dataEventCount)
      // console.log('itemEventCount=', itemEventCount)
      // console.log('subitemEventCount=', subitemEventCount)
      should(dataEventCount).equal(6272)
      should(itemEventCount).equal(2072)
      should(subitemEventCount).equal(4200)
      done()
    })
    xmlStream.pipe(parser)
  })
  it('should properly parse a simple file and return when root element when listening on it.', (done) => {
    const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
    const parser = new XmlParser()
    const expectedData = [
      {
        item: [
          {
            $: { id: '1', test: 'hello' },
            subitem: [
              { $: { sub: 'TESTING SUB' }, _: 'one' },
              { $: { sub: '2' }, _: 'two' }
            ]
          },
          {
            $: { id: '2' },
            subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
          }
        ]
      }
    ]
    const actualData: string[] = []
    let dataEventCount = 0
    let itemsEventCount = 0
    parser.on('data', (data) => {
      actualData.push(data)
      dataEventCount++
    })
    parser.on('error', (err) => {
      should(err).not.be.ok()
      done(err)
    })
    parser.on('items', (item) => {
      itemsEventCount++
    })
    parser.on('end', () => {
      // console.log('actualData=', JSON.stringify(actualData, null, 1))
      // console.log('dataEventCount=', dataEventCount)
      // console.log('itemEventCount=', itemsEventCount)
      should(actualData).deepEqual(expectedData)
      should(itemsEventCount).equal(1)
      should(dataEventCount).equal(1)
      done()
    })
    xmlStream.pipe(parser)
  })
})

123
test/options.spec.ts Normal file
View File

@@ -0,0 +1,123 @@
import fs from 'fs'
import 'mocha'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('should respect the options passed', () => {
it('should properly generate objects with $ as key for attrs and _ as key for text value of node.', (done) => {
  // Default key mapping: attributes land under '$', element text under '_'.
  const source = fs.createReadStream('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  const expected = [
    {
      $: { id: '1', test: 'hello' },
      subitem: [
        { $: { sub: 'TESTING SUB' }, _: 'one' },
        { $: { sub: '2' }, _: 'two' }
      ]
    },
    {
      $: { id: '2' },
      subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
    }
  ]
  const received: string[] = []
  let emitted = 0
  parser.on('data', (record) => {
    received.push(record)
    emitted += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(received).deepEqual(expected)
    should(emitted).equal(2)
    done()
  })
  source.pipe(parser)
})
it('should properly generate objects with passed attrs and text keys in the options.', (done) => {
  // attrsKey/textKey options rename the '$' and '_' buckets.
  const source = fs.createReadStream('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/items/item', attrsKey: 'attrs', textKey: 'text' })
  const expected = [
    {
      attrs: { id: '1', test: 'hello' },
      subitem: [
        { attrs: { sub: 'TESTING SUB' }, text: 'one' },
        { attrs: { sub: '2' }, text: 'two' }
      ]
    },
    {
      attrs: { id: '2' },
      subitem: [{ text: 'three' }, { text: 'four' }, { text: 'five' }]
    }
  ]
  const received: string[] = []
  let emitted = 0
  parser.on('data', (record) => {
    received.push(record)
    emitted += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(received).deepEqual(expected)
    should(emitted).equal(2)
    done()
  })
  source.pipe(parser)
})
it('should properly generate objects when special symbols are passed as attrs and text keys in the options.', (done) => {
  // Non-identifier characters ('!' and '%') must work as key names too.
  const source = fs.createReadStream('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/items/item', attrsKey: '!', textKey: '%' })
  const expected = [
    {
      '!': { id: '1', test: 'hello' },
      subitem: [
        { '!': { sub: 'TESTING SUB' }, '%': 'one' },
        { '!': { sub: '2' }, '%': 'two' }
      ]
    },
    {
      '!': { id: '2' },
      subitem: [{ '%': 'three' }, { '%': 'four' }, { '%': 'five' }]
    }
  ]
  const received: string[] = []
  let emitted = 0
  parser.on('data', (record) => {
    received.push(record)
    emitted += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(received).deepEqual(expected)
    should(emitted).equal(2)
    done()
  })
  source.pipe(parser)
})
})

114
test/parse.spec.ts Normal file
View File

@@ -0,0 +1,114 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('Parse function should work properly', () => {
it('should properly parse a simple file.', (done) => {
  // parse() over a whole XML string should invoke the callback once with
  // every matched /items/item node collected into an array.
  const xml = fs.readFileSync('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  const expectedData = [
    {
      $: { id: '1', test: 'hello' },
      subitem: [
        { $: { sub: 'TESTING SUB' }, _: 'one' },
        { $: { sub: '2' }, _: 'two' }
      ]
    },
    {
      $: { id: '2' },
      subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
    }
  ]
  parser.parse(xml.toString(), (err, data) => {
    if (err) {
      // Return here: the original fell through after done(err), asserting
      // on undefined data and calling done() a second time.
      return done(err)
    }
    should(data).deepEqual(expectedData)
    done()
  })
})
it('should properly parse a medium size file.', (done) => {
  // medium.xml contains exactly 10 /items/item nodes.
  const xml = fs.readFileSync('./test/TestFiles/medium.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  parser.parse(xml, (err, data) => {
    if (err) {
      // Return to avoid asserting on undefined data and calling done() twice.
      return done(err)
    }
    should(data.length).equal(10)
    done()
  })
})
it('should properly parse a file containing many nodes.', (done) => {
  // manyItems.xml contains 296 /items/item nodes.
  const xml = fs.readFileSync('./test/TestFiles/manyItems.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  parser.parse(xml, (err, data) => {
    if (err) {
      // Return to avoid asserting on undefined data and calling done() twice.
      return done(err)
    }
    should(data.length).equal(296)
    done()
  })
})
it('should properly parse a xml simple file in which nodes contain text values randomly.', (done) => {
  // Mixed content: item nodes carry their own text ('_') alongside subitems.
  const xml = fs.readFileSync('./test/TestFiles/randomText.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  const expectedData = [
    {
      $: { id: '1', test: 'hello' },
      _: 'item one two',
      subitem: [
        { $: { sub: 'TESTING SUB' }, _: 'one' },
        { $: { sub: '2' }, _: 'two' }
      ]
    },
    {
      $: { id: '2' },
      _: 'item one two three four',
      subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
    }
  ]
  parser.parse(xml, (err, data) => {
    if (err) {
      // Return to avoid asserting on undefined data and calling done() twice.
      return done(err)
    }
    should(data).deepEqual(expectedData)
    should(data.length).equal(2)
    done()
  })
})
it('should properly parse a huge file.', (done) => {
  // hugeFile.xml contains 2072 /items/item nodes.
  const xml = fs.readFileSync('./test/TestFiles/hugeFile.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  parser.parse(xml, (err, data) => {
    if (err) {
      // Return to avoid asserting on undefined data and calling done() twice.
      return done(err)
    }
    should(data.length).equal(2072)
    done()
  })
})
it('should properly return error if the xml file is corrupted.', (done) => {
  // Corrupted input must surface a parse error and no data.
  const xml = fs.readFileSync('./test/TestFiles/corrupted.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  parser.parse(xml, (err, data) => {
    // Assert the error exists before dereferencing err.message, so an
    // unexpected success fails with a clear assertion instead of a TypeError.
    should(err).be.ok()
    should(err.message).equal('mismatched tag at line no: 12')
    should(data).not.be.ok()
    done()
  })
})
})

View File

@@ -0,0 +1,84 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('pause and resume', () => {
// Verifies that pause()/resume() neither lose nor duplicate records: after
// each 'data' event the parser is paused for 1s, and the flag below proves
// no further 'data' event arrives while paused.
it('should properly parse a simple file.', function (done) {
const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
const parser = new XmlParser({ resourcePath: '/items/item' })
const expectedData = [
{
$: { id: '1', test: 'hello' },
subitem: [
{ $: { sub: 'TESTING SUB' }, _: 'one' },
{ $: { sub: '2' }, _: 'two' }
]
},
{
$: { id: '2' },
subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
}
]
const actualData: string[] = []
let dataEventCount = 0
// True only while the parser is allowed to emit (set false right after
// pausing, back to true once resume() runs inside the timeout).
let isSetTimeoutHappened = true
// Two records x 1s pause each: extend mocha's 2s default timeout.
this.timeout(4000)
parser.on('data', (data) => {
actualData.push(data)
parser.pause()
// Fails if a 'data' event sneaks in while the parser is still paused.
should(isSetTimeoutHappened).equal(true)
setTimeout(() => {
parser.resume()
isSetTimeoutHappened = true
}, 1000)
dataEventCount++
isSetTimeoutHappened = false
})
parser.on('error', (err) => {
done(err)
})
parser.on('end', () => {
// console.log('actualData=', actualData)
// console.log('dataEventCount=', dataEventCount)
should(actualData).deepEqual(expectedData)
should(dataEventCount).equal(2)
done()
})
xmlStream.pipe(parser)
})
// Throttles the stream: every 'data' event pauses the parser for 1s, so the
// 10 records take ~10s total; the flag asserts no event fires while paused.
it('should emit data events with 1sec interval between each using pause and resume.', function (done) {
const xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
const parser = new XmlParser({ resourcePath: '/items/item' })
let dataEventCount = 0
// True only while the parser is allowed to emit (after resume()).
let isSetTimeoutHappened = true
// 10 records x 1s pause each needs far more than mocha's 2s default.
this.timeout(20000)
parser.on('data', (data) => {
parser.pause()
// Fails if a 'data' event sneaks in while the parser is still paused.
should(isSetTimeoutHappened).equal(true)
setTimeout(() => {
parser.resume()
isSetTimeoutHappened = true
}, 1000)
dataEventCount++
isSetTimeoutHappened = false
})
parser.on('error', (err) => {
done(err)
})
parser.on('end', () => {
// console.log('dataEventCount=', dataEventCount)
should(dataEventCount).equal(10)
done()
})
xmlStream.pipe(parser)
})
})

102
test/performance.spec.ts Normal file
View File

@@ -0,0 +1,102 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe.skip('performance testing', () => {
it('should properly parse more than 500 MB of file.', function (done) {
  // Build a ~500 MB synthetic stream (header + 2200 repeated chunks +
  // footer) and check both throughput and the total record count.
  const parser = new XmlParser({ resourcePath: '/items/item' })
  let dataEventCount = 0
  const startTime = Date.now()
  const xmlStream = new stream.Readable()
  xmlStream._read = function noop() {
    // no-op: content is pushed manually below
  }
  this.timeout(900000)
  const firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
  xmlStream.push(firstChunk)
  // Hoisted out of the loop: the original re-read the identical file from
  // disk on every one of the 2200 iterations.
  const repetitiveChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
  for (let i = 0; i < 2200; i++) {
    xmlStream.push(repetitiveChunk)
  }
  const endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
  xmlStream.push(endingChunk)
  xmlStream.push(null)
  parser.on('data', () => {
    dataEventCount++
  })
  parser.on('error', (err) => {
    should(err).not.be.ok()
    done(err)
  })
  parser.on('end', () => {
    const TimeTaken = Date.now() - startTime
    should(TimeTaken).be.belowOrEqual(300000)
    should(dataEventCount).equal(4558400)
    done()
  })
  xmlStream.pipe(parser)
})
it('should properly parse more than 1 GB of file.', function (done) {
  // Build a ~1 GB synthetic stream (header + 4400 repeated chunks +
  // footer) and check both throughput and the total record count.
  const parser = new XmlParser({ resourcePath: '/items/item' })
  let dataEventCount = 0
  const startTime = Date.now()
  const xmlStream = new stream.Readable()
  xmlStream._read = function noop() {
    // no-op: content is pushed manually below
  }
  this.timeout(900000)
  const firstChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/firstChunk.xml')
  xmlStream.push(firstChunk)
  // Hoisted out of the loop: the original re-read the identical file from
  // disk on every one of the 4400 iterations.
  const repetitiveChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/repetitiveChunk.xml')
  for (let i = 0; i < 4400; i++) {
    xmlStream.push(repetitiveChunk)
  }
  const endingChunk = fs.readFileSync('./test/TestFiles/MB_and_GB_size_files/endingChunk.xml')
  xmlStream.push(endingChunk)
  xmlStream.push(null)
  parser.on('data', () => {
    dataEventCount++
  })
  parser.on('error', (err) => {
    should(err).not.be.ok()
    done(err)
  })
  parser.on('end', () => {
    const TimeTaken = Date.now() - startTime
    should(TimeTaken).be.belowOrEqual(700000)
    should(dataEventCount).equal(9116800)
    done()
  })
  xmlStream.pipe(parser)
})
})

117
test/read.spec.ts Normal file
View File

@@ -0,0 +1,117 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('read method', () => {
it('should properly parse a simple file.', (done) => {
  // Pull objects off the parser with read() from a polling interval
  // instead of listening on 'data'.
  const xmlStream = fs.createReadStream('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  const expectedData = [
    {
      $: { id: '1', test: 'hello' },
      subitem: [
        { $: { sub: 'TESTING SUB' }, _: 'one' },
        { $: { sub: '2' }, _: 'two' }
      ]
    },
    {
      $: { id: '2' },
      subitem: [{ _: 'three' }, { _: 'four' }, { _: 'five' }]
    }
  ]
  const actualData: string[] = []
  let pollTimer: any = null
  parser.on('readable', () => {
    // 'readable' can fire more than once; the original started a fresh
    // interval on each event while clearInterval() only stopped the last
    // one, leaking timers that kept polling after the test ended.
    if (pollTimer !== null) {
      return
    }
    pollTimer = setInterval(() => {
      const obj = parser.read()
      if (obj) {
        actualData.push(obj)
      }
    }, 50)
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    clearInterval(pollTimer)
    should(actualData).deepEqual(expectedData)
    done()
  })
  xmlStream.pipe(parser)
})
it('should properly parse a file containing many nodes.', (done) => {
  // Drain the parser with read() in a self-rescheduling loop until 'end'
  // flips the flag; 296 objects are expected in total.
  const source = fs.createReadStream('./test/TestFiles/manyItems.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  let seen = 0
  let finished = false
  const drain = () => {
    while (parser.read()) {
      seen += 1
    }
    if (!finished) {
      setTimeout(drain, 50)
    }
  }
  parser.on('readable', () => {
    drain()
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    finished = true
    should(seen).deepEqual(296)
    done()
  })
  source.pipe(parser)
})
it('should properly parse a huge.', (done) => {
  // Same read()-polling strategy as above, against the 2072-node file.
  const source = fs.createReadStream('./test/TestFiles/hugeFile.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  let seen = 0
  let finished = false
  const drain = () => {
    while (parser.read()) {
      seen += 1
    }
    if (!finished) {
      setTimeout(drain, 50)
    }
  }
  parser.on('readable', () => {
    drain()
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    finished = true
    should(seen).deepEqual(2072)
    done()
  })
  source.pipe(parser)
})
})

111
test/same_name.spec.ts Normal file
View File

@@ -0,0 +1,111 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('nodes with same names', () => {
it('should properly parse a simple file containing nodes with same names.', (done) => {
  // Repeated <item> element names at mixed depths must each yield one record.
  const source = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
  const parser = new XmlParser()
  const records: string[] = []
  const items: string[] = []
  let emitted = 0
  parser.on('data', (record) => {
    records.push(record)
    emitted += 1
  })
  parser.on('error', (err) => {
    should(err).not.be.ok()
    done(err)
  })
  parser.on('item', (node) => {
    items.push(node)
  })
  parser.on('end', () => {
    should(items.length).equal(18)
    should(emitted).equal(18)
    done()
  })
  source.pipe(parser)
})
it('should properly parse a simple file containing nodes with same names and emit events on multiple nodes.', (done) => {
  // Count per-node-name events alongside the overall 'data' total.
  const source = fs.createReadStream('./test/TestFiles/nodesWithSameNames.xml')
  const parser = new XmlParser()
  const counters = { data: 0, item: 0, subitem: 0 }
  parser.on('data', () => {
    counters.data += 1
  })
  parser.on('error', (err) => {
    should(err).not.be.ok()
    done(err)
  })
  parser.on('item', () => {
    counters.item += 1
  })
  parser.on('subitem', () => {
    counters.subitem += 1
  })
  parser.on('end', () => {
    should(counters.item).equal(18)
    should(counters.subitem).equal(13)
    should(counters.data).equal(31)
    done()
  })
  source.pipe(parser)
})
it('should properly parse a medium size file with same names randomly.', (done) => {
  // Same-name nodes scattered at random positions in the document.
  const source = fs.createReadStream('./test/TestFiles/nodesWithSameNamesRandomly.xml')
  const parser = new XmlParser()
  const counters = { data: 0, item: 0, subitem: 0 }
  parser.on('data', () => {
    counters.data += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('item', () => {
    counters.item += 1
  })
  parser.on('subitem', () => {
    counters.subitem += 1
  })
  parser.on('end', () => {
    should(counters.data).equal(32)
    should(counters.item).equal(19)
    should(counters.subitem).equal(13)
    done()
  })
  source.pipe(parser)
})
})

File diff suppressed because it is too large Load Diff

64
test/uncompressed.spec.ts Normal file
View File

@@ -0,0 +1,64 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('should properly handle uncompressed files', () => {
it('should properly parse a uncompressed xml file.', (done) => {
  // Round-trip medium.xml through gzip → gunzip before parsing to make
  // sure the parser behaves identically to a plain file stream.
  const source = fs.createReadStream('./test/TestFiles/medium.xml')
  const parser = new XmlParser({ resourcePath: '/items/item' })
  const compressor = zlib.createGzip()
  const decompressor = zlib.createGunzip()
  let records = 0
  parser.on('data', () => {
    records += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(records).equal(10)
    done()
  })
  source.pipe(compressor).pipe(decompressor).pipe(parser)
})
// Combines the gzip/gunzip round-trip with pause()/resume(): every 'data'
// event pauses the parser for 2s, and the flag asserts no event arrives
// while it is paused.
it('should properly parse uncompressed file and go fine with pause and resume.', function (done) {
const xmlStream = fs.createReadStream('./test/TestFiles/medium.xml')
const parser = new XmlParser({ resourcePath: '/items/item' })
const gzip = zlib.createGzip()
const gunzip = zlib.createGunzip()
let dataEventCount = 0
// True only while the parser is allowed to emit (after resume()).
let isSetTimeoutHappened = true
// 10 records x 2s pause each needs far more than mocha's 2s default.
this.timeout(20000)
parser.on('data', (data) => {
parser.pause()
// Fails if a 'data' event sneaks in while the parser is still paused.
should(isSetTimeoutHappened).equal(true)
setTimeout(() => {
parser.resume()
isSetTimeoutHappened = true
}, 2000)
dataEventCount++
isSetTimeoutHappened = false
})
parser.on('error', (err) => {
done(err)
})
parser.on('end', () => {
// console.log('dataEventCount=', dataEventCount)
should(dataEventCount).equal(10)
done()
})
xmlStream.pipe(gzip).pipe(gunzip).pipe(parser)
})
})

View File

@@ -0,0 +1,99 @@
import fs from 'fs'
import 'mocha'
import should from 'should'
import stream from 'stream'
import zlib from 'zlib'
import { XmlParser } from '../src/parser'
describe('wrong resourcePath', () => {
it('should be able to detect the wrong resourcePath at root level.', (done) => {
  // A resourcePath matching nothing must yield zero events of any kind.
  const source = fs.createReadStream('./test/TestFiles/item.xml')
  const parser = new XmlParser({ resourcePath: '/wrong/noNodes', emitOnNodeName: true })
  const records: string[] = []
  const items: string[] = []
  let recordCount = 0
  let nodeCount = 0
  parser.on('data', (record) => {
    records.push(record)
    recordCount += 1
  })
  parser.on('item', (node) => {
    items.push(node)
    nodeCount += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(records.length).equal(0)
    should(recordCount).equal(0)
    should(items.length).equal(0)
    should(nodeCount).equal(0)
    done()
  })
  source.pipe(parser)
})
it('should be able to detect wrong resourcePath while parsing xml', (done) => {
  // Even on a large document a non-matching path emits nothing.
  const source = fs.createReadStream('./test/TestFiles/manyItems.xml')
  const parser = new XmlParser({ resourcePath: '/wrong/noNodes', emitOnNodeName: true })
  const counters = { data: 0, item: 0 }
  parser.on('data', () => {
    counters.data += 1
  })
  parser.on('item', () => {
    counters.item += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(counters.data).equal(0)
    should(counters.item).equal(0)
    done()
  })
  source.pipe(parser)
})
it('should properly parse a huge file.', (done) => {
  // Non-matching path against the huge fixture: still zero events.
  const source = fs.createReadStream('./test/TestFiles/hugeFile.xml')
  const parser = new XmlParser({ resourcePath: '/wrong/path', emitOnNodeName: true })
  const counters = { data: 0, item: 0 }
  parser.on('data', () => {
    counters.data += 1
  })
  parser.on('item', () => {
    counters.item += 1
  })
  parser.on('error', (err) => {
    done(err)
  })
  parser.on('end', () => {
    should(counters.data).equal(0)
    should(counters.item).equal(0)
    done()
  })
  source.pipe(parser)
})
})

46
tsconfig.json Normal file
View File

@@ -0,0 +1,46 @@
{
"compilerOptions": {
"types": [
"node"
],
"outDir": "dist",
"moduleResolution": "node",
"module": "commonjs",
"removeComments": false,
"sourceMap": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports":true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"noImplicitAny": true,
"declaration": true,
"resolveJsonModule": true,
"target":"es2017",
"lib": ["es2017"],
"typeRoots": [
"./node_modules/@types"
]
},
"include": [
"src/**/*"
],
"exclude": [
"node_modules",
"dist"
],
"typedocOptions":{
"exclude": ["**/*spec.ts"],
"excludeExternals": true,
"excludeNotExported": true,
"excludePrivate": true,
"hideGenerator": true,
"includes": "./src",
"out": "docs",
"module": "commonjs",
"stripInternal": true,
"mode": "modules",
"theme": "default",
"moduleResolution": "node",
"preserveConstEnums": true
}
}

22
tslint.json Normal file
View File

@@ -0,0 +1,22 @@
{
"rules": {
"no-console": [false],
"variable-name": false,
"radix": false,
"object-literal-sort-keys": false,
"trailing-comma":[false],
"indent": [true,"tabs"],
"max-line-length": [false],
"no-string-literal": false,
"class-name": false,
"no-namespace": [false],
"no-bitwise": false
},
"extends": "tslint:recommended",
"linterOptions": {
"exclude":[
"*.json",
"**/*.json"
]
}
}