cleanup documentation, examples and coverage

This commit is contained in:
Dror Gluska
2019-07-05 00:23:29 +03:00
parent c3a177244b
commit fa375b2d5a
16 changed files with 2040 additions and 265 deletions

2
.gitignore vendored
View File

@@ -36,3 +36,5 @@ jspm_packages
# Optional REPL history
.node_repl_history
/dist
/docs
drorgl-xml-streamer-*.tgz

View File

@@ -1,3 +1,12 @@
# Dependency directories
test
.travis.yml
tsconfig.json
tslint.json
.nyc_output
.vscode
coverage
docs
src
drorgl-xml-streamer*
.nycrc

20
.nycrc Normal file
View File

@@ -0,0 +1,20 @@
{
"extension": [
".ts"
],
"require": [
"ts-node/register"
],
"include": [
"src/**/*.ts"
],
"exclude": [
"**/*.d.ts"
],
"reporter": [
"html",
"text-summary",
"text"
],
"all": true
}

View File

@@ -3,9 +3,13 @@
"Dror",
"Gluska",
"Teja",
"apos",
"drorgl",
"dryrun",
"gmail",
"prefinish",
"saitejas",
"typedoc",
"xmltojs"
]
}

204
README.md
View File

@@ -1,13 +1,8 @@
# xml-streamer
[![Build Status](https://travis-ci.org/Sai1919/xml-streamer.svg?branch=master)](https://travis-ci.org/Sai1919/xml-streamer)
## Motivation
You use [Node.js](https://nodejs.org) for speed? You process XML streams? Then you want the fastest XML to JS parser: `xml-streamer`, based on [node-expat](https://github.com/astro/node-expat) and It implements the Node.js `stream.Transform API`.
## IMPORTANT
This is a modified version of xml-streamer, the parser + tests stayed mostly the same but the core xml parser was replaced with [SaxLtx xml parser](https://github.com/xmppjs/ltx) due to reliability issues with node-expat, both this library and ltx were converted to typescript.
This is a modified version of xml-streamer, the parser + tests stayed mostly the same but the core xml parser was replaced with [SaxLtx xml parser](https://github.com/xmppjs/ltx) due to reliability/stability issues with node-expat, both this library and ltx were converted to typescript.
Please note that ltx parser is about 20% slower than node-expat.
## Install
@@ -20,115 +15,148 @@ npm install xml-streamer
`xml-streamer can be used in four ways`
```javascript
```typescript
// 1. By passing the resourcePath and reading data by calling the `read` method instead of listening for data events.
(function () {
"use strict";
import {XmlParser} from "@drorgl/xml-streamer";
var Parser = require('xml-streamer')
const opts = { resourcePath: "/items/item" };
var opts = {resourcePath: '/items/item'}
const parser = new XmlParser(opts);
var parser = new Parser(opts)
parser.on('end', function () {
parser.on("end", () => {
// parsing ended no more data events will be raised
})
});
parser.on('error', function (error) {
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
console.error(error);
});
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
// for readable event and consume data by calling parser.read()
});
// after the readable event has occurred you can call the read method and get data.
parser.read() // will return one object at a time.
}())
parser.read(); // will return one object at a time.
// 2. By listening for interested nodes.
(function () {
"use strict";
import { XmlParser } from "@drorgl/xml-streamer";
var Parser = require('xml-streamer')
const opts = {}; // see `Available Constructor Options` section below.
var opts = {} // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
var parser = new Parser(opts)
parser.on('item', function (item) {
parser.on("item", (item) => {
// consume the item object here
})
});
parser.on('end', function () {
parser.on("end", () => {
// parsing ended no more data events will be raised
})
});
parser.on('error', function (error) {
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
console.error(error);
});
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
//for readable event and consume data by calling parser.read()
});
// 3. By passing a resource path.
(function () {
"use strict";
import { XmlParser } from "@drorgl/xml-streamer";
var Parser = require('xml-streamer')
var opts = {resourcePath: '/items/item'}
const opts = { resourcePath: "/items/item" };
var parser = new Parser(opts)
const parser = new XmlParser(opts);
parser.on('data', function (data) {
parser.on("data", (data) => {
// consume the data object here
})
});
parser.on('end', function () {
parser.on("end", () => {
// parsing ended no more data events will be raised
})
});
parser.on('error', function (error) {
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error)
})
console.error(error);
});
xmlStream.pipe(parser) // pipe your input xmlStream to parser.
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on('readable', function () {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
})
}())
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you
// can wait for readable event and consume data by calling parser.read()
});
// 4. By passing a string or buffer to parse function
(function () {
"use strict";
import { XmlParser } from "@drorgl/xml-streamer";
var Parser = require('xml-streamer')
const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using parse method
var opts = {resourcePath: '/items/item'} // resourcePath is manditory when using parse method
const parser = new XmlParser(opts);
var parser = new Parser(opts)
parser.parse(stringOrBuffer, function (err, data) {
parser.parse(stringOrBuffer, (err, data) => {
// consume data here
})
}())
});
// 5. Compressed Stream Parsing
import { XmlParser } from "@drorgl/xml-streamer";
import StreamZip from "node-stream-zip"; // the module exports the class itself (requires esModuleInterop)
const zip = new StreamZip({
file: archiveName,
storeEntries: true
});
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
// NOTE: there is no separate xmlStream here; the zip entry stream is piped into the parser in the "ready" handler below.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});
zip.on("ready", () => {
zip.stream('path/inside/zip.xml', (err, stm) => {
stm.pipe(parser);
stm.on('end', () => zip.close());
});
});
```
@@ -262,36 +290,30 @@ npm install xml-streamer
```
## upcoming features
1. `handling of compressed streams`
2. `handling of different encodings`
3. `Filtering of objects extracted from resourcePath based on xpaths and json paths`
## Namespace handling
A word about special parsing of *xmlns:* Note that "resourcePath" in the options is not an XPATH.
So the value given to the resourcePath is treated as simple value and no expression evaluations are done.
## Benchmark
`xml-streamer` internally uses `node-expat`
`npm run benchmark`
| module | ops/sec | native | XML compliant | stream |
|---------------------------------------------------------------------------------------|--------:|:------:|:-------------:|:--------------:|
| [sax-js](https://github.com/isaacs/sax-js) | 99,412 | ☐ | ☑ | ☑ |
| [node-xml](https://github.com/dylang/node-xml) | 130,631 | ☐ | ☑ | ☑ |
| [libxmljs](https://github.com/polotek/libxmljs) | 276,136 | ☑ | ☑ | ☐ |
| **node-expat** | 322,769 | ☑ | ☑ | ☑ |
Higher is better.
## Testing
```
npm install -g standard
npm test
```
## Coverage
```
npm run coverage
=============================== Coverage summary ===============================
Statements : 90.91% ( 340/374 )
Branches : 81.66% ( 187/229 )
Functions : 78.13% ( 25/32 )
Lines : 90.86% ( 318/350 )
================================================================================
```
## Documentation
```
npm run doc
```

View File

@@ -0,0 +1,41 @@
// Compressed Stream Parsing
//
// Opens a zip archive with node-stream-zip, streams one XML entry out of it
// and pipes that entry into the parser.
import StreamZip from "node-stream-zip"; // the module exports the class itself
import { XmlParser } from "../";

// Sample archive path; replace with the zip file you want to read.
const archiveName = "archive.zip";

const zip = new StreamZip({
    file: archiveName,
    storeEntries: true
});

const opts = {}; // see the `Available Constructor Options` section in the README.
const parser = new XmlParser(opts);

parser.on("item", (item) => {
    // consume the item object here
});

parser.on("end", () => {
    // parsing ended no more data events will be raised
});

parser.on("error", (error) => {
    // error occurred
    // NOTE: when error event emitted no end event will be emitted
    console.error(error);
});

// readable
parser.on("readable", () => {
    // if you don't want to consume "data" on "data" events you can wait
    // for readable event and consume data by calling parser.read()
});

zip.on("ready", () => {
    zip.stream("path/inside/zip.xml", (err, stm) => {
        if (err) {
            // The entry could not be opened: report it and release the archive.
            console.error(err);
            zip.close();
            return;
        }
        // The zip entry is the parser's only input.
        stm.pipe(parser);
        stm.on("end", () => zip.close());
    });
});

View File

@@ -0,0 +1,27 @@
// By listening for interested nodes.
import { createReadStream } from "fs";
import { XmlParser } from "../";

// Sample input source; replace with any readable stream of XML.
const xmlStream = createReadStream("items.xml");

const opts = {}; // see the `Available Constructor Options` section in the README.
const parser = new XmlParser(opts);

parser.on("item", (item) => {
    // consume the item object here
});

parser.on("end", () => {
    // parsing ended no more data events will be raised
});

parser.on("error", (error) => {
    // error occurred
    // NOTE: when error event emitted no end event will be emitted
    console.error(error);
});

xmlStream.pipe(parser); // pipe your input xmlStream to parser.

// readable
parser.on("readable", () => {
    // if you don't want to consume "data" on "data" events you can wait
    // for readable event and consume data by calling parser.read()
});

11
examples/parse.ts Normal file
View File

@@ -0,0 +1,11 @@
// By passing a string or buffer to parse function
import { XmlParser } from "../";

// Sample input; any XML string or Buffer can be handed to `parse`.
const stringOrBuffer = "<items><item>one</item><item>two</item></items>";

const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using parse method
const parser = new XmlParser(opts);

parser.parse(stringOrBuffer, (err, data) => {
    if (err) {
        // parsing failed; `data` is not provided in this case
        console.error(err);
        return;
    }
    // consume data here
});

View File

@@ -0,0 +1,27 @@
// By passing the resourcePath and reading data by calling the
// `read` method instead of listening for data events.
import { createReadStream } from "fs";
import { XmlParser } from "../";

// Sample input source; replace with any readable stream of XML.
const xmlStream = createReadStream("items.xml");

const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);

parser.on("end", () => {
    // parsing ended no more data events will be raised
});

parser.on("error", (error) => {
    // error occurred
    // NOTE: when error event emitted no end event will be emitted
    console.error(error);
});

xmlStream.pipe(parser); // pipe your input xmlStream to parser.

// readable
parser.on("readable", () => {
    // Once the readable event has occurred, call read() to pull data.
    // read() returns one object at a time and null when the internal
    // buffer is empty, so drain it in a loop.
    let item = parser.read();
    while (item !== null) {
        // consume the item object here
        item = parser.read();
    }
});

28
examples/resourcePath.ts Normal file
View File

@@ -0,0 +1,28 @@
// By passing a resource path.
import { createReadStream } from "fs";
import { XmlParser } from "../";

// Sample input source; replace with any readable stream of XML.
const xmlStream = createReadStream("items.xml");

const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);

parser.on("data", (data) => {
    // consume the data object here
});

parser.on("end", () => {
    // parsing ended no more data events will be raised
});

parser.on("error", (error) => {
    // error occurred
    // NOTE: when error event emitted no end event will be emitted
    console.error(error);
});

xmlStream.pipe(parser); // pipe your input xmlStream to parser.

// readable
parser.on("readable", () => {
    // if you don't want to consume "data" on "data" events you
    // can wait for readable event and consume data by calling parser.read()
});

1751
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"version": "0.2.1",
"name": "xml-streamer",
"name": "@drorgl/xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": {
"name": "Sai Teja",
@@ -21,7 +21,7 @@
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/Sai1919/xml-streamer"
"url": "https://github.com/drorgl/xml-streamer"
},
"dependencies": {
"lodash": "4.17.11"
@@ -35,7 +35,11 @@
"@types/should": "^13.0.0",
"ts-node": "^8.2.0",
"tslint": "^5.17.0",
"typescript": "^3.5.1"
"typescript": "^3.5.1",
"rimraf": "^2.6.3",
"source-map-support": "^0.5.12",
"typedoc": "^0.14.2",
"nyc": "^14.1.1"
},
"optionalDependencies": {},
"main": "dist/parser.js",
@@ -49,7 +53,10 @@
"dryrun": "tsc -noEmit",
"build": "tsc",
"prepublish": "npm run lint && npm run dryrun && npm run test",
"install": "npm run build"
"install": "npm run build",
"coverage": "rimraf ./.nyc_output && rimraf ./coverage && nyc mocha -r ts-node/register -r source-map-support/register --ui bdd test/**/*.spec.{ts,tsx}\"",
"doc": "rimraf ./docs && typedoc",
"publish-now": "npm publish --access public"
},
"contributors": [
{

View File

@@ -1,3 +1,4 @@
// Source: https://github.com/xmppjs/ltx/blob/master/lib/parsers/ltx.js
import events from "events";
import { unescapeXML } from "./unescape";

View File

@@ -15,11 +15,49 @@ const defaults = {
};
/**
 * Configuration options for XmlParser. Every field is optional.
 */
export interface IXmlParserOptions {
/**
* Optional field. Used to extract the XML nodes that you are interested in.
* For example "/items/item".
*
* @type {string}
* @memberof IXmlParserOptions
*/
resourcePath?: string;
/**
* Optional field. Set this to true if you want to listen on node names instead of the data event. Default: false
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
emitOnNodeName?: boolean;
/**
* Optional field. Pass the value with which you want to reference attributes of a node in its object form. Default: '$'
*
* @type {string}
* @memberof IXmlParserOptions
*/
attrsKey?: string;
/**
* Optional field. Pass the value with which you want to reference the node value in its object form. Default: '_'
*
* @type {string}
* @memberof IXmlParserOptions
*/
textKey?: string;
/**
* Optional field. Default value is true. All children nodes will come in an array when this option is true.
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
explicitArray?: boolean;
/**
* Optional field. Default value is false. When set, the text attribute will include all blanks found in the xml.
* When unset, blanks are removed as long as they come in one single block from the underlying parser
* (blank lines, newlines and entities).
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
verbatimText?: boolean;
/**
* Optional field.
* NOTE(review): this option is not documented in this file; presumably it controls whether
* whitespace-only text is kept. Confirm against the parser implementation and document the default.
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
preserveWhitespace?: boolean;
}
@@ -37,15 +75,9 @@ export class XmlParser extends stream.Transform {
this._readableState.objectMode = true;
}
public checkForInterestedNodeListeners() {
const ignore = ["end", "prefinish", "data", "error"];
const eventNames = Object.keys((this as any)._events);
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) { continue; }
this.parserState.interestedNodes.push(eventNames[i]);
}
public _flush(callback: () => void) {
this.processChunk("");
callback();
}
public _transform(chunk: Buffer | string, encoding: string, callback: () => void) {
@@ -55,18 +87,6 @@ export class XmlParser extends stream.Transform {
callback();
}
public processChunk(chunk: string | Buffer) {
const parser = this.parser;
const state = this.parserState;
if (state.isRootNode) {
this.checkForInterestedNodeListeners();
registerEvents.call(this);
}
parser.write(chunk);
}
public parse(chunk: Buffer | string, cb: (error: Error, data?: Buffer) => void) {
const parser = this.parser;
const state = this.parserState;
@@ -98,9 +118,27 @@ export class XmlParser extends stream.Transform {
return cb(null, result as any);
}
public _flush(callback: () => void) {
this.processChunk("");
callback();
private processChunk(chunk: string | Buffer) {
const parser = this.parser;
const state = this.parserState;
if (state.isRootNode) {
this.checkForInterestedNodeListeners();
registerEvents.call(this);
}
parser.write(chunk);
}
private checkForInterestedNodeListeners() {
const ignore = ["end", "prefinish", "data", "error"];
const eventNames = Object.keys((this as any)._events);
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) { continue; }
this.parserState.interestedNodes.push(eventNames[i]);
}
}
}

View File

@@ -43,9 +43,9 @@ function unescapeXMLReplace(match: string) {
throw new Error("Illegal XML entity " + match);
}
exports.escapeXML = function escapeXML(s: string) {
export function escapeXML(s: string) {
return s.replace(/&|<|>|"|'/g, escapeXMLReplace);
};
}
export function unescapeXML(s: string) {
let result = "";
@@ -74,10 +74,10 @@ export function unescapeXML(s: string) {
return result;
}
exports.escapeXMLText = function escapeXMLText(s: string) {
export function escapeXMLText(s: string) {
return s.replace(/&|<|>/g, escapeXMLReplace);
};
}
exports.unescapeXMLText = function unescapeXMLText(s: string) {
export function unescapeXMLText(s: string) {
return s.replace(/&(amp|#38|lt|#60|gt|#62);/g, unescapeXMLReplace);
};
}

View File

@@ -6,7 +6,7 @@
"outDir": "dist",
"moduleResolution": "node",
"module": "commonjs",
"removeComments": true,
"removeComments": false,
"sourceMap": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports":true,
@@ -27,5 +27,20 @@
"exclude": [
"node_modules",
"dist"
]
],
"typedocOptions":{
"exclude": ["**/*spec.ts"],
"excludeExternals": true,
"excludeNotExported": true,
"excludePrivate": true,
"hideGenerator": true,
"includes": "./src",
"out": "docs",
"module": "commonjs",
"stripInternal": true,
"mode": "modules",
"theme": "default",
"moduleResolution": "node",
"preserveConstEnums": true
}
}