cleanup documentation, examples and coverage

This commit is contained in:
Dror Gluska
2019-07-05 00:23:29 +03:00
parent c3a177244b
commit fa375b2d5a
16 changed files with 2040 additions and 265 deletions

2
.gitignore vendored
View File

@@ -36,3 +36,5 @@ jspm_packages
# Optional REPL history # Optional REPL history
.node_repl_history .node_repl_history
/dist /dist
/docs
drorgl-xml-streamer-*.tgz

View File

@@ -1,3 +1,12 @@
# Dependency directories # Dependency directories
test test
.travis.yml .travis.yml
tsconfig.json
tslint.json
.nyc_output
.vscode
coverage
docs
src
drorgl-xml-streamer*
.nycrc

20
.nycrc Normal file
View File

@@ -0,0 +1,20 @@
{
"extension": [
".ts"
],
"require": [
"ts-node/register"
],
"include": [
"src/**/*.ts"
],
"exclude": [
"**/*.d.ts"
],
"reporter": [
"html",
"text-summary",
"text"
],
"all": true
}

View File

@@ -3,9 +3,13 @@
"Dror", "Dror",
"Gluska", "Gluska",
"Teja", "Teja",
"apos",
"drorgl", "drorgl",
"dryrun",
"gmail", "gmail",
"prefinish",
"saitejas", "saitejas",
"typedoc",
"xmltojs" "xmltojs"
] ]
} }

204
README.md
View File

@@ -1,13 +1,8 @@
# xml-streamer # xml-streamer
[![Build Status](https://travis-ci.org/Sai1919/xml-streamer.svg?branch=master)](https://travis-ci.org/Sai1919/xml-streamer)
## Motivation
You use [Node.js](https://nodejs.org) for speed? You process XML streams? Then you want the fastest XML to JS parser: `xml-streamer`, based on [node-expat](https://github.com/astro/node-expat) and It implements the Node.js `stream.Transform API`.
## IMPORTANT ## IMPORTANT
This is a modified version of xml-streamer, the parser + tests stayed mostly the same but the core xml parser was replaced with [SaxLtx xml parser](https://github.com/xmppjs/ltx) due to reliability issues with node-expat, both this library and ltx were converted to typescript. This is a modified version of xml-streamer, the parser + tests stayed mostly the same but the core xml parser was replaced with [SaxLtx xml parser](https://github.com/xmppjs/ltx) due to reliability/stability issues with node-expat, both this library and ltx were converted to typescript.
Please note that ltx parser is about 20% slower than node-expat. Please note that ltx parser is about 20% slower than node-expat.
## Install ## Install
@@ -20,115 +15,148 @@ npm install xml-streamer
`xml-streamer can be used in four ways` `xml-streamer can be used in four ways`
```javascript ```typescript
// 1. By passing the resourcePath and reading data by calling `read` method instead listening for data events. // 1. By passing the resourcePath and reading data by calling `read` method instead listening for data events.
(function () { import {XmlParser} from "@drorgl/xml-streamer";
"use strict";
var Parser = require('xml-streamer') const opts = { resourcePath: "/items/item" };
var opts = {resourcePath: '/items/item'} const parser = new XmlParser(opts);
var parser = new Parser(opts) parser.on("end", () => {
parser.on('end', function () {
// parsing ended no more data events will be raised // parsing ended no more data events will be raised
}) });
parser.on('error', function (error) { parser.on("error", (error) => {
// error occurred // error occurred
// NOTE: when error event emitted no end event will be emitted // NOTE: when error event emitted no end event will be emitted
console.error(error) console.error(error);
}) });
xmlStream.pipe(parser) // pipe your input xmlStream to parser. xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable // readable
parser.on('readable', function () { parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read() // if you don't want to consume "data" on "data" events you can wait
}) // for readable event and consume data by calling parser.read()
});
// after readable event occurred you can call read method and get data. // after readable event occurred you can call read method and get data.
parser.read() // will return one object at a time. parser.read(); // will return one object at a time.
}())
// 2. By listening for interested nodes. // 2. By listening for interested nodes.
(function () { import { XmlParser } from "@drorgl/xml-streamer";
"use strict";
var Parser = require('xml-streamer') const opts = {}; // see `Available Constructor Options` section below.
var opts = {} // see `Available Constructor Options` section below. const parser = new XmlParser(opts);
var parser = new Parser(opts) parser.on("item", (item) => {
parser.on('item', function (item) {
// consume the item object here // consume the item object here
}) });
parser.on('end', function () { parser.on("end", () => {
// parsing ended no more data events will be raised // parsing ended no more data events will be raised
}) });
parser.on('error', function (error) { parser.on("error", (error) => {
// error occurred // error occurred
// NOTE: when error event emitted no end event will be emitted // NOTE: when error event emitted no end event will be emitted
console.error(error) console.error(error);
}) });
xmlStream.pipe(parser) // pipe your input xmlStream to parser. xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable // readable
parser.on('readable', function () { parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read() // if you don't want to consume "data" on "data" events you can wait
}) //for readable event and consume data by calling parser.read()
}()) });
// 3. By passing a resource path. // 3. By passing a resource path.
(function () { import { XmlParser } from "@drorgl/xml-streamer";
"use strict";
var Parser = require('xml-streamer') const opts = { resourcePath: "/items/item" };
var opts = {resourcePath: '/items/item'}
var parser = new Parser(opts) const parser = new XmlParser(opts);
parser.on('data', function (data) { parser.on("data", (data) => {
// consume the data object here // consume the data object here
}) });
parser.on('end', function () { parser.on("end", () => {
// parsing ended no more data events will be raised // parsing ended no more data events will be raised
}) });
parser.on('error', function (error) { parser.on("error", (error) => {
// error occurred // error occurred
// NOTE: when error event emitted no end event will be emitted // NOTE: when error event emitted no end event will be emitted
console.error(error) console.error(error);
}) });
xmlStream.pipe(parser) // pipe your input xmlStream to parser. xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable // readable
parser.on('readable', function () { parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read() // if you don't want to consume "data" on "data" events you
}) // can wait for readable event and consume data by calling parser.read()
}()) });
// 4. By passing a string or buffer to parse function // 4. By passing a string or buffer to parse function
(function () { import { XmlParser } from "@drorgl/xml-streamer";
"use strict";
var Parser = require('xml-streamer') const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using parse method
var opts = {resourcePath: '/items/item'} // resourcePath is manditory when using parse method const parser = new XmlParser(opts);
var parser = new Parser(opts) parser.parse(stringOrBuffer, (err, data) => {
parser.parse(stringOrBuffer, function (err, data) {
// consume data here // consume data here
}) });
}())
// 5. Compressed Stream Parsing
import { XmlParser } from "@drorgl/xml-streamer";
import { StreamZip } from "node-stream-zip";
const zip = new StreamZip({
file: archiveName,
storeEntries: true
});
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});
zip.on("ready", () => {
zip.stream('path/inside/zip.xml', (err, stm) => {
stm.pipe(parser);
stm.on('end', () => zip.close());
});
});
``` ```
@@ -262,36 +290,30 @@ npm install xml-streamer
``` ```
## upcoming features
1. `handling of compressed streams`
2. `handling of different encodings`
3. `Filtering of objects extracted from resourcePath based on xpaths and json paths`
## Namespace handling ## Namespace handling
A word about special parsing of *xmlns:* Note that "resourcePath" in the options is not an XPATH. A word about special parsing of *xmlns:* Note that "resourcePath" in the options is not an XPATH.
So the value given to the resourcePath is treated as simple value and no expression evaluations are done. So the value given to the resourcePath is treated as simple value and no expression evaluations are done.
## Benchmark
`xml-streamer` internally uses `node-expat`
`npm run benchmark`
| module | ops/sec | native | XML compliant | stream |
|---------------------------------------------------------------------------------------|--------:|:------:|:-------------:|:--------------:|
| [sax-js](https://github.com/isaacs/sax-js) | 99,412 | ☐ | ☑ | ☑ |
| [node-xml](https://github.com/dylang/node-xml) | 130,631 | ☐ | ☑ | ☑ |
| [libxmljs](https://github.com/polotek/libxmljs) | 276,136 | ☑ | ☑ | ☐ |
| **node-expat** | 322,769 | ☑ | ☑ | ☑ |
Higher is better.
## Testing ## Testing
``` ```
npm install -g standard
npm test npm test
``` ```
## Coverage
```
npm run coverage
=============================== Coverage summary ===============================
Statements : 90.91% ( 340/374 )
Branches : 81.66% ( 187/229 )
Functions : 78.13% ( 25/32 )
Lines : 90.86% ( 318/350 )
================================================================================
```
## Documentation
```
npm run doc
```

View File

@@ -0,0 +1,41 @@
// Compressed Stream Parsing
import { XmlParser } from "../";
import { StreamZip } from "node-stream-zip";
const zip = new StreamZip({
file: archiveName,
storeEntries: true
});
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});
zip.on("ready", () => {
zip.stream('path/inside/zip.xml', (err, stm) => {
stm.pipe(parser);
stm.on('end', () => zip.close());
});
});

View File

@@ -0,0 +1,27 @@
// By listening for interested nodes.
import { XmlParser } from "../";
const opts = {}; // see `Available Constructor Options` section below.
const parser = new XmlParser(opts);
parser.on("item", (item) => {
// consume the item object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait for readable event and consume data by calling parser.read()
});

11
examples/parse.ts Normal file
View File

@@ -0,0 +1,11 @@
// By passing a string or buffer to parse function
import { XmlParser } from "../";
const opts = { resourcePath: "/items/item" }; // resourcePath is mandatory when using parse method
const parser = new XmlParser(opts);
parser.parse(stringOrBuffer, (err, data) => {
// consume data here
});

View File

@@ -0,0 +1,27 @@
// By passing the resourcePath and reading data by calling
// `read` method instead listening for data events.
import {XmlParser} from "../";
const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you can wait
// for readable event and consume data by calling parser.read()
});
// after readable event occurred you can call read method and get data.
parser.read(); // will return one object at a time.

28
examples/resourcePath.ts Normal file
View File

@@ -0,0 +1,28 @@
// By passing a resource path.
import { XmlParser } from "../";
const opts = { resourcePath: "/items/item" };
const parser = new XmlParser(opts);
parser.on("data", (data) => {
// consume the data object here
});
parser.on("end", () => {
// parsing ended no more data events will be raised
});
parser.on("error", (error) => {
// error occurred
// NOTE: when error event emitted no end event will be emitted
console.error(error);
});
xmlStream.pipe(parser); // pipe your input xmlStream to parser.
// readable
parser.on("readable", () => {
// if you don't want to consume "data" on "data" events you
// can wait for readable event and consume data by calling parser.read()
});

1751
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{ {
"version": "0.2.1", "version": "0.2.1",
"name": "xml-streamer", "name": "@drorgl/xml-streamer",
"description": "XML stream parser for parsing large files efficiently with less usage of memory.", "description": "XML stream parser for parsing large files efficiently with less usage of memory.",
"author": { "author": {
"name": "Sai Teja", "name": "Sai Teja",
@@ -21,7 +21,7 @@
"license": "MIT", "license": "MIT",
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://github.com/Sai1919/xml-streamer" "url": "https://github.com/drorgl/xml-streamer"
}, },
"dependencies": { "dependencies": {
"lodash": "4.17.11" "lodash": "4.17.11"
@@ -35,7 +35,11 @@
"@types/should": "^13.0.0", "@types/should": "^13.0.0",
"ts-node": "^8.2.0", "ts-node": "^8.2.0",
"tslint": "^5.17.0", "tslint": "^5.17.0",
"typescript": "^3.5.1" "typescript": "^3.5.1",
"rimraf": "^2.6.3",
"source-map-support": "^0.5.12",
"typedoc": "^0.14.2",
"nyc": "^14.1.1"
}, },
"optionalDependencies": {}, "optionalDependencies": {},
"main": "dist/parser.js", "main": "dist/parser.js",
@@ -49,7 +53,10 @@
"dryrun": "tsc -noEmit", "dryrun": "tsc -noEmit",
"build": "tsc", "build": "tsc",
"prepublish": "npm run lint && npm run dryrun && npm run test", "prepublish": "npm run lint && npm run dryrun && npm run test",
"install": "npm run build" "install": "npm run build",
"coverage": "rimraf ./.nyc_output && rimraf ./coverage && nyc mocha -r ts-node/register -r source-map-support/register --ui bdd \"test/**/*.spec.{ts,tsx}\"",
"doc": "rimraf ./docs && typedoc",
"publish-now": "npm publish --access public"
}, },
"contributors": [ "contributors": [
{ {

View File

@@ -1,3 +1,4 @@
// Source: https://github.com/xmppjs/ltx/blob/master/lib/parsers/ltx.js
import events from "events"; import events from "events";
import { unescapeXML } from "./unescape"; import { unescapeXML } from "./unescape";

View File

@@ -15,11 +15,49 @@ const defaults = {
}; };
export interface IXmlParserOptions { export interface IXmlParserOptions {
/**
* Optional field. Used to extract the XML nodes that you are interested in.
*
* @type {string}
* @memberof IXmlParserOptions
*/
resourcePath?: string; resourcePath?: string;
/**
* Optional field. Set this to true if you want to listen on node names instead of data event. default: false
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
emitOnNodeName?: boolean; emitOnNodeName?: boolean;
/**
* Optional field. pass the value with which you want to reference attributes of a node in its object form. default: '$'
*
* @type {string}
* @memberof IXmlParserOptions
*/
attrsKey?: string; attrsKey?: string;
/**
* Optional field. pass the value with which you want to reference node value in its object form. default: '_'
*
* @type {string}
* @memberof IXmlParserOptions
*/
textKey?: string; textKey?: string;
/**
* Optional field. Default value is true. All children nodes will come in an array when this option is true.
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
explicitArray?: boolean; explicitArray?: boolean;
/**
* Optional field. Default value is false. When set, text attribute will include all blanks found in xml.
* When unset, blanks are removed as long as they come in one expat single block (blank lines, newlines and entities).
*
* @type {boolean}
* @memberof IXmlParserOptions
*/
verbatimText?: boolean; verbatimText?: boolean;
preserveWhitespace?: boolean; preserveWhitespace?: boolean;
} }
@@ -37,15 +75,9 @@ export class XmlParser extends stream.Transform {
this._readableState.objectMode = true; this._readableState.objectMode = true;
} }
public checkForInterestedNodeListeners() { public _flush(callback: () => void) {
const ignore = ["end", "prefinish", "data", "error"]; this.processChunk("");
const eventNames = Object.keys((this as any)._events); callback();
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) { continue; }
this.parserState.interestedNodes.push(eventNames[i]);
}
} }
public _transform(chunk: Buffer | string, encoding: string, callback: () => void) { public _transform(chunk: Buffer | string, encoding: string, callback: () => void) {
@@ -55,18 +87,6 @@ export class XmlParser extends stream.Transform {
callback(); callback();
} }
public processChunk(chunk: string | Buffer) {
const parser = this.parser;
const state = this.parserState;
if (state.isRootNode) {
this.checkForInterestedNodeListeners();
registerEvents.call(this);
}
parser.write(chunk);
}
public parse(chunk: Buffer | string, cb: (error: Error, data?: Buffer) => void) { public parse(chunk: Buffer | string, cb: (error: Error, data?: Buffer) => void) {
const parser = this.parser; const parser = this.parser;
const state = this.parserState; const state = this.parserState;
@@ -98,9 +118,27 @@ export class XmlParser extends stream.Transform {
return cb(null, result as any); return cb(null, result as any);
} }
public _flush(callback: () => void) { private processChunk(chunk: string | Buffer) {
this.processChunk(""); const parser = this.parser;
callback(); const state = this.parserState;
if (state.isRootNode) {
this.checkForInterestedNodeListeners();
registerEvents.call(this);
}
parser.write(chunk);
}
private checkForInterestedNodeListeners() {
const ignore = ["end", "prefinish", "data", "error"];
const eventNames = Object.keys((this as any)._events);
// tslint:disable-next-line:prefer-for-of
for (let i = 0; i < eventNames.length; i++) {
if (_.includes(ignore, eventNames[i], 0)) { continue; }
this.parserState.interestedNodes.push(eventNames[i]);
}
} }
} }

View File

@@ -43,9 +43,9 @@ function unescapeXMLReplace(match: string) {
throw new Error("Illegal XML entity " + match); throw new Error("Illegal XML entity " + match);
} }
exports.escapeXML = function escapeXML(s: string) { export function escapeXML(s: string) {
return s.replace(/&|<|>|"|'/g, escapeXMLReplace); return s.replace(/&|<|>|"|'/g, escapeXMLReplace);
}; }
export function unescapeXML(s: string) { export function unescapeXML(s: string) {
let result = ""; let result = "";
@@ -74,10 +74,10 @@ export function unescapeXML(s: string) {
return result; return result;
} }
exports.escapeXMLText = function escapeXMLText(s: string) { export function escapeXMLText(s: string) {
return s.replace(/&|<|>/g, escapeXMLReplace); return s.replace(/&|<|>/g, escapeXMLReplace);
}; }
exports.unescapeXMLText = function unescapeXMLText(s: string) { export function unescapeXMLText(s: string) {
return s.replace(/&(amp|#38|lt|#60|gt|#62);/g, unescapeXMLReplace); return s.replace(/&(amp|#38|lt|#60|gt|#62);/g, unescapeXMLReplace);
}; }

View File

@@ -6,7 +6,7 @@
"outDir": "dist", "outDir": "dist",
"moduleResolution": "node", "moduleResolution": "node",
"module": "commonjs", "module": "commonjs",
"removeComments": true, "removeComments": false,
"sourceMap": true, "sourceMap": true,
"esModuleInterop": true, "esModuleInterop": true,
"allowSyntheticDefaultImports":true, "allowSyntheticDefaultImports":true,
@@ -27,5 +27,20 @@
"exclude": [ "exclude": [
"node_modules", "node_modules",
"dist" "dist"
] ],
"typedocOptions":{
"exclude": ["**/*spec.ts"],
"excludeExternals": true,
"excludeNotExported": true,
"excludePrivate": true,
"hideGenerator": true,
"includes": "./src",
"out": "docs",
"module": "commonjs",
"stripInternal": true,
"mode": "modules",
"theme": "default",
"moduleResolution": "node",
"preserveConstEnums": true
}
} }