Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TypeScript conversion of parse5 package #359

Closed
wants to merge 15 commits into from
Closed
20 changes: 0 additions & 20 deletions .eslintrc.js

This file was deleted.

21 changes: 21 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"env": {
"es2020": true,
"node": true,
"mocha": true
},
"extends": ["eslint:recommended", "prettier"],
"plugins": ["prettier"],
"rules": {
"prettier/prettier": "error",
"no-console": "error",
"curly": ["error", "all"],
"prefer-arrow-callback": "error",
"one-var": ["error", "never"],
"no-var": "error",
"prefer-const": "error"
},
"parserOptions": {
"sourceType": "module"
}
}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ docs/05_api_reference.md
package-lock.json
bench/package-lock.json
.DS_Store
.nyc_output
packages/*/coverage
packages/parse5/lib
*.swp
3 changes: 3 additions & 0 deletions .mocharc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"ui": "tdd"
}
9 changes: 0 additions & 9 deletions bench/.eslintrc.js

This file was deleted.

6 changes: 6 additions & 0 deletions bench/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"extends": ["../.eslintrc.json"],
"rules": {
"no-console": "off"
}
}
8 changes: 3 additions & 5 deletions bench/memory/named-entity-data.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
'use strict';

const format = require('human-format');
import format from 'human-format';

main();

function main() {
async function main() {
const before = process.memoryUsage().rss;

require('../../packages/parse5/lib/tokenizer/named-entity-data');
await import('../../packages/parse5/lib/tokenizer/named-entity-data.js');

const after = process.memoryUsage().rss;

Expand Down
20 changes: 9 additions & 11 deletions bench/memory/sax-parser.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
'use strict';

const fs = require('fs');
const format = require('human-format');
const promisifyEvent = require('promisify-event');
const memwatch = require('node-memwatch');
const SAXParser = require('../../packages/parse5-sax-parser/lib');
import * as fs from 'fs';
import format from 'human-format';
import promisifyEvent from 'promisify-event';
import memwatch from '@airbnb/node-memwatch';
import SAXParser from '../../packages/parse5-sax-parser/lib/index.js';

main();

Expand All @@ -16,21 +14,21 @@ async function main() {
const heapDiffMeasurement = new memwatch.HeapDiff();
let heapDiff = null;

memwatch.on('stats', stats => {
memwatch.on('stats', (stats) => {
maxMemUsage = Math.max(maxMemUsage, stats['current_base']);
});

startDate = new Date();

const parserPromise = parse().then(dataSize => {
const parserPromise = parse().then((dataSize) => {
parsedDataSize = dataSize;
endDate = new Date();
heapDiff = heapDiffMeasurement.end();
});

await Promise.all([
parserPromise,
promisifyEvent(memwatch, 'stats') // NOTE: we need at least one `stats` result
promisifyEvent(memwatch, 'stats'), // NOTE: we need at least one `stats` result
]);

printResults(parsedDataSize, startDate, endDate, heapDiff, maxMemUsage);
Expand All @@ -57,7 +55,7 @@ function getDuration(startDate, endDate) {
const scale = new format.Scale({
seconds: 1,
minutes: 60,
hours: 3600
hours: 3600,
});

return format((endDate - startDate) / 1000, { scale: scale });
Expand Down
3 changes: 2 additions & 1 deletion bench/package.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
{
"name": "parse5-benchmarks",
"type": "module",
"version": "1.0.0",
"description": "parse5 regression benchmarks",
"author": "Ivan Nikulin <ifaaan@gmail.com>",
"license": "MIT",
"dependencies": {
"benchmark": "^2.1.4",
"human-format": "^0.7.0",
"node-memwatch": "^1.0.1",
"@airbnb/node-memwatch": "^2.0.0",
"parse5": "*",
"promisify-event": "^1.0.0"
}
Expand Down
75 changes: 38 additions & 37 deletions bench/perf/index.js
Original file line number Diff line number Diff line change
@@ -1,37 +1,38 @@
'use strict';

const { join } = require('path');
const { readFileSync, createReadStream, readdirSync } = require('fs');
const Benchmark = require('benchmark');
const { loadTreeConstructionTestData } = require('../../test/utils/generate-parsing-tests');
const loadSAXParserTestData = require('../../test/utils/load-sax-parser-test-data');
const { treeAdapters, WritableStreamStub } = require('../../test/utils/common');
import { readFileSync, createReadStream, readdirSync } from 'fs';
import Benchmark from 'benchmark';
import { loadTreeConstructionTestData } from '../../test/utils/generate-parsing-tests.js';
import { loadSAXParserTestData } from '../../test/utils/load-sax-parser-test-data.js';
import { treeAdapters, WritableStreamStub } from '../../test/utils/common.js';
import * as parse5 from '../../packages/parse5/lib/index.js';
import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/lib/index.js';
import * as parse5Upstream from 'parse5';

const hugePagePath = new URL('../../test/data/huge-page/huge-page.html', import.meta.url);
const treeConstructionPath = new URL('../../test/data/html5lib-tests/tree-construction', import.meta.url);
const saxPath = new URL('../../test/data/sax/', import.meta.url);

//HACK: https://github.com/bestiejs/benchmark.js/issues/51
/* global workingCopy, WorkingCopyParserStream, upstreamParser, hugePage, microTests, runMicro, runPages, files */
global.workingCopy = require('../../packages/parse5/lib');
global.WorkingCopyParserStream = require('../../packages/parse5-parser-stream/lib');
global.upstreamParser = require('parse5');
global.workingCopy = parse5;
global.WorkingCopyParserStream = parse5Stream;
global.upstreamParser = parse5Upstream;

// Huge page data
global.hugePage = readFileSync(join(__dirname, '../../test/data/huge-page/huge-page.html')).toString();
global.hugePage = readFileSync(hugePagePath).toString();

// Micro data
global.microTests = loadTreeConstructionTestData(
[join(__dirname, '../../test/data/html5lib-tests/tree-construction')],
treeAdapters.default
)
global.microTests = loadTreeConstructionTestData([treeConstructionPath], treeAdapters.default)
.filter(
test =>
(test) =>
//NOTE: this test caused stack overflow in parse5 v1.x
test.input !== '<button><p><button>'
)
.map(test => ({
.map((test) => ({
html: test.input,
fragmentContext: test.fragmentContext
fragmentContext: test.fragmentContext,
}));

global.runMicro = function(parser) {
global.runMicro = function (parser) {
for (const test of microTests) {
if (test.fragmentContext) {
parser.parseFragment(test.fragmentContext, test.html);
Expand All @@ -42,22 +43,22 @@ global.runMicro = function(parser) {
};

// Pages data
const pages = loadSAXParserTestData().map(test => test.src);
const pages = loadSAXParserTestData().map((test) => test.src);

global.runPages = function(parser) {
global.runPages = function (parser) {
for (let j = 0; j < pages.length; j++) {
parser.parse(pages[j]);
}
};

// Stream data
global.files = readdirSync(join(__dirname, '../../test/data/sax')).map(dirName =>
join(__dirname, '../../test/data/sax', dirName, 'src.html')
);
// NOTE: keep each entry as a `file:` URL object rather than reading `.pathname`:
// `pathname` is percent-encoded (a space becomes `%20`) and is not a valid
// filesystem path on Windows (`/C:/...`). `createReadStream` accepts URL objects.
global.files = readdirSync(saxPath).map((dirName) => new URL(`./${dirName}/src.html`, saxPath));

// Utils
/**
 * Returns the `hz` value of the first entry in `suite` whose `name` equals `testName`.
 *
 * @param {{ filter: Function }} suite - collection of benchmark results; only its
 *     `filter` method is used (presumably a Benchmark.Suite — confirm against caller).
 * @param {string} testName - name of the benchmark to look up.
 * @returns {*} the `hz` property of the first matching entry.
 * @throws {TypeError} if no entry is named `testName`.
 */
function getHz(suite, testName) {
    // `filter(...)[0]` rather than `find`: the suite is only known to expose `filter`.
    const [match] = suite.filter((t) => t.name === testName);

    return match.hz;
}

function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {
Expand All @@ -67,7 +68,7 @@ function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {
.add('Working copy', workingCopyFn, { defer })
.add('Upstream', upstreamFn, { defer })
.on('start', () => console.log(name))
.on('cycle', event => console.log(String(event.target)))
.on('cycle', (event) => console.log(String(event.target)))
.on('complete', () => {
const workingCopyHz = getHz(suite, 'Working copy');
const upstreamHz = getHz(suite, 'Upstream');
Expand All @@ -85,28 +86,28 @@ function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {
runBench({
name: 'parse5 regression benchmark - MICRO',
workingCopyFn: () => runMicro(workingCopy),
upstreamFn: () => runMicro(upstreamParser)
upstreamFn: () => runMicro(upstreamParser),
});

runBench({
name: 'parse5 regression benchmark - HUGE',
workingCopyFn: () => workingCopy.parse(hugePage),
upstreamFn: () => upstreamParser.parse(hugePage)
upstreamFn: () => upstreamParser.parse(hugePage),
});

runBench({
name: 'parse5 regression benchmark - PAGES',
workingCopyFn: () => runPages(workingCopy),
upstreamFn: () => runPages(upstreamParser)
upstreamFn: () => runPages(upstreamParser),
});

runBench({
name: 'parse5 regression benchmark - STREAM',
defer: true,
workingCopyFn: async deferred => {
workingCopyFn: async (deferred) => {
const parsePromises = files.map(
fileName =>
new Promise(resolve => {
(fileName) =>
new Promise((resolve) => {
const stream = createReadStream(fileName, 'utf8');
const parserStream = new WorkingCopyParserStream();

Expand All @@ -118,10 +119,10 @@ runBench({
await Promise.all(parsePromises);
deferred.resolve();
},
upstreamFn: async deferred => {
upstreamFn: async (deferred) => {
const parsePromises = files.map(
fileName =>
new Promise(resolve => {
(fileName) =>
new Promise((resolve) => {
const stream = createReadStream(fileName, 'utf8');
const writable = new WritableStreamStub();

Expand All @@ -136,5 +137,5 @@ runBench({

await Promise.all(parsePromises);
deferred.resolve();
}
},
});
Loading