From 7eb4cc4903964c390d57039eda15511b33515f39 Mon Sep 17 00:00:00 2001 From: 5saviahv <49443574+5saviahv@users.noreply.github.com> Date: Sun, 7 Feb 2021 20:10:01 +0200 Subject: [PATCH] feat(parse): Expose parse5 option scriptingEnabled (#1707) * expose parse5 option scriptingEnabled * parse5 options test * add option scriptingEnabled into types * typo in comment Co-authored-by: 5saviahv <5saviahv@users.noreply.github.com> --- lib/parsers/parse5.js | 4 +++ test/__fixtures__/fixtures.js | 10 ++++++ test/cheerio.js | 64 +++++++++++++++++++++++++++++++++++ types/index.d.ts | 3 ++ types/index.test-d.ts | 8 +++++ 5 files changed, 89 insertions(+) diff --git a/lib/parsers/parse5.js b/lib/parsers/parse5.js index 870103c3f5..e20bc84419 100644 --- a/lib/parsers/parse5.js +++ b/lib/parsers/parse5.js @@ -4,6 +4,10 @@ var htmlparser2Adapter = require('parse5-htmlparser2-tree-adapter'); exports.parse = function (content, options, isDocument) { var opts = { + scriptingEnabled: + typeof options.scriptingEnabled === 'boolean' + ? options.scriptingEnabled + : true, treeAdapter: htmlparser2Adapter, sourceCodeLocationInfo: options.sourceCodeLocationInfo, }; diff --git a/test/__fixtures__/fixtures.js b/test/__fixtures__/fixtures.js index 79f85ca3f0..e232515c1c 100644 --- a/test/__fixtures__/fixtures.js +++ b/test/__fixtures__/fixtures.js @@ -84,3 +84,13 @@ exports.forms = [ '
', '
', ].join(''); + +exports.noscript = [ + '', + '', + '

Rocks!

', + '', +].join(''); diff --git a/test/cheerio.js b/test/cheerio.js index 27bb9b7769..f64dfe2381 100644 --- a/test/cheerio.js +++ b/test/cheerio.js @@ -415,4 +415,68 @@ describe('cheerio', function () { expect(utils.isHtml('<123>')).toBe(false); }); }); + + describe('parse5 options', function () { + var noscript = fixtures.noscript; + + // should parse noscript tags only with false option value + test('{scriptingEnabled: ???}', function () { + var opt = 'scriptingEnabled'; + var options = {}; + var result; + + // [default] scriptingEnabled: true - tag contains one text element + result = cheerio.load(noscript)('noscript'); + expect(result).toHaveLength(1); + expect(result[0].children).toHaveLength(1); + expect(result[0].children[0].type).toBe('text'); + + // scriptingEnabled: false - content of noscript will be parsed + options[opt] = false; + result = cheerio.load(fixtures.noscript, options)('noscript'); + expect(result).toHaveLength(1); + expect(result[0].children).toHaveLength(2); + expect(result[0].children[0].type).toBe('comment'); + expect(result[0].children[1].type).toBe('tag'); + expect(result[0].children[1].name).toBe('a'); + + // scriptingEnabled: ??? 
- should act as true + var values = [undefined, null, 0, '']; + for (var val of values) { + options[opt] = val; + result = cheerio.load(noscript, options)('noscript'); + expect(result).toHaveLength(1); + expect(result[0].children).toHaveLength(1); + expect(result[0].children[0].type).toBe('text'); + } + }); + + // should contain location data only with truthy option value + test('{sourceCodeLocationInfo: ???}', function () { + var prop = 'sourceCodeLocation'; + var opt = 'sourceCodeLocationInfo'; + var options = {}; + var result; + var i; + + // Location data should not be present + var values = [undefined, null, 0, false, '']; + for (i = 0; i < values.length; i++) { + options[opt] = values[i]; + result = cheerio.load(noscript, options)('noscript'); + expect(result).toHaveLength(1); + expect(result[0]).not.toHaveProperty(prop); + } + + // Location data should be present + values = [true, 1, 'test']; + for (i = 0; i < values.length; i++) { + options[opt] = values[i]; + result = cheerio.load(noscript, options)('noscript'); + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty(prop); + expect(typeof result[0][prop]).toBe('object'); + } + }); + }); }); diff --git a/types/index.d.ts b/types/index.d.ts index 3d7cd34ce7..536efa0310 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -228,6 +228,9 @@ declare namespace cheerio { /** Enable location support for parse5 */ sourceCodeLocationInfo?: boolean; + + /** Enable scripting in parse5 (defaults to true); set to false so the contents of noscript tags are parsed */ + scriptingEnabled?: boolean; } interface Selector { diff --git a/types/index.test-d.ts b/types/index.test-d.ts index c86fcbfe0c..196b945c2c 100644 --- a/types/index.test-d.ts +++ b/types/index.test-d.ts @@ -34,6 +34,14 @@ $ = cheerio.load(html, { xmlMode: true, }); +$ = cheerio.load(html, { + scriptingEnabled: false, +}); + +$ = cheerio.load(html, { + sourceCodeLocationInfo: true, +}); + $ = cheerio.load(html, { normalizeWhitespace: true, withStartIndices: true,