From ac02cdb0adcfd864648f0fba6ec7c804b07b7976 Mon Sep 17 00:00:00 2001 From: Aras Abbasi Date: Sun, 8 Oct 2023 17:43:16 +0200 Subject: [PATCH] util: lazy parse mime parameters PR-URL: https://github.com/nodejs/node/pull/49889 Reviewed-By: Antoine du Hamel Reviewed-By: Yagiz Nizipli --- benchmark/mime/mimetype-instantiation.js | 53 +++++++++++++++++++++ benchmark/mime/mimetype-to-string.js | 55 ++++++++++++++++++++++ benchmark/mime/parse-type-and-subtype.js | 53 +++++++++++++++++++++ benchmark/mime/to-ascii-lower.js | 54 +++++++++++++++++++++ lib/internal/mime.js | 60 +++++++++++++++++------- test/benchmark/test-benchmark-mime.js | 7 +++ test/parallel/test-mime-api.js | 7 +++ 7 files changed, 271 insertions(+), 18 deletions(-) create mode 100644 benchmark/mime/mimetype-instantiation.js create mode 100644 benchmark/mime/mimetype-to-string.js create mode 100644 benchmark/mime/parse-type-and-subtype.js create mode 100644 benchmark/mime/to-ascii-lower.js create mode 100644 test/benchmark/test-benchmark-mime.js diff --git a/benchmark/mime/mimetype-instantiation.js b/benchmark/mime/mimetype-instantiation.js new file mode 100644 index 00000000000000..ec0f68e819a753 --- /dev/null +++ b/benchmark/mime/mimetype-instantiation.js @@ -0,0 +1,53 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const { MIMEType } = require('util'); + +const bench = common.createBenchmark(main, { + n: [1e5], + value: [ + 'application/ecmascript; ', + 'text/html;charset=gbk', + `text/html;${'0123456789'.repeat(12)}=x;charset=gbk`, + 'text/html;test=\u00FF;charset=gbk', + 'x/x;\n\r\t x=x\n\r\t ;x=y', + ], +}, { +}); + +function main({ n, value }) { + // Warm up. + const length = 1024; + const array = []; + let errCase = false; + + for (let i = 0; i < length; ++i) { + try { + array.push(new MIMEType(value)); + } catch (e) { + errCase = true; + array.push(e); + } + } + + // console.log(`errCase: ${errCase}`); + bench.start(); + + for (let i = 0; i < n; ++i) { + const index = i % length; + try { + array[index] = new MIMEType(value); + } catch (e) { + array[index] = e; + } + } + + bench.end(n); + + // Verify the entries to prevent dead code elimination from making + // the benchmark invalid. + for (let i = 0; i < length; ++i) { + assert.strictEqual(typeof array[i], errCase ? 'object' : 'object'); + } +} diff --git a/benchmark/mime/mimetype-to-string.js b/benchmark/mime/mimetype-to-string.js new file mode 100644 index 00000000000000..e5cb8d6fae2e60 --- /dev/null +++ b/benchmark/mime/mimetype-to-string.js @@ -0,0 +1,55 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const { MIMEType } = require('util'); + +const bench = common.createBenchmark(main, { + n: [1e5], + value: [ + 'application/ecmascript; ', + 'text/html;charset=gbk', + `text/html;${'0123456789'.repeat(12)}=x;charset=gbk`, + 'text/html;test=\u00FF;charset=gbk', + 'x/x;\n\r\t x=x\n\r\t ;x=y', + ], +}, { +}); + +function main({ n, value }) { + // Warm up. + const length = 1024; + const array = []; + let errCase = false; + + const mime = new MIMEType(value); + + for (let i = 0; i < length; ++i) { + try { + array.push(mime.toString()); + } catch (e) { + errCase = true; + array.push(e); + } + } + + // console.log(`errCase: ${errCase}`); + bench.start(); + + for (let i = 0; i < n; ++i) { + const index = i % length; + try { + array[index] = mime.toString(); + } catch (e) { + array[index] = e; + } + } + + bench.end(n); + + // Verify the entries to prevent dead code elimination from making + // the benchmark invalid. + for (let i = 0; i < length; ++i) { + assert.strictEqual(typeof array[i], errCase ? 'object' : 'string'); + } +} diff --git a/benchmark/mime/parse-type-and-subtype.js b/benchmark/mime/parse-type-and-subtype.js new file mode 100644 index 00000000000000..4b91dc1db6ab24 --- /dev/null +++ b/benchmark/mime/parse-type-and-subtype.js @@ -0,0 +1,53 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); + +const bench = common.createBenchmark(main, { + n: [1e7], + value: [ + 'application/ecmascript; ', + 'text/html;charset=gbk', + // eslint-disable-next-line max-len + 'text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk', + ], +}, { + flags: ['--expose-internals'], +}); + +function main({ n, value }) { + + const parseTypeAndSubtype = require('internal/mime').parseTypeAndSubtype; + // Warm up. + const length = 1024; + const array = []; + let errCase = false; + + for (let i = 0; i < length; ++i) { + try { + array.push(parseTypeAndSubtype(value)); + } catch (e) { + errCase = true; + array.push(e); + } + } + + // console.log(`errCase: ${errCase}`); + bench.start(); + for (let i = 0; i < n; ++i) { + const index = i % length; + try { + array[index] = parseTypeAndSubtype(value); + } catch (e) { + array[index] = e; + } + } + + bench.end(n); + + // Verify the entries to prevent dead code elimination from making + // the benchmark invalid. + for (let i = 0; i < length; ++i) { + assert.strictEqual(typeof array[i], errCase ? 'object' : 'object'); + } +} diff --git a/benchmark/mime/to-ascii-lower.js b/benchmark/mime/to-ascii-lower.js new file mode 100644 index 00000000000000..93a4a14dc6d96b --- /dev/null +++ b/benchmark/mime/to-ascii-lower.js @@ -0,0 +1,54 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); + +const bench = common.createBenchmark(main, { + n: [1e7], + value: [ + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + 'UPPERCASE', + 'lowercase', + 'mixedCase', + ], +}, { + flags: ['--expose-internals'], +}); + +function main({ n, value }) { + + const toASCIILower = require('internal/mime').toASCIILower; + // Warm up. + const length = 1024; + const array = []; + let errCase = false; + + for (let i = 0; i < length; ++i) { + try { + array.push(toASCIILower(value)); + } catch (e) { + errCase = true; + array.push(e); + } + } + + // console.log(`errCase: ${errCase}`); + bench.start(); + + for (let i = 0; i < n; ++i) { + const index = i % length; + try { + array[index] = toASCIILower(value); + } catch (e) { + array[index] = e; + } + } + + bench.end(n); + + // Verify the entries to prevent dead code elimination from making + // the benchmark invalid. + for (let i = 0; i < length; ++i) { + assert.strictEqual(typeof array[i], errCase ? 'object' : 'string'); + } +} diff --git a/lib/internal/mime.js b/lib/internal/mime.js index 4dbfcb3d9b0715..332ed8b5368c84 100644 --- a/lib/internal/mime.js +++ b/lib/internal/mime.js @@ -36,6 +36,7 @@ function toASCIILower(str) { const SOLIDUS = '/'; const SEMICOLON = ';'; + function parseTypeAndSubtype(str) { // Skip only HTTP whitespace from start let position = SafeStringPrototypeSearch(str, END_BEGINNING_WHITESPACE); @@ -72,12 +73,11 @@ function parseTypeAndSubtype(str) { throw new ERR_INVALID_MIME_SYNTAX('subtype', str, trimmedSubtype); } const subtype = toASCIILower(trimmedSubtype); - return { - __proto__: null, + return [ type, subtype, - parametersStringIndex: position, - }; + position, + ]; } const EQUALS_SEMICOLON_OR_END = /[;=]|$/; @@ -123,12 +123,29 @@ const encode = (value) => { class MIMEParams { #data = new SafeMap(); + // We set the flag the MIMEParams instance as processed on initialization + // to defer the parsing of a potentially large string. + #processed = true; + #string = null; + + /** + * Used to instantiate a MIMEParams object within the MIMEType class and + * to allow it to be parsed lazily. + */ + static instantiateMimeParams(str) { + const instance = new MIMEParams(); + instance.#string = str; + instance.#processed = false; + return instance; + } delete(name) { + this.#parse(); this.#data.delete(name); } get(name) { + this.#parse(); const data = this.#data; if (data.has(name)) { return data.get(name); @@ -137,10 +154,12 @@ class MIMEParams { } has(name) { + this.#parse(); return this.#data.has(name); } set(name, value) { + this.#parse(); const data = this.#data; name = `${name}`; value = `${value}`; @@ -166,18 +185,22 @@ class MIMEParams { } *entries() { + this.#parse(); yield* this.#data.entries(); } *keys() { + this.#parse(); yield* this.#data.keys(); } *values() { + this.#parse(); yield* this.#data.values(); } toString() { + this.#parse(); let ret = ''; for (const { 0: key, 1: value } of this.#data) { const encoded = encode(value); @@ -190,8 +213,11 @@ class MIMEParams { // Used to act as a friendly class to stringifying stuff // not meant to be exposed to users, could inject invalid values - static parseParametersString(str, position, params) { - const paramsMap = params.#data; + #parse() { + if (this.#processed) return; // already parsed + const paramsMap = this.#data; + let position = 0; + const str = this.#string; const endOfSource = SafeStringPrototypeSearch( StringPrototypeSlice(str, position), START_ENDING_WHITESPACE, @@ -270,13 +296,14 @@ class MIMEParams { NOT_HTTP_TOKEN_CODE_POINT) === -1 && SafeStringPrototypeSearch(parameterValue, NOT_HTTP_QUOTED_STRING_CODE_POINT) === -1 && - params.has(parameterString) === false + paramsMap.has(parameterString) === false ) { paramsMap.set(parameterString, parameterValue); } position++; } - return paramsMap; + this.#data = paramsMap; + this.#processed = true; } } const MIMEParamsStringify = MIMEParams.prototype.toString; @@ -293,8 +320,8 @@ ObjectDefineProperty(MIMEParams.prototype, 'toJSON', { writable: true, }); -const { parseParametersString } = MIMEParams; -delete MIMEParams.parseParametersString; +const { instantiateMimeParams } = MIMEParams; +delete MIMEParams.instantiateMimeParams; class MIMEType { #type; @@ -303,14 +330,9 @@ class MIMEType { constructor(string) { string = `${string}`; const data = parseTypeAndSubtype(string); - this.#type = data.type; - this.#subtype = data.subtype; - this.#parameters = new MIMEParams(); - parseParametersString( - string, - data.parametersStringIndex, - this.#parameters, - ); + this.#type = data[0]; + this.#subtype = data[1]; + this.#parameters = instantiateMimeParams(StringPrototypeSlice(string, data[2])); } get type() { @@ -362,6 +384,8 @@ ObjectDefineProperty(MIMEType.prototype, 'toJSON', { }); module.exports = { + toASCIILower, + parseTypeAndSubtype, MIMEParams, MIMEType, }; diff --git a/test/benchmark/test-benchmark-mime.js b/test/benchmark/test-benchmark-mime.js new file mode 100644 index 00000000000000..275184b9b90045 --- /dev/null +++ b/test/benchmark/test-benchmark-mime.js @@ -0,0 +1,7 @@ +'use strict'; + +require('../common'); + +const runBenchmark = require('../common/benchmark'); + +runBenchmark('mime', { NODEJS_BENCHMARK_ZERO_ALLOWED: 1 }); diff --git a/test/parallel/test-mime-api.js b/test/parallel/test-mime-api.js index 0510ddaed20267..0fabd1a4cce35f 100644 --- a/test/parallel/test-mime-api.js +++ b/test/parallel/test-mime-api.js @@ -1,8 +1,10 @@ +// Flags: --expose-internals 'use strict'; require('../common'); const assert = require('assert'); const { MIMEType, MIMEParams } = require('util'); +const { toASCIILower } = require('internal/mime'); const WHITESPACES = '\t\n\f\r '; @@ -158,3 +160,8 @@ assert.throws(() => params.set(`x${NOT_HTTP_TOKEN_CODE_POINT}`, 'x'), /parameter assert.throws(() => params.set('x', `${NOT_HTTP_QUOTED_STRING_CODE_POINT};`), /parameter value/i); assert.throws(() => params.set('x', `${NOT_HTTP_QUOTED_STRING_CODE_POINT}x`), /parameter value/i); assert.throws(() => params.set('x', `x${NOT_HTTP_QUOTED_STRING_CODE_POINT}`), /parameter value/i); + +assert.strictEqual(toASCIILower('someThing'), 'something'); +assert.strictEqual(toASCIILower('SomeThing'), 'something'); +assert.strictEqual(toASCIILower('SomeThing3'), 'something3'); +assert.strictEqual(toASCIILower('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), 'abcdefghijklmnopqrstuvwxyz');