From 55fde47b1dcd8d21ef2858f8b5d610b5b2170697 Mon Sep 17 00:00:00 2001 From: Jungku Lee Date: Sat, 23 Sep 2023 21:20:39 +0900 Subject: [PATCH] lib: update encoding sets in `WHATWG API` PR-URL: https://github.com/nodejs/node/pull/49610 Refs: https://encoding.spec.whatwg.org/#names-and-labels Reviewed-By: Daeyeon Jeong Reviewed-By: Minwoo Jung --- lib/internal/encoding.js | 17 ++++++++++++++ .../test-whatwg-encoding-custom-internals.js | 22 +++++++++++++++++-- ...ng-custom-textdecoder-api-invalid-label.js | 9 ++++++++ ...test-whatwg-encoding-custom-textdecoder.js | 10 +++++---- 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 996b2506a49d3b..a9bfb665c2f1e8 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -76,8 +76,11 @@ const empty = new Uint8Array(0); const encodings = new SafeMap([ ['unicode-1-1-utf-8', 'utf-8'], + ['unicode11utf8', 'utf-8'], + ['unicode20utf8', 'utf-8'], ['utf8', 'utf-8'], ['utf-8', 'utf-8'], + ['x-unicode20utf8', 'utf-8'], ['866', 'ibm866'], ['cp866', 'ibm866'], ['csibm866', 'ibm866'], @@ -176,6 +179,7 @@ const encodings = new SafeMap([ ['iso885915', 'iso-8859-15'], ['iso_8859-15', 'iso-8859-15'], ['l9', 'iso-8859-15'], + ['iso-8859-16', 'iso-8859-16'], ['cskoi8r', 'koi8-r'], ['koi', 'koi8-r'], ['koi8', 'koi8-r'], @@ -283,9 +287,22 @@ const encodings = new SafeMap([ ['ksc5601', 'euc-kr'], ['ksc_5601', 'euc-kr'], ['windows-949', 'euc-kr'], + ['csiso2022kr', 'replacement'], + ['hz-gb-2312', 'replacement'], + ['iso-2022-cn', 'replacement'], + ['iso-2022-cn-ext', 'replacement'], + ['iso-2022-kr', 'replacement'], + ['replacement', 'replacement'], + ['unicodefffe', 'utf-16be'], ['utf-16be', 'utf-16be'], + ['csunicode', 'utf-16le'], + ['iso-10646-ucs-2', 'utf-16le'], + ['ucs-2', 'utf-16le'], + ['unicode', 'utf-16le'], + ['unicodefeff', 'utf-16le'], ['utf-16le', 'utf-16le'], ['utf-16', 'utf-16le'], + ['x-user-defined', 'x-user-defined'], ]); // Unfortunately, String.prototype.trim also removes non-ascii whitespace, diff --git a/test/parallel/test-whatwg-encoding-custom-internals.js b/test/parallel/test-whatwg-encoding-custom-internals.js index c810b43b1ae447..505e623a681998 100644 --- a/test/parallel/test-whatwg-encoding-custom-internals.js +++ b/test/parallel/test-whatwg-encoding-custom-internals.js @@ -13,10 +13,20 @@ const { getEncodingFromLabel } = require('internal/encoding'); const mappings = { 'utf-8': [ 'unicode-1-1-utf-8', + 'unicode11utf8', + 'unicode20utf8', 'utf8', + 'x-unicode20utf8', + ], + 'utf-16be': [ + 'unicodefffe', ], - 'utf-16be': [], 'utf-16le': [ + 'csunicode', + 'iso-10646-ucs-2', + 'ucs-2', + 'unicode', + 'unicodefeff', 'utf-16', ], 'ibm866': [ @@ -258,7 +268,15 @@ const { getEncodingFromLabel } = require('internal/encoding'); 'ksc5601', 'ksc_5601', 'windows-949', - ] + ], + 'replacement': [ + 'csiso2022kr', + 'hz-gb-2312', + 'iso-2022-cn', + 'iso-2022-cn-ext', + 'iso-2022-kr', + ], + 'x-user-defined': [] }; Object.entries(mappings).forEach((i) => { const enc = i[0]; diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder-api-invalid-label.js b/test/parallel/test-whatwg-encoding-custom-textdecoder-api-invalid-label.js index a701d79a285a16..569c17baff4b6e 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder-api-invalid-label.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder-api-invalid-label.js @@ -8,8 +8,17 @@ const assert = require('assert'); [ 'utf-8', 'unicode-1-1-utf-8', + 'unicode11utf8', + 'unicode20utf8', + 'x-unicode20utf8', 'utf8', + 'unicodefffe', 'utf-16be', + 'csunicode', + 'iso-10646-ucs-2', + 'ucs-2', + 'unicode', + 'unicodefeff', 'utf-16le', 'utf-16', ].forEach((i) => { diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index e4379aa03f870d..835c4d962d2ada 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -13,19 +13,21 @@ const util = require('util'); const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65, 0x73, 0x74, 0xe2, 0x82, 0xac]); +const encoding_sets = ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', + 'utf8', 'utf-8', 'x-unicode20utf8']; // Make Sure TextDecoder exist assert(TextDecoder); // Test TextDecoder, UTF-8, fatal: false, ignoreBOM: false { - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + encoding_sets.forEach((i) => { const dec = new TextDecoder(i); assert.strictEqual(dec.encoding, 'utf-8'); const res = dec.decode(buf); assert.strictEqual(res, 'test€'); }); - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + encoding_sets.forEach((i) => { const dec = new TextDecoder(i); let res = ''; res += dec.decode(buf.slice(0, 8), { stream: true }); @@ -36,13 +38,13 @@ assert(TextDecoder); // Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true { - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + encoding_sets.forEach((i) => { const dec = new TextDecoder(i, { ignoreBOM: true }); const res = dec.decode(buf); assert.strictEqual(res, '\ufefftest€'); }); - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + encoding_sets.forEach((i) => { const dec = new TextDecoder(i, { ignoreBOM: true }); let res = ''; res += dec.decode(buf.slice(0, 8), { stream: true });