Skip to content

Commit

Permalink
encoding: update encoding sets in WHATWG API
Browse files Browse the repository at this point in the history
  • Loading branch information
pluris committed Sep 13, 2023
1 parent ccf46ba commit 6a577bc
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 6 deletions.
17 changes: 17 additions & 0 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,11 @@ const empty = new Uint8Array(0);

const encodings = new SafeMap([
['unicode-1-1-utf-8', 'utf-8'],
['unicode-11utf8', 'utf-8'],
['unicode-20utf8', 'utf-8'],
['utf8', 'utf-8'],
['utf-8', 'utf-8'],
['x-unicode20utf8', 'utf-8'],
['866', 'ibm866'],
['cp866', 'ibm866'],
['csibm866', 'ibm866'],
Expand Down Expand Up @@ -176,6 +179,7 @@ const encodings = new SafeMap([
['iso885915', 'iso-8859-15'],
['iso_8859-15', 'iso-8859-15'],
['l9', 'iso-8859-15'],
['iso-8859-16', 'iso-8859-16'],
['cskoi8r', 'koi8-r'],
['koi', 'koi8-r'],
['koi8', 'koi8-r'],
Expand Down Expand Up @@ -283,9 +287,22 @@ const encodings = new SafeMap([
['ksc5601', 'euc-kr'],
['ksc_5601', 'euc-kr'],
['windows-949', 'euc-kr'],
['csiso2022kr', 'replacement'],
['hz-gb-2312', 'replacement'],
['iso-2022-cn', 'replacement'],
['iso-2022-cn-ext', 'replacement'],
['iso-2022-kr', 'replacement'],
['replacement', 'replacement'],
['unicodefffe', 'utf-16be'],
['utf-16be', 'utf-16be'],
['csunicode', 'utf-16le'],
['iso-10646-ucs-2', 'utf-16le'],
['ucs-2', 'utf-16le'],
['unicode', 'utf-16le'],
['unicodefeff', 'utf-16le'],
['utf-16le', 'utf-16le'],
['utf-16', 'utf-16le'],
['x-user-defined', 'x-user-defined'],
]);

// Unfortunately, String.prototype.trim also removes non-ASCII whitespace,
Expand Down
22 changes: 20 additions & 2 deletions test/parallel/test-whatwg-encoding-custom-internals.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,20 @@ const { getEncodingFromLabel } = require('internal/encoding');
const mappings = {
'utf-8': [
'unicode-1-1-utf-8',
'unicode-11utf8',
'unicode-20utf8',
'utf8',
'x-unicode20utf8',
],
'utf-16be': [
'unicodefffe',
],
'utf-16be': [],
'utf-16le': [
'csunicode',
'iso-10646-ucs-2',
'ucs-2',
'unicode',
'unicodefeff',
'utf-16',
],
'ibm866': [
Expand Down Expand Up @@ -258,7 +268,15 @@ const { getEncodingFromLabel } = require('internal/encoding');
'ksc5601',
'ksc_5601',
'windows-949',
]
],
'replacement': [
'csiso2022kr',
'hz-gb-2312',
'iso-2022-cn',
'iso-2022-cn-ext',
'iso-2022-kr',
],
'x-user-defined': []
};
Object.entries(mappings).forEach((i) => {
const enc = i[0];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,17 @@ const assert = require('assert');
[
'utf-8',
'unicode-1-1-utf-8',
'unicode-11utf8',
'unicode-20utf8',
'x-unicode20utf8',
'utf8',
'unicodefffe',
'utf-16be',
'csunicode',
'iso-10646-ucs-2',
'ucs-2',
'unicode',
'unicodefeff',
'utf-16le',
'utf-16',
].forEach((i) => {
Expand Down
10 changes: 6 additions & 4 deletions test/parallel/test-whatwg-encoding-custom-textdecoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,21 @@ const util = require('util');
// UTF-8 input shared by the tests below: a byte order mark (EF BB BF)
// followed by the bytes for 'test' and the euro sign (E2 82 AC).
const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65,
0x73, 0x74, 0xe2, 0x82, 0xac]);

// Labels that TextDecoder is expected to resolve to the 'utf-8' encoding.
const encoding_sets = ['unicode-1-1-utf-8', 'unicode-11utf8', 'unicode-20utf8',
'utf8', 'utf-8', 'x-unicode20utf8'];
// Make sure TextDecoder exists.
assert(TextDecoder);

// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: false
{
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
encoding_sets.forEach((i) => {
const dec = new TextDecoder(i);
assert.strictEqual(dec.encoding, 'utf-8');
const res = dec.decode(buf);
assert.strictEqual(res, 'test€');
});

['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
encoding_sets.forEach((i) => {
const dec = new TextDecoder(i);
let res = '';
res += dec.decode(buf.slice(0, 8), { stream: true });
Expand All @@ -36,13 +38,13 @@ assert(TextDecoder);

// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true
{
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
encoding_sets.forEach((i) => {
const dec = new TextDecoder(i, { ignoreBOM: true });
const res = dec.decode(buf);
assert.strictEqual(res, '\ufefftest€');
});

['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
encoding_sets.forEach((i) => {
const dec = new TextDecoder(i, { ignoreBOM: true });
let res = '';
res += dec.decode(buf.slice(0, 8), { stream: true });
Expand Down

0 comments on commit 6a577bc

Please sign in to comment.