Skip to content

Commit cffffa4

Browse files
joyeecheung authored and BridgeAR committed
test: pull encoding WPT test fixtures
PR-URL: nodejs#25321 Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
1 parent 0d92b0f commit cffffa4

File tree

323 files changed

+24110
-10
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

323 files changed

+24110
-10
lines changed

test/fixtures/wpt/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.
1010

1111
Last update:
1212

13-
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
14-
- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
1513
- console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
14+
- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
1615
- url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
16+
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
17+
- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces
1718

1819
[Web Platform Tests]: https://github.com/web-platform-tests/wpt
1920
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt

test/fixtures/wpt/encoding/META.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
spec: https://encoding.spec.whatwg.org/
2+
suggested_reviewers:
3+
- inexorabletash
4+
- annevk
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// META: title=Encoding API: Basics
2+
3+
test(function() {
4+
assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8');
5+
assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8');
6+
}, 'Default encodings');
7+
8+
test(function() {
9+
assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string')
10+
assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string')
11+
}, 'Default inputs');
12+
13+
14+
function testDecodeSample(encoding, string, bytes) {
15+
test(function() {
16+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
17+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
18+
}, 'Decode sample: ' + encoding);
19+
}
20+
21+
// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
22+
// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
23+
// byte-swapped BOM (non-character U+FFFE)
24+
var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';
25+
26+
test(function() {
27+
var encoding = 'utf-8';
28+
var string = sample;
29+
var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE];
30+
var encoded = new TextEncoder().encode(string);
31+
assert_array_equals([].slice.call(encoded), bytes);
32+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
33+
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
34+
}, 'Encode/decode round trip: utf-8');
35+
36+
testDecodeSample(
37+
'utf-16le',
38+
sample,
39+
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
40+
);
41+
42+
testDecodeSample(
43+
'utf-16be',
44+
sample,
45+
[0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
46+
);
47+
48+
testDecodeSample(
49+
'utf-16',
50+
sample,
51+
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
52+
);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// META: title=Encoding API: invalid label
2+
// META: timeout=long
3+
// META: script=resources/encodings.js
4+
5+
var tests = ["invalid-invalidLabel"];
6+
setup(function() {
7+
encodings_table.forEach(function(section) {
8+
section.encodings.forEach(function(encoding) {
9+
encoding.labels.forEach(function(label) {
10+
["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) {
11+
tests.push(ws + label);
12+
tests.push(label + ws);
13+
tests.push(ws + label + ws);
14+
});
15+
});
16+
});
17+
});
18+
});
19+
20+
tests.forEach(function(input) {
21+
test(function() {
22+
assert_throws(new RangeError(), function() { new TextDecoder(input); });
23+
}, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.');
24+
});
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// META: title=Encoding API: replacement encoding
2+
// META: script=resources/encodings.js
3+
4+
encodings_table.forEach(function(section) {
5+
section.encodings.filter(function(encoding) {
6+
return encoding.name === 'replacement';
7+
}).forEach(function(encoding) {
8+
encoding.labels.forEach(function(label) {
9+
test(function() {
10+
assert_throws(new RangeError(), function() { new TextDecoder(label); });
11+
}, 'Label for "replacement" should be rejected by API: ' + label);
12+
});
13+
});
14+
});
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding
2+
3+
var badStrings = [
4+
{
5+
input: 'abc123',
6+
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
7+
decoded: 'abc123',
8+
name: 'Sanity check'
9+
},
10+
{
11+
input: '\uD800',
12+
expected: [0xef, 0xbf, 0xbd],
13+
decoded: '\uFFFD',
14+
name: 'Surrogate half (low)'
15+
},
16+
{
17+
input: '\uDC00',
18+
expected: [0xef, 0xbf, 0xbd],
19+
decoded: '\uFFFD',
20+
name: 'Surrogate half (high)'
21+
},
22+
{
23+
input: 'abc\uD800123',
24+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
25+
decoded: 'abc\uFFFD123',
26+
name: 'Surrogate half (low), in a string'
27+
},
28+
{
29+
input: 'abc\uDC00123',
30+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
31+
decoded: 'abc\uFFFD123',
32+
name: 'Surrogate half (high), in a string'
33+
},
34+
{
35+
input: '\uDC00\uD800',
36+
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
37+
decoded: '\uFFFD\uFFFD',
38+
name: 'Wrong order'
39+
}
40+
];
41+
42+
badStrings.forEach(function(t) {
43+
test(function() {
44+
var encoded = new TextEncoder().encode(t.input);
45+
assert_array_equals([].slice.call(encoded), t.expected);
46+
assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded);
47+
}, 'Invalid surrogates encoded into UTF-8: ' + t.name);
48+
});
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<!doctype html>
2+
<meta charset=big5> <!-- test breaks if the server overrides this -->
3+
<script src=/resources/testharness.js></script>
4+
<script src=/resources/testharnessreport.js></script>
5+
<div id=log></div>
6+
<script>
7+
function encode(input, output, desc) {
8+
test(function() {
9+
var a = document.createElement("a"); // <a> uses document encoding for URL's query
10+
// Append and prepend X to test for off-by-one errors
11+
a.href = "https://example.com/?X" + input + "X";
12+
assert_equals(a.search.substr(1), "X" + output + "X"); // remove leading "?"
13+
}, "big5 encoder: " + desc);
14+
}
15+
16+
encode("ab", "ab", "very basic")
17+
// edge cases
18+
encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
19+
encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
20+
encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
21+
encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
22+
encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
23+
encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
24+
encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
25+
encode("\u79D4", "%FE%FE", "The last character in the index");
26+
// not in index
27+
encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
28+
encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
29+
// duplicate low bits
30+
encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
31+
// prefer last
32+
encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
33+
</script>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=shift_jis>
3+
<title>Shift_JIS file ending with a truncated sequence</title>
4+
One-byte truncated sequence:&#xFFFD;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!doctype html>
2+
<meta charset=shift_jis>
3+
<title>Shift_JIS file ending with a truncated sequence</title>
4+
<link rel=match href=/encoding/eof-shift_jis-ref.html>
5+
One-byte truncated sequence:�
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!doctype html>
2+
<meta charset=utf-8>
3+
<title>UTF-8 file ending with a one-byte truncated sequence</title>
4+
One-byte truncated sequence:&#xFFFD;

0 commit comments

Comments
 (0)