Skip to content

Commit 35b7960

Browse files
committed
src: move all 1-byte encodings to native
1 parent 70ec5c0 commit 35b7960

File tree

8 files changed

+508
-159
lines changed

8 files changed

+508
-159
lines changed

lib/internal/encoding.js

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
ArrayPrototypeMap,
78
Boolean,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
1011
ObjectSetPrototypeOf,
1112
ObjectValues,
13+
SafeArrayIterator,
1214
SafeMap,
1315
StringPrototypeSlice,
1416
Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
3234
const kUTF8FastPath = Symbol('kUTF8FastPath');
3335
const kIgnoreBOM = Symbol('kIgnoreBOM');
3436

35-
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36-
3737
const {
3838
getConstructorOf,
3939
customInspectSymbol: inspect,
@@ -58,6 +58,7 @@ const {
5858
encodeIntoResults,
5959
encodeUtf8String,
6060
decodeUTF8,
61+
decodeSingleByte,
6162
} = binding;
6263

6364
function validateDecoder(obj) {
@@ -71,6 +72,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7172

7273
const empty = new FastBuffer();
7374

75+
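// Has to be kept in sync with the single-byte encoding tables in src/:
// each name's position in this list is the index handed to the native decodeSingleByte().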
76+
// Maps each supported single-byte encoding name to its position in the list
// below: the `(e, i) => [e, i]` mapper turns every name into a [name, index]
// entry for the SafeMap. Reordering or inserting names changes those indices.
const encodingsSinglebyte = new SafeMap(new SafeArrayIterator(ArrayPrototypeMap([
77+
'ibm866',
78+
'koi8-r',
79+
'koi8-u',
80+
'macintosh',
81+
'x-mac-cyrillic',
82+
'iso-8859-2',
83+
'iso-8859-3',
84+
'iso-8859-4',
85+
'iso-8859-5',
86+
'iso-8859-6',
87+
'iso-8859-7',
88+
'iso-8859-8',
89+
'iso-8859-8-i',
90+
'iso-8859-10',
91+
'iso-8859-13',
92+
'iso-8859-14',
93+
'iso-8859-15',
94+
'iso-8859-16',
95+
'windows-874',
96+
'windows-1250',
97+
'windows-1251',
98+
'windows-1252',
99+
'windows-1253',
100+
'windows-1254',
101+
'windows-1255',
102+
'windows-1256',
103+
'windows-1257',
104+
'windows-1258',
105+
'x-user-defined', // Has to be last: the native decoder special-cases the final index
106+
], (e, i) => [e, i])));
107+
108+
// Reports whether `encodingName` is one of the keys of `encodingsSinglebyte`.
const isSinglebyteEncoding = (encodingName) => encodingsSinglebyte.has(encodingName);
109+
110+
/**
 * Builds a decoder function bound to one single-byte encoding.
 *
 * The encoding name is resolved to its index in `encodingsSinglebyte` once,
 * up front; the returned closure forwards that index and the `fatal` flag to
 * the native `decodeSingleByte` binding on every call.
 * @param {string} encoding Name to look up in `encodingsSinglebyte`.
 * @param {boolean} fatal Passed through unchanged as the third native argument.
 * @returns {Function} A one-argument function that returns whatever
 *   `decodeSingleByte(input, index, fatal)` returns for the given input.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` is not in the map.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const tableIndex = encodingsSinglebyte.get(encoding);
  if (tableIndex !== undefined) {
    return (input) => decodeSingleByte(input, tableIndex, fatal);
  }
  throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
}
115+
74116
const encodings = new SafeMap([
75117
['unicode-1-1-utf-8', 'utf-8'],
76118
['unicode11utf8', 'utf-8'],
@@ -462,7 +504,7 @@ function makeTextDecoderICU() {
462504
validateDecoder(this);
463505
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
464506

465-
if (this[kMethod]) return this[kMethod](parseInput(input));
507+
if (this[kMethod]) return this[kMethod](input);
466508

467509
this[kUTF8FastPath] &&= !(options?.stream);
468510

lib/internal/encoding/single-byte.js

Lines changed: 0 additions & 155 deletions
This file was deleted.

src/encoding_binding.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
3+
#include "encoding_singlebyte.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
@@ -389,6 +390,66 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
389390
}
390391
}
391392

393+
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
394+
Environment* env = Environment::GetCurrent(args);
395+
396+
CHECK_GE(args.Length(), 2);
397+
398+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
399+
args[0]->IsArrayBufferView())) {
400+
return node::THROW_ERR_INVALID_ARG_TYPE(
401+
env->isolate(),
402+
"The \"input\" argument must be an instance of SharedArrayBuffer, "
403+
"ArrayBuffer or ArrayBufferView.");
404+
}
405+
406+
CHECK(args[1]->IsInt32());
407+
const int encoding = args[1].As<v8::Int32>()->Value();
408+
CHECK(encoding >= 0 && encoding < 29);
409+
410+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
411+
const uint8_t* data = buffer.data();
412+
size_t length = buffer.length();
413+
414+
if (length == 0) return args.GetReturnValue().SetEmptyString();
415+
416+
const char* dataChar = reinterpret_cast<const char*>(data);
417+
if (!simdutf::validate_ascii_with_errors(dataChar, length).error) {
418+
Local<Value> ret;
419+
if (StringBytes::Encode(env->isolate(), dataChar, length, LATIN1)
420+
.ToLocal(&ret)) {
421+
args.GetReturnValue().Set(ret);
422+
}
423+
return;
424+
}
425+
426+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
427+
428+
if (encoding == 28) {
429+
// x-user-defined
430+
for (size_t i = 0; i < length; i++) {
431+
dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
432+
}
433+
} else {
434+
bool has_fatal = args[2]->IsTrue();
435+
436+
const uint16_t* table = tSingleByteEncodings[encoding];
437+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
438+
439+
const char16_t* dst16 = reinterpret_cast<char16_t*>(dst);
440+
if (has_fatal && fSingleByteEncodings[encoding] &&
441+
simdutf::find(dst16, dst16 + length, 0xfffd) != dst16 + length) {
442+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
443+
env->isolate(), "The encoded data was not valid for this encoding");
444+
}
445+
}
446+
447+
Local<Value> ret;
448+
if (StringBytes::Raw(env->isolate(), dst, length).ToLocal(&ret)) {
449+
args.GetReturnValue().Set(ret);
450+
}
451+
}
452+
392453
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
393454
Environment* env = Environment::GetCurrent(args);
394455
CHECK_GE(args.Length(), 1);
@@ -421,6 +482,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
421482
SetMethod(isolate, target, "encodeInto", EncodeInto);
422483
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
423484
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
485+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
424486
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
425487
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
426488
}
@@ -438,6 +500,7 @@ void BindingData::RegisterTimerExternalReferences(
438500
registry->Register(EncodeInto);
439501
registry->Register(EncodeUtf8String);
440502
registry->Register(DecodeUTF8);
503+
registry->Register(DecodeSingleByte);
441504
registry->Register(ToASCII);
442505
registry->Register(ToUnicode);
443506
}

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeSingleByte(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

0 commit comments

Comments
 (0)