Skip to content

Commit f6063e3

Browse files
committed
add buffer.transcode for nodejs_compat
1 parent 3f94280 commit f6063e3

File tree

8 files changed

+321
-34
lines changed

8 files changed

+321
-34
lines changed

src/node/buffer.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
SlowBuffer,
1111
isAscii,
1212
isUtf8,
13+
transcode,
1314
} from 'node-internal:internal_buffer';
1415

1516
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
@@ -30,6 +31,7 @@ export {
3031
SlowBuffer,
3132
isAscii,
3233
isUtf8,
34+
transcode,
3335
};
3436

3537
export default {
@@ -46,4 +48,5 @@ export default {
4648
SlowBuffer,
4749
isAscii,
4850
isUtf8,
51+
transcode,
4952
};

src/node/internal/buffer.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ export function decode(buffer: Uint8Array, state: Uint8Array): string;
3737
export function flush(state: Uint8Array): string;
3838
export function isAscii(value: ArrayBufferView): boolean;
3939
export function isUtf8(value: ArrayBufferView): boolean;
40+
/**
 * Transcodes the bytes of `source` from `fromEncoding` to `toEncoding` and
 * returns the converted bytes as an `ArrayBuffer`.
 *
 * NOTE(review): presumably backed by the native `BufferUtil::transcode`
 * method — confirm which encoding spellings the native side accepts.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string): ArrayBuffer;

src/node/internal/internal_buffer.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2294,6 +2294,18 @@ export function isUtf8(value: ArrayBufferView) {
22942294
return bufferUtil.isUtf8(value);
22952295
}
22962296

2297+
/**
 * Re-encodes the bytes of `source` from one character encoding to another.
 *
 * Both encoding names are normalized and validated first, and the normalized
 * names are what gets handed to the native implementation, so that aliases
 * and differently-cased spellings reach it in canonical form.
 *
 * @param source The bytes to transcode.
 * @param fromEncoding Name of the encoding `source` is currently in.
 * @param toEncoding Name of the encoding to convert to.
 * @returns The transcoded bytes, as produced by `bufferUtil.transcode`.
 * @throws ERR_UNKNOWN_ENCODING If either encoding name is not recognised.
 */
export function transcode(
  source: ArrayBufferView,
  fromEncoding: string,
  toEncoding: string
): ArrayBuffer {
  const normalizedFromEncoding = normalizeEncoding(fromEncoding);
  if (normalizedFromEncoding === undefined || !Buffer.isEncoding(normalizedFromEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(fromEncoding);
  }

  const normalizedToEncoding = normalizeEncoding(toEncoding);
  if (normalizedToEncoding === undefined || !Buffer.isEncoding(normalizedToEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(toEncoding);
  }

  // Pass the normalized names, not the caller's raw strings: the raw spelling
  // may be an alias (e.g. different case) that the native side does not know.
  return bufferUtil.transcode(source, normalizedFromEncoding, normalizedToEncoding);
}
2308+
22972309
export default {
22982310
Buffer,
22992311
constants,

src/workerd/api/node/buffer.c++

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
#include "buffer-string-search.h"
99
#include <workerd/jsg/buffersource.h>
1010
#include <kj/encoding.h>
11-
#include <algorithm>
11+
#include <kj/array.h>
1212
#include "simdutf.h"
13+
#include "i18n.h"
14+
15+
#include <algorithm>
1316

1417
// These are defined by <sys/byteorder.h> or <netinet/in.h> on some systems.
1518
// To avoid warnings, undefine them before redefining them.
@@ -85,36 +88,6 @@ void SwapBytes(kj::ArrayPtr<kj::byte> bytes) {
8588
}
8689
}
8790

88-
enum class Encoding {
89-
ASCII,
90-
LATIN1,
91-
UTF8,
92-
UTF16LE,
93-
BASE64,
94-
BASE64URL,
95-
HEX,
96-
};
97-
98-
Encoding getEncoding(kj::StringPtr encoding) {
99-
if (encoding == "utf8"_kj) {
100-
return Encoding::UTF8;
101-
} else if (encoding == "ascii") {
102-
return Encoding::ASCII;
103-
} else if (encoding == "latin1") {
104-
return Encoding::LATIN1;
105-
} else if (encoding == "utf16le") {
106-
return Encoding::UTF16LE;
107-
} else if (encoding == "base64") {
108-
return Encoding::BASE64;
109-
} else if (encoding == "base64url") {
110-
return Encoding::BASE64URL;
111-
} else if (encoding == "hex") {
112-
return Encoding::HEX;
113-
}
114-
115-
KJ_UNREACHABLE;
116-
}
117-
11891
kj::Maybe<uint> tryFromHexDigit(char c) {
11992
if ('0' <= c && c <= '9') {
12093
return c - '0';
@@ -216,8 +189,9 @@ uint32_t writeInto(
216189
dest.first(amountToCopy).copyFrom(bytes.first(amountToCopy));
217190
return amountToCopy;
218191
}
192+
default:
193+
KJ_UNREACHABLE;
219194
}
220-
KJ_UNREACHABLE;
221195
}
222196

223197
kj::Array<kj::byte> decodeStringImpl(
@@ -272,8 +246,9 @@ kj::Array<kj::byte> decodeStringImpl(
272246
string.writeInto(js, buf, options);
273247
return decodeHexTruncated(buf, strict);
274248
}
249+
default:
250+
KJ_UNREACHABLE;
275251
}
276-
KJ_UNREACHABLE;
277252
}
278253
} // namespace
279254

@@ -561,8 +536,9 @@ jsg::JsString toStringImpl(
561536
case Encoding::HEX: {
562537
return js.str(kj::encodeHex(slice));
563538
}
539+
default:
540+
KJ_UNREACHABLE;
564541
}
565-
KJ_UNREACHABLE;
566542
}
567543

568544
} // namespace
@@ -876,5 +852,16 @@ bool BufferUtil::isUtf8(kj::Array<kj::byte> buffer) {
876852
return simdutf::validate_utf8(buffer.asChars().begin(), buffer.size());
877853
}
878854

855+
// Converts `source` from the encoding named by `rawFromEncoding` to the one named by
// `rawToEncoding`. Raises a JS Error ("Unable to transcode Buffer") unless
// i18n::canBeTranscoded() accepts both encodings.
kj::Array<kj::byte> BufferUtil::transcode(
    kj::Array<kj::byte> source, kj::String rawFromEncoding, kj::String rawToEncoding) {
  const auto from = getEncoding(rawFromEncoding.asPtr());
  const auto to = getEncoding(rawToEncoding.asPtr());

  const bool supported = i18n::canBeTranscoded(from) && i18n::canBeTranscoded(to);
  JSG_REQUIRE(supported, Error, "Unable to transcode Buffer");

  return i18n::transcode(source, from, to);
}
865+
879866
} // namespace workerd::api::node {
880867

src/workerd/api/node/buffer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ class BufferUtil final: public jsg::Object {
8181
jsg::JsString flush(jsg::Lock& js, kj::Array<kj::byte> state);
8282
bool isAscii(kj::Array<kj::byte> bytes);
8383
bool isUtf8(kj::Array<kj::byte> bytes);
84+
kj::Array<kj::byte> transcode(kj::Array<kj::byte> source,
85+
kj::String rawFromEncoding,
86+
kj::String rawToEncoding);
8487

8588
JSG_RESOURCE_TYPE(BufferUtil) {
8689
JSG_METHOD(byteLength);
@@ -94,6 +97,7 @@ class BufferUtil final: public jsg::Object {
9497
JSG_METHOD(write);
9598
JSG_METHOD(isAscii);
9699
JSG_METHOD(isUtf8);
100+
// Registers the transcode() member declared in this class; the name must match it exactly.
JSG_METHOD(transcode);
97101

98102
// For StringDecoder
99103
JSG_METHOD(decode);

src/workerd/api/node/i18n.c++

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#include "i18n.h"
2+
3+
#include <kj/common.h>
4+
#include <kj/debug.h>
5+
#include <kj/one-of.h>
6+
7+
#include <workerd/jsg/exception.h>
8+
9+
#include <string>
10+
11+
namespace workerd::api::node {
12+
13+
namespace i18n {
14+
15+
namespace {
16+
17+
// Returns the converter name used for `input` when opening a converter with ucnv_open().
// Only ASCII, LATIN1, UCS2 and UTF8 are handled; any other value is treated as unreachable.
const char* getEncodingName(Encoding input) {
  if (input == Encoding::ASCII) return "us-ascii";
  if (input == Encoding::LATIN1) return "iso8859-1";
  if (input == Encoding::UCS2) return "utf16le";
  if (input == Encoding::UTF8) return "utf-8";
  KJ_UNREACHABLE;
}
31+
32+
typedef TranscodeResult (*TranscodeImpl)(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
33+
Encoding toEncoding);
34+
35+
// Generic transcoding path: converts `source` from `fromEncoding` to `toEncoding` with
// ucnv_convertEx(), substituting '?' for characters the target encoding cannot represent.
//
// Returns the converted bytes (owning their storage) on success, or the failing ICU status
// code otherwise.
TranscodeResult TranscodeDefault(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                 Encoding toEncoding) {
  // ucnv_convertEx() rejects null source/target pointers, so an empty input is handled here.
  if (source.size() == 0) {
    return kj::Array<kj::byte>();
  }

  Converter to(toEncoding);
  std::string substitute(to.minSize(), '?');
  to.setSubstitudeChars(substitute);

  Converter from(fromEncoding);

  // Worst case every source byte becomes one character of maximal width in the target
  // encoding, so the bound has to be a product, not a sum.
  // NOTE(review): assumes Converter::maxSize() is the target's max bytes per character.
  const size_t limit = source.size() * to.maxSize();

  // Heap-allocate the output: the result outlives this frame, and `limit` is unbounded.
  auto out = kj::heapArray<kj::byte>(limit);
  char* const begin = out.asChars().begin();
  char* target = begin;
  const char* sourcePos = source.asChars().begin();
  UErrorCode status = U_ZERO_ERROR;
  ucnv_convertEx(to.conv(), from.conv(), &target, begin + limit, &sourcePos,
                 sourcePos + source.size(), nullptr, nullptr, nullptr, nullptr, true, true,
                 &status);

  if (U_FAILURE(status)) {
    return status;
  }

  // Hand back only the bytes actually written, keeping the backing allocation alive.
  auto written = out.slice(0, target - begin);
  return written.attach(kj::mv(out));
}
57+
58+
// Converts single-byte-encoded `source` (per `fromEncoding`) to UTF-16 code units with
// ucnv_toUChars(). `toEncoding` is unused; it is present to match the TranscodeImpl signature.
//
// Returns the UTF-16 bytes (owning their storage) on success, or the failing ICU status code.
// NOTE(review): the result is in host byte order; assumes a little-endian host for "utf16le".
TranscodeResult TranscodeToUCS2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                Encoding toEncoding) {
  if (source.size() == 0) {
    return kj::Array<kj::byte>();
  }

  Converter from(fromEncoding);

  // One UChar per source byte. The capacity passed to ICU is counted in UChars, not bytes.
  auto out = kj::heapArray<UChar>(source.size());
  UErrorCode status = U_ZERO_ERROR;
  const auto written = ucnv_toUChars(from.conv(), out.begin(), out.size(),
                                     source.asChars().begin(), source.size(), &status);

  if (U_FAILURE(status)) {
    return status;
  }

  // Return only the code units ICU produced, keeping the heap allocation alive.
  auto bytes = out.slice(0, written).asBytes();
  return bytes.attach(kj::mv(out));
}
72+
73+
// Converts UTF-16 `source` to `toEncoding` with ucnv_fromUChars(), substituting '?' for
// characters the target encoding cannot represent. `fromEncoding` is unused; it is present
// to match the TranscodeImpl signature. A trailing odd byte in `source` is ignored.
//
// Returns the converted bytes (owning their storage) on success, or the failing ICU status code.
// NOTE(review): the input is read in host byte order; assumes a little-endian host.
TranscodeResult TranscodeFromUCS2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                  Encoding toEncoding) {
  const size_t lengthInChars = source.size() / sizeof(UChar);
  if (lengthInChars == 0) {
    return kj::Array<kj::byte>();
  }

  Converter to(toEncoding);
  std::string substitute(to.minSize(), '?');
  to.setSubstitudeChars(substitute);

  // `source` is a byte buffer with no alignment guarantee, so copy it into UChar storage
  // before handing it to ICU.
  auto units = kj::heapArray<UChar>(lengthInChars);
  units.asBytes().copyFrom(source.slice(0, lengthInChars * sizeof(UChar)).asConst());

  // NOTE(review): assumes Converter::maxSize() is the target's max bytes per character.
  auto out = kj::heapArray<kj::byte>(lengthInChars * to.maxSize());
  UErrorCode status = U_ZERO_ERROR;
  const auto written = ucnv_fromUChars(to.conv(), out.asChars().begin(), out.size(),
                                       units.begin(), units.size(), &status);

  if (U_FAILURE(status)) {
    return status;
  }

  auto bytes = out.slice(0, written);
  return bytes.attach(kj::mv(out));
}
90+
91+
// Converts UTF-8 `source` to UTF-16 code units with ucnv_toUChars(). `toEncoding` is unused;
// it is present to match the TranscodeImpl signature.
//
// Returns the UTF-16 bytes (owning their storage) on success, or the failing ICU status code.
// NOTE(review): the result is in host byte order; assumes a little-endian host for "utf16le".
TranscodeResult TranscodeUcs2FromUtf8(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                      Encoding toEncoding) {
  if (source.size() == 0) {
    return kj::Array<kj::byte>();
  }

  Converter from(fromEncoding);

  // UTF-8 never decodes to more UTF-16 code units than it has bytes, so one UChar per
  // source byte is a sufficient capacity.
  auto out = kj::heapArray<UChar>(source.size());
  UErrorCode status = U_ZERO_ERROR;
  const auto written = ucnv_toUChars(from.conv(), out.begin(), out.size(),
                                     source.asChars().begin(), source.size(), &status);

  if (U_FAILURE(status)) {
    return status;
  }

  auto bytes = out.slice(0, written).asBytes();
  return bytes.attach(kj::mv(out));
}
94+
95+
// Converts UTF-16 `source` to UTF-8 with ucnv_fromUChars(). `fromEncoding` is unused; it is
// present to match the TranscodeImpl signature. A trailing odd byte in `source` is ignored.
//
// Returns the UTF-8 bytes (owning their storage) on success, or the failing ICU status code.
// NOTE(review): the input is read in host byte order; assumes a little-endian host.
TranscodeResult TranscodeUtf8FromUcs2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                                      Encoding toEncoding) {
  const size_t lengthInChars = source.size() / sizeof(UChar);
  if (lengthInChars == 0) {
    return kj::Array<kj::byte>();
  }

  Converter to(toEncoding);

  // `source` is a byte buffer with no alignment guarantee, so copy it into UChar storage
  // before handing it to ICU.
  auto units = kj::heapArray<UChar>(lengthInChars);
  units.asBytes().copyFrom(source.slice(0, lengthInChars * sizeof(UChar)).asConst());

  // A single UTF-16 code unit encodes to at most 3 UTF-8 bytes (a surrogate pair is two
  // units for 4 bytes), so 3 bytes per unit always suffices.
  auto out = kj::heapArray<kj::byte>(lengthInChars * 3);
  UErrorCode status = U_ZERO_ERROR;
  const auto written = ucnv_fromUChars(to.conv(), out.asChars().begin(), out.size(),
                                       units.begin(), units.size(), &status);

  if (U_FAILURE(status)) {
    return status;
  }

  auto bytes = out.slice(0, written);
  return bytes.attach(kj::mv(out));
}
98+
99+
} // namespace
100+
101+
// Opens an ICU converter for `encoding` (asserting that ucnv_open() succeeded), takes
// ownership of it, and then applies `substitude` as its substitution characters.
Converter::Converter(Encoding encoding, std::string_view substitude) {
  UErrorCode status = U_ZERO_ERROR;
  UConverter* opened = ucnv_open(getEncodingName(encoding), &status);
  KJ_ASSERT(U_SUCCESS(status));

  conv_.reset(opened);
  setSubstitudeChars(substitude);
}
109+
110+
// Wraps an already-opened ICU converter and applies `substitude` as its substitution
// characters.
Converter::Converter(UConverter* converter, std::string_view substitude) {
  conv_.reset(converter);
  setSubstitudeChars(substitude);
}
113+
114+
// Transcodes `source` from `fromEncoding` to `toEncoding`.
//
// Picks a specialised routine for conversions to or from UCS2 and otherwise uses the generic
// TranscodeDefault path. Throws a JS Error ("Unable to transcode Buffer") when the selected
// routine reports an ICU error. `fromEncoding` must be ASCII, LATIN1, UTF8 or UCS2; any other
// value is treated as unreachable.
kj::Array<kj::byte> transcode(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
                              Encoding toEncoding) {
  TranscodeImpl transcode_function = &TranscodeDefault;
  switch (fromEncoding) {
    case Encoding::ASCII:
    case Encoding::LATIN1:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeToUCS2;
      }
      break;
    case Encoding::UTF8:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeUcs2FromUtf8;
      }
      break;
    case Encoding::UCS2:
      switch (toEncoding) {
        case Encoding::UCS2:
          transcode_function = &TranscodeDefault;
          break;
        case Encoding::UTF8:
          transcode_function = &TranscodeUtf8FromUcs2;
          break;
        default:
          transcode_function = &TranscodeFromUCS2;
          break;
      }
      // Leave the outer switch here; without this break control falls through into the
      // `default` label below and hits KJ_UNREACHABLE for every UCS2 source.
      break;
    default:
      KJ_UNREACHABLE;
  }

  auto result = transcode_function(source, fromEncoding, toEncoding);
  KJ_SWITCH_ONEOF(result) {
    KJ_CASE_ONEOF(value, UErrorCode) {
      JSG_FAIL_REQUIRE(Error, "Unable to transcode Buffer");
    }
    KJ_CASE_ONEOF(v, kj::Array<kj::byte>) {
      return kj::mv(v);
    }
  }
  KJ_UNREACHABLE;
}
155+
156+
} // namespace i18n
157+
158+
} // namespace workerd::api::node

0 commit comments

Comments
 (0)