Skip to content

Commit b9b5d45

Browse files
anonrig and lemire committed
buffer: improve base64 and base64url performance
Co-authored-by: Daniel Lemire <daniel@lemire.me>
1 parent db17461 commit b9b5d45

File tree

1 file changed

+88
-14
lines changed

1 file changed

+88
-14
lines changed

src/string_bytes.cc

Lines changed: 88 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -346,14 +346,76 @@ size_t StringBytes::Write(Isolate* isolate,
346346
}
347347

348348
case BASE64URL:
349-
// Fall through
349+
if (str->IsExternalOneByte()) { // 8-bit case
350+
auto ext = str->GetExternalOneByteStringResource();
351+
// Try with WHATWG base64 standard first, adapted for base64url
352+
simdutf::result r = simdutf::base64_to_binary_safe(
353+
ext->data(), ext->length(), buf, buflen, simdutf::base64_url);
354+
if (r.error == simdutf::error_code::SUCCESS) {
355+
nbytes = buflen;
356+
} else {
357+
// The input does not follow the WHATWG forgiving-base64 specification
358+
// adapted for base64url
359+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
360+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
361+
}
362+
} else { // 16-bit case
363+
// Typically, a base64url string is stored as an 8-bit string within v8.
364+
// Thus str->IsOneByte() is typically true. The next line thus often
365+
// allocates a temporary 16-bit buffer to store a 16-bit copy of the
366+
// 8-bit v8 string. Hence the creation of the String::Value value is
367+
// likely a performance bottleneck.
368+
String::Value value(isolate, str);
369+
// Try with WHATWG base64 standard first
370+
simdutf::result r = simdutf::base64_to_binary_safe(
371+
reinterpret_cast<const char16_t*>(*value),
372+
value.length(),
373+
buf,
374+
buflen,
375+
simdutf::base64_url);
376+
if (r.error == simdutf::error_code::SUCCESS) {
377+
nbytes = buflen;
378+
} else {
379+
// The input does not follow the WHATWG forgiving-base64 specification
380+
// (adapted for base64url with + and / replaced by - and _).
381+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
382+
nbytes = base64_decode(buf, buflen, *value, value.length());
383+
}
384+
}
350385
case BASE64:
351-
if (str->IsExternalOneByte()) {
386+
if (str->IsExternalOneByte()) { // 8-bit case
352387
auto ext = str->GetExternalOneByteStringResource();
353-
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
354-
} else {
388+
// Try with WHATWG base64 standard first
389+
auto result = simdutf::base64_to_binary_safe(
390+
ext->data(), ext->length(), buf, buflen, simdutf::base64_default);
391+
if (result.error == simdutf::error_code::SUCCESS) {
392+
nbytes = buflen;
393+
} else {
394+
// The input does not follow the WHATWG forgiving-base64 specification
395+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
396+
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
397+
}
398+
} else { // 16-bit case
399+
// Typically, a base64 string is stored as an 8-bit string within v8.
400+
// Thus str->IsOneByte() is typically true. The next line thus often
401+
// allocates a temporary 16-bit buffer to store a 16-bit copy of the
402+
// 8-bit v8 string. Hence, the creation of the String::Value value is
403+
// likely a performance bottleneck.
355404
String::Value value(isolate, str);
356-
nbytes = base64_decode(buf, buflen, *value, value.length());
405+
// Try with WHATWG base64 standard first
406+
auto result = simdutf::base64_to_binary_safe(
407+
reinterpret_cast<const char16_t*>(*value),
408+
value.length(),
409+
buf,
410+
buflen,
411+
simdutf::base64_default);
412+
if (result.error == simdutf::error_code::SUCCESS) {
413+
nbytes = buflen;
414+
} else {
415+
// The input does not follow the WHATWG forgiving-base64 specification
416+
// https://infra.spec.whatwg.org/#forgiving-base64-decode
417+
nbytes = base64_decode(buf, buflen, *value, value.length());
418+
}
357419
}
358420
break;
359421

@@ -411,9 +473,12 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
411473
break;
412474

413475
case BASE64URL:
414-
// Fall through
476+
data_size = simdutf::base64_length_from_binary(str->Length(),
477+
simdutf::base64_url);
478+
break;
479+
415480
case BASE64:
416-
data_size = base64_decoded_size_fast(str->Length());
481+
data_size = simdutf::base64_length_from_binary(str->Length());
417482
break;
418483

419484
case HEX:
@@ -452,11 +517,16 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
452517
case UCS2:
453518
return Just(str->Length() * sizeof(uint16_t));
454519

455-
case BASE64URL:
456-
// Fall through
520+
case BASE64URL: {
521+
String::Value value(isolate, str);
522+
return Just(simdutf::base64_length_from_binary(value.length(),
523+
simdutf::base64_url));
524+
}
525+
457526
case BASE64: {
458527
String::Value value(isolate, str);
459-
return Just(base64_decoded_size(*value, value.length()));
528+
return Just(simdutf::base64_length_from_binary(value.length(),
529+
simdutf::base64_default));
460530
}
461531

462532
case HEX:
@@ -609,28 +679,32 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
609679
return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
610680

611681
case BASE64: {
612-
size_t dlen = base64_encoded_size(buflen);
682+
size_t dlen =
683+
simdutf::base64_length_from_binary(buflen, simdutf::base64_default);
613684
char* dst = node::UncheckedMalloc(dlen);
614685
if (dst == nullptr) {
615686
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
616687
return MaybeLocal<Value>();
617688
}
618689

619-
size_t written = base64_encode(buf, buflen, dst, dlen);
690+
size_t written =
691+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_default);
620692
CHECK_EQ(written, dlen);
621693

622694
return ExternOneByteString::New(isolate, dst, dlen, error);
623695
}
624696

625697
case BASE64URL: {
626-
size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
698+
size_t dlen =
699+
simdutf::base64_length_from_binary(buflen, simdutf::base64_url);
627700
char* dst = node::UncheckedMalloc(dlen);
628701
if (dst == nullptr) {
629702
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
630703
return MaybeLocal<Value>();
631704
}
632705

633-
size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
706+
size_t written =
707+
simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);
634708
CHECK_EQ(written, dlen);
635709

636710
return ExternOneByteString::New(isolate, dst, dlen, error);

0 commit comments

Comments
 (0)