Skip to content

Commit 660f594

Browse files
Dan Carney and rmagrin
authored and committed
deps: V8: cherry-pick 64b36b441179
Original commit message:

    optimize ascii fast path in WriteUtf8V2

    Change-Id: If28168cb4395b953d0ec642ef4fc618ce963dbcd
    Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7124103
    Reviewed-by: Toon Verwaest <verwaest@chromium.org>
    Commit-Queue: Erik Corry <erikcorry@chromium.org>
    Reviewed-by: Erik Corry <erikcorry@chromium.org>
    Cr-Commit-Position: refs/heads/main@{#103542}

Refs: v8/v8@64b36b4
1 parent 065c9b0 commit 660f594

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

common.gypi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
# Reset this number to 0 on major V8 upgrades.
4040
# Increment by one for each non-official patch applied to deps/v8.
41-
'v8_embedder_string': '-node.40',
41+
'v8_embedder_string': '-node.41',
4242

4343
##### V8 defaults for Node.js #####
4444

deps/v8/src/strings/unicode-inl.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "src/base/logging.h"
1212
#include "src/utils/utils.h"
13+
#include "third_party/simdutf/simdutf.h"
1314

1415
namespace unibrow {
1516

@@ -219,6 +220,16 @@ bool Utf8::IsValidCharacter(uchar c) {
219220
c != kBadChar);
220221
}
221222

223+
// One-byte specialization: forwards the `size` bytes starting at `buffer` to
// simdutf::validate_ascii and returns its result unchanged.
// The reinterpret_cast only adapts the pointer type to the `const char*` that
// the simdutf entry point takes; no bytes are copied or modified here.
// NOTE(review): presumably validate_ascii is true iff every byte is < 0x80 --
// confirm against the simdutf documentation.
template <>
224+
bool Utf8::IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size) {
225+
return simdutf::validate_ascii(reinterpret_cast<const char*>(buffer), size);
226+
}
227+
228+
// Two-byte specialization: unconditionally returns false; neither `buffer`
// nor `size` is inspected.
// NOTE(review): the only call visible in this change (in Utf8::Encode) sits
// behind `if (kSourceIsOneByte)`, so this overload presumably exists only so
// that call compiles for both Char types -- confirm there are no other
// callers that expect a real ASCII check on two-byte data.
template <>
229+
bool Utf8::IsAsciiOneByteString<uint16_t>(const uint16_t* buffer, size_t size) {
230+
return false;
231+
}
232+
222233
template <typename Char>
223234
Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
224235
char* buffer, size_t capacity,
@@ -234,8 +245,17 @@ Utf8::EncodingResult Utf8::Encode(v8::base::Vector<const Char> string,
234245
const Char* characters = string.begin();
235246
size_t content_capacity = capacity - write_null;
236247
CHECK_LE(content_capacity, capacity);
237-
uint16_t last = Utf16::kNoPreviousCharacter;
238248
size_t read_index = 0;
249+
if (kSourceIsOneByte) {
250+
size_t writeable = std::min(string.size(), content_capacity);
251+
// Just memcpy when possible.
252+
if (writeable > 0 && Utf8::IsAsciiOneByteString(characters, writeable)) {
253+
memcpy(buffer, characters, writeable);
254+
read_index = writeable;
255+
write_index = writeable;
256+
}
257+
}
258+
uint16_t last = Utf16::kNoPreviousCharacter;
239259
for (; read_index < string.size(); read_index++) {
240260
Char character = characters[read_index];
241261

deps/v8/src/strings/unicode.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,16 @@ class V8_EXPORT_PRIVATE Utf8 {
212212
// - valid code point range.
213213
static bool ValidateEncoding(const uint8_t* str, size_t length);
214214

215+
// Primary template declaration for the ASCII check used by Encode's memcpy
// fast path. `buffer` points at `size` code units of type Char. Only the
// uint8_t and uint16_t specializations are declared in this header; their
// definitions are in unicode-inl.h.
template <typename Char>
216+
static bool IsAsciiOneByteString(const Char* buffer, size_t size);
217+
218+
// Declaration of the uint8_t specialization; the definition in unicode-inl.h
// delegates to simdutf::validate_ascii.
// NOTE(review): this is an explicit specialization declared at class scope
// (permitted by CWG 727); confirm that every compiler this tree supports
// accepts it, since not all toolchains have implemented that resolution.
template <>
219+
inline bool IsAsciiOneByteString<uint8_t>(const uint8_t* buffer, size_t size);
220+
221+
// Declaration of the uint16_t specialization; the definition in
// unicode-inl.h returns false without reading the buffer.
template <>
222+
inline bool IsAsciiOneByteString<uint16_t>(const uint16_t* buffer,
223+
size_t size);
224+
215225
// Encode the given characters as Utf8 into the provided output buffer.
216226
struct EncodingResult {
217227
size_t bytes_written;

0 commit comments

Comments
 (0)