Skip to content

Commit 98d328a

Browse files
theweipengaduh95
authored and committed
src: detect whether the string is one byte representation or not
References: #56090 PR-URL: #56147 Fixes: #56090 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net>
1 parent ea0cd57 commit 98d328a

File tree

5 files changed

+120
-0
lines changed

5 files changed

+120
-0
lines changed

doc/api/v8.md

+39
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,45 @@ setTimeout(() => {
13041304
}, 1000);
13051305
```
13061306

1307+
## `v8.isStringOneByteRepresentation(content)`
1308+
1309+
<!-- YAML
1310+
added: REPLACEME
1311+
-->
1312+
1313+
* `content` {string}
1314+
* Returns: {boolean}
1315+
1316+
V8 supports only two underlying representations for a string: `Latin-1/ISO-8859-1` and `UTF16`.
1317+
If `content` uses `Latin-1/ISO-8859-1` as its underlying representation, this function returns `true`;
1318+
otherwise, it returns `false`.
1319+
1320+
A return value of `false` does not necessarily mean that the string contains characters outside `Latin-1/ISO-8859-1`.
1321+
A string containing only `Latin-1` characters may sometimes still be represented as `UTF16`.
1322+
1323+
```js
1324+
const { isStringOneByteRepresentation } = require('node:v8');
1325+
1326+
// Tag values written with `writeUint8` (no explicit offset) to record which
// encoding was used for the string data that follows.
const Encoding = {
1327+
latin1: 1,
1328+
utf16le: 2,
1329+
};
1330+
const buffer = Buffer.alloc(100);
1331+
// Serializes `input` into `buffer` as: a one-byte encoding tag, a 32-bit
// little-endian length at offset 1, and the string data starting at offset 5.
// The encoding is chosen from the string's underlying representation.
function writeString(input) {
1332+
if (isStringOneByteRepresentation(input)) {
1333+
buffer.writeUint8(Encoding.latin1);
1334+
buffer.writeUint32LE(input.length, 1);
1335+
buffer.write(input, 5, 'latin1');
1336+
} else {
1337+
buffer.writeUint8(Encoding.utf16le);
1338+
// The recorded length is doubled: two bytes per UTF-16 code unit.
buffer.writeUint32LE(input.length * 2, 1);
1339+
buffer.write(input, 5, 'utf16le');
1340+
}
1341+
}
1342+
writeString('hello');
1343+
writeString('你好');
1344+
```
1345+
13071346
[HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
13081347
[Hook Callbacks]: #hook-callbacks
13091348
[V8]: https://developers.google.com/v8/

lib/v8.js

+13
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ const binding = internalBinding('v8');
108108
const {
109109
cachedDataVersionTag,
110110
setFlagsFromString: _setFlagsFromString,
111+
isStringOneByteRepresentation: _isStringOneByteRepresentation,
111112
updateHeapStatisticsBuffer,
112113
updateHeapSpaceStatisticsBuffer,
113114
updateHeapCodeStatisticsBuffer,
@@ -159,6 +160,17 @@ function setFlagsFromString(flags) {
159160
_setFlagsFromString(flags);
160161
}
161162

163+
/**
164+
* Reports whether V8 stores `content` using its one-byte representation.
* Validates the argument, then delegates to the native binding.
165+
* @param {string} content The string to inspect.
166+
* @returns {boolean} Result reported by the native binding.
167+
*/
168+
function isStringOneByteRepresentation(content) {
169+
// Reject non-string input here, before calling into the native binding.
validateString(content, 'content');
170+
// `_isStringOneByteRepresentation` is the alias introduced by the
// destructuring earlier in this file (presumably from
// `internalBinding('v8')` - confirm).
return _isStringOneByteRepresentation(content);
171+
}
172+
173+
162174
/**
163175
* Gets the current V8 heap statistics.
164176
* @returns {{
@@ -445,4 +457,5 @@ module.exports = {
445457
startupSnapshot,
446458
setHeapSnapshotNearHeapLimit,
447459
GCProfiler,
460+
isStringOneByteRepresentation,
448461
};

src/node_external_reference.h

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ namespace node {
1212

1313
using CFunctionCallbackWithOneByteString =
1414
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
15+
16+
// Signature for fast API callbacks that take two v8::Local<v8::Value>
// parameters and return bool.
// NOTE(review): the parameter names here match CFunctionCallback below, but
// the one implementation visible in this commit
// (FastIsStringOneByteRepresentation in node_v8.cc) names them `receiver`
// and `target` - confirm which naming is intended.
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
17+
v8::Local<v8::Value> receiver);
1518
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
1619
v8::Local<v8::Value> receiver);
1720
using CFunctionCallbackReturnDouble =
@@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
9093
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
9194
V(CFunctionCallback) \
9295
V(CFunctionCallbackWithOneByteString) \
96+
V(CFunctionCallbackReturnBool) \
9397
V(CFunctionCallbackReturnDouble) \
9498
V(CFunctionCallbackReturnInt32) \
9599
V(CFunctionCallbackValueReturnDouble) \

src/node_v8.cc

+28
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
namespace node {
3333
namespace v8_utils {
3434
using v8::Array;
35+
using v8::CFunction;
3536
using v8::Context;
3637
using v8::FunctionCallbackInfo;
3738
using v8::FunctionTemplate;
@@ -238,6 +239,23 @@ void SetFlagsFromString(const FunctionCallbackInfo<Value>& args) {
238239
V8::SetFlagsFromString(*flags, static_cast<size_t>(flags.length()));
239240
}
240241

242+
// Binding for v8.isStringOneByteRepresentation() that receives its arguments
// through FunctionCallbackInfo. Requires exactly one argument, which must be
// a string, and sets the return value to String::IsOneByte() for it.
static void IsStringOneByteRepresentation(
243+
const FunctionCallbackInfo<Value>& args) {
244+
// The JS wrapper in lib/v8.js validates the argument type before calling
// in, so these CHECKs guard against internal misuse.
CHECK_EQ(args.Length(), 1);
245+
CHECK(args[0]->IsString());
246+
bool is_one_byte = args[0].As<String>()->IsOneByte();
247+
args.GetReturnValue().Set(is_one_byte);
248+
}
249+
250+
// Fast API variant of IsStringOneByteRepresentation: returns the result
// directly as a bool instead of going through FunctionCallbackInfo. The first
// parameter is not used by the body; `target` is the value being inspected
// and must be a string.
static bool FastIsStringOneByteRepresentation(Local<Value> receiver,
251+
const Local<Value> target) {
252+
CHECK(target->IsString());
253+
return target.As<String>()->IsOneByte();
254+
}
255+
256+
// CFunction wrapper around FastIsStringOneByteRepresentation. Initialize()
// passes its address to SetFastMethodNoSideEffect, and
// RegisterExternalReferences() registers its type info.
CFunction fast_is_string_one_byte_representation_(
257+
CFunction::Make(FastIsStringOneByteRepresentation));
258+
241259
static const char* GetGCTypeName(v8::GCType gc_type) {
242260
switch (gc_type) {
243261
case v8::GCType::kGCTypeScavenge:
@@ -478,6 +496,13 @@ void Initialize(Local<Object> target,
478496
// Export symbols used by v8.setFlagsFromString()
479497
SetMethod(context, target, "setFlagsFromString", SetFlagsFromString);
480498

499+
// Export symbols used by v8.isStringOneByteRepresentation()
500+
SetFastMethodNoSideEffect(context,
501+
target,
502+
"isStringOneByteRepresentation",
503+
IsStringOneByteRepresentation,
504+
&fast_is_string_one_byte_representation_);
505+
481506
// GCProfiler
482507
Local<FunctionTemplate> t =
483508
NewFunctionTemplate(env->isolate(), GCProfiler::New);
@@ -497,6 +522,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
497522
registry->Register(GCProfiler::New);
498523
registry->Register(GCProfiler::Start);
499524
registry->Register(GCProfiler::Stop);
525+
registry->Register(IsStringOneByteRepresentation);
526+
registry->Register(FastIsStringOneByteRepresentation);
527+
registry->Register(fast_is_string_one_byte_representation_.GetTypeInfo());
500528
}
501529

502530
} // namespace v8_utils
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
'use strict';
2+
require('../common');
3+
const assert = require('assert');
4+
const { isStringOneByteRepresentation } = require('v8');
5+
6+
// Every non-string value must be rejected with an error that names the
// `content` argument.
[
7+
undefined,
8+
null,
9+
false,
10+
5n,
11+
5,
12+
Symbol(),
13+
() => {},
14+
{},
15+
].forEach((value) => {
16+
assert.throws(
17+
() => { isStringOneByteRepresentation(value); },
18+
/The "content" argument must be of type string/
19+
);
20+
});
21+
22+
// A plain ASCII literal is expected to report the one-byte representation.
{
23+
const latin1String = 'hello world!';
24+
// Run this inside a for loop to trigger the fast API
25+
for (let i = 0; i < 10_000; i++) {
26+
assert.strictEqual(isStringOneByteRepresentation(latin1String), true);
27+
}
28+
}
29+
30+
// A literal containing CJK characters and emoji is expected to report false.
{
31+
const utf16String = '你好😀😃';
32+
// Run this inside a for loop to trigger the fast API
33+
for (let i = 0; i < 10_000; i++) {
34+
assert.strictEqual(isStringOneByteRepresentation(utf16String), false);
35+
}
36+
}

0 commit comments

Comments
 (0)