Skip to content

Commit

Permalink
perf(ext/web): add op_encode_binary_string (denoland#16352)
Browse files Browse the repository at this point in the history
Add a new op to use in `reader.readAsBinaryString(blob)`.

```
File API binary string: 400b    35.12 µs/iter    (21.93 µs … 3.27 ms)  31.87 µs 131.95 µs 217.63 µs
File API binary string: 4kb     46.49 µs/iter    (29.36 µs … 4.42 ms)   42.5 µs 122.48 µs  155.1 µs
File API binary string: 2.2mb    4.17 ms/iter     (1.75 ms … 8.54 ms)   5.48 ms   7.39 ms   8.54 ms
```

**main**

```
benchmark                          time (avg)             (min … max)       p75       p99      p995
--------------------------------------------------------------------- -----------------------------
File API binary string: 400b    56.17 µs/iter  (43.09 µs … 784.52 µs)   49.6 µs 177.18 µs 241.23 µs
File API binary string: 4kb     277.2 µs/iter   (240.29 µs … 1.84 ms) 269.87 µs 649.79 µs 774.46 µs
File API binary string: 2.2mb  180.03 ms/iter (173.32 ms … 194.35 ms) 182.54 ms 194.35 ms 194.35 ms
```

It can also handle bigger files, when encoding a 200mb file, main
crashes with OOM

```
<--- Last few GCs --->

[132677:0x560504676550]     5012 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure; 
[132677:0x560504676550]     5038 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure; 
[132677:0x560504676550]     5064 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure;
```
  • Loading branch information
marcosc90 authored Oct 24, 2022
1 parent 7a65b8e commit ac5fcf6
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 10 deletions.
58 changes: 58 additions & 0 deletions cli/tests/unit/text_encoding_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ Deno.test(function toStringShouldBeWebCompatibility() {
const decoder = new TextDecoder();
assertEquals(decoder.toString(), "[object TextDecoder]");
});

Deno.test(function textEncoderShouldCoerceToString() {
const encoder = new TextEncoder();
const fixutreText = "text";
Expand All @@ -261,3 +262,60 @@ Deno.test(function textEncoderShouldCoerceToString() {
const decoded = decoder.decode(bytes);
assertEquals(decoded, fixutreText);
});

Deno.test(function binaryEncode() {
// @ts-ignore: Deno.core allowed
const ops = Deno.core.ops;
function asBinaryString(bytes: Uint8Array): string {
return Array.from(bytes).map(
(v: number) => String.fromCodePoint(v),
).join("");
}

function decodeBinary(binaryString: string) {
const chars: string[] = Array.from(binaryString);
return chars.map((v: string): number | undefined => v.codePointAt(0));
}

// invalid utf-8 code points
const invalid = new Uint8Array([0xC0]);
assertEquals(
ops.op_encode_binary_string(invalid),
asBinaryString(invalid),
);

const invalid2 = new Uint8Array([0xC1]);
assertEquals(
ops.op_encode_binary_string(invalid2),
asBinaryString(invalid2),
);

for (let i = 0, j = 255; i <= 255; i++, j--) {
const bytes = new Uint8Array([i, j]);
const binaryString = ops.op_encode_binary_string(bytes);
assertEquals(
binaryString,
asBinaryString(bytes),
);
assertEquals(Array.from(bytes), decodeBinary(binaryString));
}

const inputs = [
"σ😀",
"Кириллица is Cyrillic",
"𝓽𝓮𝔁𝓽",
"lone𝄞\ud888surrogate",
"\udc00\ud800",
"\ud800",
];
for (const input of inputs) {
const bytes = new TextEncoder().encode(input);
const binaryString = ops.op_encode_binary_string(bytes);
assertEquals(
binaryString,
asBinaryString(bytes),
);

assertEquals(Array.from(bytes), decodeBinary(binaryString));
}
});
12 changes: 2 additions & 10 deletions ext/web/10_filereader.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"use strict";

((window) => {
const core = window.Deno.core;
const webidl = window.__bootstrap.webidl;
const { forgivingBase64Encode } = window.__bootstrap.infra;
const { ProgressEvent } = window.__bootstrap.event;
Expand All @@ -21,8 +22,6 @@
const { parseMimeType } = window.__bootstrap.mimesniff;
const { DOMException } = window.__bootstrap.domException;
const {
ArrayPrototypeJoin,
ArrayPrototypeMap,
ArrayPrototypePush,
ArrayPrototypeReduce,
FunctionPrototypeCall,
Expand All @@ -33,7 +32,6 @@
ObjectPrototypeIsPrototypeOf,
queueMicrotask,
SafeArrayIterator,
StringFromCodePoint,
Symbol,
TypedArrayPrototypeSet,
TypeError,
Expand Down Expand Up @@ -170,13 +168,7 @@
break;
}
case "BinaryString":
this[result] = ArrayPrototypeJoin(
ArrayPrototypeMap(
[...new Uint8Array(bytes.buffer)],
(v) => StringFromCodePoint(v),
),
"",
);
this[result] = core.ops.op_encode_binary_string(bytes);
break;
case "Text": {
let decoder = undefined;
Expand Down
6 changes: 6 additions & 0 deletions ext/web/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ pub fn init<P: TimersPermission + 'static>(
op_encoding_new_decoder::decl(),
op_encoding_decode::decl(),
op_encoding_encode_into::decl(),
op_encode_binary_string::decl(),
op_blob_create_part::decl(),
op_blob_slice_part::decl(),
op_blob_read_part::decl(),
Expand Down Expand Up @@ -337,6 +338,11 @@ fn op_encoding_encode_into(
Ok(())
}

#[op]
fn op_encode_binary_string(s: &[u8]) -> ByteString {
ByteString::from(s)
}

/// Creates a [`CancelHandle`] resource that can be used to cancel invocations of certain ops.
#[op(fast)]
pub fn op_cancel_handle(state: &mut OpState) -> u32 {
Expand Down

0 comments on commit ac5fcf6

Please sign in to comment.