Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making passStringToWasm smaller #1736

Merged
merged 3 commits into from
Aug 26, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 84 additions & 84 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -809,8 +809,10 @@ impl<'a> Context<'a> {
if !self.should_write_global("pass_string_to_wasm") {
return Ok(());
}

self.require_internal_export("__wbindgen_malloc")?;
self.expose_wasm_vector_len();

let debug = if self.config.debug {
"
if (typeof(arg) !== 'string') throw new Error('expected a string argument');
Expand All @@ -830,10 +832,10 @@ impl<'a> Context<'a> {
"
function passStringToWasm(arg) {{
{}
const size = Buffer.byteLength(arg);
const ptr = wasm.__wbindgen_malloc(size);
getNodeBufferMemory().write(arg, ptr, size);
WASM_VECTOR_LEN = size;
const len = Buffer.byteLength(arg);
const ptr = wasm.__wbindgen_malloc(len);
getNodeBufferMemory().write(arg, ptr, len);
WASM_VECTOR_LEN = len;
return ptr;
}}
",
Expand All @@ -844,7 +846,52 @@ impl<'a> Context<'a> {
}

self.expose_text_encoder()?;

// The first implementation we have for this is to use
// `TextEncoder#encode` which has been around for quite some time.
let encode = "function (arg, view) {
const buf = cachedTextEncoder.encode(arg);
view.set(buf);
return {
read: arg.length,
written: buf.length
};
}";

// Another possibility is to use `TextEncoder#encodeInto` which is much
// newer and isn't implemented everywhere yet. It's more efficient,
// however, because it allows us to elide an intermediate allocation.
let encode_into = "function (arg, view) {
return cachedTextEncoder.encodeInto(arg, view);
}";

// Looks like `encodeInto` doesn't currently work when the memory passed
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
// a `SharedArrayBuffer` is in use.
let shared = self.module.memories.get(self.memory).shared;

match self.config.encode_into {
EncodeInto::Always if !shared => {
self.global(&format!("
const encodeString = {};
", encode_into));
}
EncodeInto::Test if !shared => {
self.global(&format!("
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
? {}
: {});
", encode_into, encode));
}
_ => {
self.global(&format!("
const encodeString = {};
", encode));
}
}

self.expose_uint8_memory();
self.require_internal_export("__wbindgen_realloc")?;

// A fast path that directly writes char codes into WASM memory as long
// as it finds only ASCII characters.
Expand All @@ -855,100 +902,53 @@ impl<'a> Context<'a> {
// This might be not very intuitive, but such calls are usually more
// expensive in mainstream engines than staying in the JS, and
// charCodeAt on ASCII strings is usually optimised to raw bytes.
let start_encoding_as_ascii = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let offset = 0;
{{
const mem = getUint8Memory();
for (; offset < arg.length; offset++) {{
const code = arg.charCodeAt(offset);
if (code > 0x7F) break;
mem[ptr + offset] = code;
}}
}}
",
debug
);
let encode_as_ascii = "\
let len = arg.length;
let ptr = wasm.__wbindgen_malloc(len);

// The first implementation we have for this is to use
// `TextEncoder#encode` which has been around for quite some time.
let use_encode = format!(
"
{}
if (offset !== arg.length) {{
const buf = cachedTextEncoder.encode(arg.slice(offset));
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
getUint8Memory().set(buf, ptr + offset);
offset += buf.length;
}}
WASM_VECTOR_LEN = offset;
return ptr;
",
start_encoding_as_ascii
);
const mem = getUint8Memory();

// Another possibility is to use `TextEncoder#encodeInto` which is much
// newer and isn't implemented everywhere yet. It's more efficient,
// however, because it allows us to elide an intermediate allocation.
let use_encode_into = format!(
"
let offset = 0;

for (; offset < len; offset++) {
const code = arg.charCodeAt(offset);
if (code > 0x7F) break;
mem[ptr + offset] = code;
}
";

// TODO:
// When converting a JS string to UTF-8, the maximum size is `arg.length * 3`,
// so we just allocate that. This wastes memory, so we should investigate
// looping over the string to calculate the precise size, or perhaps using
// `shrink_to_fit` on the Rust side.
self.global(&format!(
"function passStringToWasm(arg) {{
{}
{}
if (offset !== arg.length) {{
arg = arg.slice(offset);
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3);
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
const ret = cachedTextEncoder.encodeInto(arg, view);
if (offset !== len) {{
if (offset !== 0) {{
arg = arg.slice(offset);
}}
ptr = wasm.__wbindgen_realloc(ptr, len, len = offset + arg.length * 3);
const view = getUint8Memory().subarray(ptr + offset, ptr + len);
const ret = encodeString(arg, view);
{}
offset += ret.written;
}}

WASM_VECTOR_LEN = offset;
return ptr;
",
start_encoding_as_ascii,
}}",
debug,
encode_as_ascii,
if self.config.debug {
"if (ret.read != arg.length) throw new Error('failed to pass whole string');"
} else {
""
},
);

// Looks like `encodeInto` doesn't currently work when the memory passed
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
// a `SharedArrayBuffer` is in use.
let shared = self.module.memories.get(self.memory).shared;
));

match self.config.encode_into {
EncodeInto::Always if !shared => {
self.require_internal_export("__wbindgen_realloc")?;
self.global(&format!(
"function passStringToWasm(arg) {{ {} }}",
use_encode_into,
));
}
EncodeInto::Test if !shared => {
self.require_internal_export("__wbindgen_realloc")?;
self.global(&format!(
"
let passStringToWasm;
if (typeof cachedTextEncoder.encodeInto === 'function') {{
passStringToWasm = function(arg) {{ {} }};
}} else {{
passStringToWasm = function(arg) {{ {} }};
}}
",
use_encode_into, use_encode,
));
}
_ => {
self.global(&format!(
"function passStringToWasm(arg) {{ {} }}",
use_encode,
));
}
}
Ok(())
}

Expand Down