Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for TextEncoder#encodeInto #1279

Merged
merged 1 commit into from
Feb 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::decode;
use crate::descriptor::{Descriptor, VectorKind};
use crate::Bindgen;
use crate::{Bindgen, EncodeInto};
use failure::{bail, Error, ResultExt};
use std::collections::{HashMap, HashSet};
use walrus::{MemoryId, Module};
Expand Down Expand Up @@ -1168,19 +1168,77 @@ impl<'a> Context<'a> {
} else {
""
};
self.global(&format!(

// The first implementation we have for this is to use
// `TextEncoder#encode` which has been around for quite some time.
let use_encode = format!(
"
function passStringToWasm(arg) {{
{}
const buf = cachedTextEncoder.encode(arg);
const ptr = wasm.__wbindgen_malloc(buf.length);
getUint8Memory().set(buf, ptr);
WASM_VECTOR_LEN = buf.length;
return ptr;
}}
",
debug
));
);

// Another possibility is to use `TextEncoder#encodeInto` which is much
// newer and isn't implemented everywhere yet. It's more efficient,
// however, becaues it allows us to elide an intermediate allocation.
let use_encode_into = format!(
"
{}
let size = arg.length;
let ptr = wasm.__wbindgen_malloc(size);
let writeOffset = 0;
while (true) {{
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
arg = arg.substring(read);
writeOffset += written;
if (arg.length === 0) {{
break;
}}
ptr = wasm.__wbindgen_realloc(ptr, size, size * 2);
size *= 2;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the allocator isn't a power-of-two allocator, this can overallocate. It is known that the max number of bytes that encodeInto can require is JavaScript string length times 3. (It follows that if the Wasm program knows the string to be short-lived on the Wasm side, it makes sense to just allocate for the ma length case up front and not reallocate.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm good point! Do you think it'd be worthwhile do do something like malloc(string.length), and if that fails realloc(string.length * 3) followed by realloc(the_actual_length) and that'd be more appropriate than this loop?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the allocations can be assumed the be short-lived, I suggest going with malloc(string.length * 3) without a loop.

If they are assumed to be long-lived, I suggest coming up with a magic number magic that characterizes how much smaller the initial allocation needs to than the worst case for it to be worthwhile not to allocate for the worst case right away.

Then:

let min = roundUpToBucketSize(string.length);
let max = string.length * 3;
if (min + magic > max) {
  // min is close enough to max anyway
  let m = malloc(max);
  let (read, written) = encodeInto(...);
  assert(read == string.length);
} else {
  let m = malloc(min);
  let (read, written) = encodeInto(...);
  if (read < string.length) {
     let inputLeft = string.length - read;
     let outputNeeded = inputLeft * 3;
     let m = realloc(m, written + outputNeeded);
     // ... encodeInto with input starting from read and output starting from written
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok thanks for the info! I've opened up #1313 to further track this.

}}
WASM_VECTOR_LEN = writeOffset;
return ptr;
",
debug
);

match self.config.encode_into {
EncodeInto::Never => {
self.global(&format!(
"function passStringToWasm(arg) {{ {} }}",
use_encode,
));
}
EncodeInto::Always => {
self.require_internal_export("__wbindgen_realloc")?;
self.global(&format!(
"function passStringToWasm(arg) {{ {} }}",
use_encode_into,
));
}
EncodeInto::Test => {
self.require_internal_export("__wbindgen_realloc")?;
self.global(&format!(
"
let passStringToWasm;
if (typeof cachedTextEncoder.encodeInto === 'function') {{
passStringToWasm = function(arg) {{ {} }};
}} else {{
passStringToWasm = function(arg) {{ {} }};
}}
",
use_encode_into,
use_encode,
));
}
}
Ok(())
}

Expand Down
13 changes: 13 additions & 0 deletions crates/cli-support/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub struct Bindgen {
// module to be "ready to be instantiated on any thread"
threads: Option<wasm_bindgen_threads_xform::Config>,
anyref: bool,
encode_into: EncodeInto,
}

enum Input {
Expand All @@ -44,6 +45,12 @@ enum Input {
None,
}

pub enum EncodeInto {
Test,
Always,
Never,
}

impl Bindgen {
pub fn new() -> Bindgen {
Bindgen {
Expand All @@ -64,6 +71,7 @@ impl Bindgen {
weak_refs: env::var("WASM_BINDGEN_WEAKREF").is_ok(),
threads: threads_config(),
anyref: env::var("WASM_BINDGEN_ANYREF").is_ok(),
encode_into: EncodeInto::Test,
}
}

Expand Down Expand Up @@ -144,6 +152,11 @@ impl Bindgen {
self
}

pub fn encode_into(&mut self, mode: EncodeInto) -> &mut Bindgen {
self.encode_into = mode;
self
}

pub fn generate<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
self._generate(path.as_ref())
}
Expand Down
13 changes: 12 additions & 1 deletion crates/cli/src/bin/wasm-bindgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use failure::{bail, Error};
use serde::Deserialize;
use std::path::PathBuf;
use std::process;
use wasm_bindgen_cli_support::Bindgen;
use wasm_bindgen_cli_support::{Bindgen, EncodeInto};

// no need for jemalloc bloat in this binary (and we don't need speed)
#[global_allocator]
Expand Down Expand Up @@ -32,6 +32,8 @@ Options:
--keep-debug Keep debug sections in wasm files
--remove-name-section Remove the debugging `name` section of the file
--remove-producers-section Remove the telemetry `producers` section
--encode-into MODE Whether or not to use TextEncoder#encodeInto,
valid values are [test, always, never]
-V --version Print the version number of wasm-bindgen
";

Expand All @@ -51,6 +53,7 @@ struct Args {
flag_remove_name_section: bool,
flag_remove_producers_section: bool,
flag_keep_debug: bool,
flag_encode_into: Option<String>,
arg_input: Option<PathBuf>,
}

Expand Down Expand Up @@ -100,6 +103,14 @@ fn rmain(args: &Args) -> Result<(), Error> {
if let Some(ref name) = args.flag_out_name {
b.out_name(name);
}
if let Some(mode) = &args.flag_encode_into {
match mode.as_str() {
"test" => b.encode_into(EncodeInto::Test),
"always" => b.encode_into(EncodeInto::Always),
"never" => b.encode_into(EncodeInto::Never),
s => bail!("invalid encode-into mode: `{}`", s),
};
}

let out_dir = match args.flag_out_dir {
Some(ref p) => p,
Expand Down
23 changes: 22 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ pub mod __rt {
}

if_std! {
use std::alloc::{alloc, dealloc, Layout};
use std::alloc::{alloc, dealloc, realloc, Layout};
use std::mem;

#[no_mangle]
Expand All @@ -911,6 +911,27 @@ pub mod __rt {
}
}

malloc_failure();
}

#[no_mangle]
pub extern "C" fn __wbindgen_realloc(ptr: *mut u8, old_size: usize, new_size: usize) -> *mut u8 {
let align = mem::align_of::<usize>();
debug_assert!(old_size > 0);
debug_assert!(new_size > 0);
if let Ok(layout) = Layout::from_size_align(old_size, align) {
unsafe {
let ptr = realloc(ptr, layout, new_size);
if !ptr.is_null() {
return ptr
}
}
}
malloc_failure();
}

#[cold]
fn malloc_failure() -> ! {
if cfg!(debug_assertions) {
super::throw_str("invalid malloc request")
} else {
Expand Down