Skip to content

Commit

Permalink
Add a display type for JsString to allow formatting without allocatio…
Browse files Browse the repository at this point in the history
…ns (#3951)

Co-authored-by: Haled Odat <8566042+HalidOdat@users.noreply.github.com>
  • Loading branch information
hansl and HalidOdat authored Sep 9, 2024
1 parent 4778780 commit 5534ec2
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 16 deletions.
44 changes: 44 additions & 0 deletions core/string/src/display.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//! Display implementations for [`crate::JsString`].
use crate::{CodePoint, JsStr, JsStrVariant};
use std::fmt;
use std::fmt::Write;

/// Display adapter for a borrowed [`JsStr`] (and therefore [`crate::JsString`]).
///
/// Its [`fmt::Display`] implementation writes the string straight into the
/// formatter: valid code points are emitted as-is, while unpaired surrogates
/// are rendered as `\uXXXX` escapes (four upper-case hexadecimal digits).
#[derive(Debug)]
pub struct JsStrDisplayEscaped<'a> {
// The borrowed string that gets rendered.
inner: JsStr<'a>,
}

impl fmt::Display for JsStrDisplayEscaped<'_> {
    /// Writes the wrapped string into `f` one character at a time, escaping
    /// unpaired surrogates as `\uXXXX`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.inner.variant() {
            // Latin1 bytes map one-to-one onto `char`s and can never form an
            // unpaired surrogate, so no surrogate check is needed here.
            JsStrVariant::Latin1(bytes) => {
                for byte in bytes.iter().copied() {
                    f.write_char(char::from(byte))?;
                }
                Ok(())
            }
            // UTF-16 data is decoded code point by code point; anything that
            // is not a valid scalar value is written as an escape sequence.
            JsStrVariant::Utf16(_) => {
                for code_point in self.inner.code_points() {
                    match code_point {
                        CodePoint::Unicode(ch) => f.write_char(ch)?,
                        CodePoint::UnpairedSurrogate(unit) => write!(f, "\\u{unit:04X}")?,
                    }
                }
                Ok(())
            }
        }
    }
}

impl<'a> From<JsStr<'a>> for JsStrDisplayEscaped<'a> {
fn from(inner: JsStr<'a>) -> Self {
Self { inner }
}
}

#[test]
fn latin1() {
    // In ISO-8859-1 the byte 0xE9 encodes `é` (see https://www.ascii-code.com/ISO-8859-1).
    let input = JsStr::latin1(b"Hello \xE9 world!");
    let rendered = JsStrDisplayEscaped { inner: input }.to_string();

    assert_eq!(rendered, "Hello é world!");
}
40 changes: 26 additions & 14 deletions core/string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#![allow(clippy::module_name_repetitions)]

mod common;
mod display;
mod iter;
mod str;
mod tagged;
Expand All @@ -25,13 +26,15 @@ mod tagged;
mod tests;

use self::{iter::Windows, str::JsSliceIndex};
use crate::display::JsStrDisplayEscaped;
use crate::tagged::{Tagged, UnwrappedTagged};
#[doc(inline)]
pub use crate::{
common::StaticJsStrings,
iter::Iter,
str::{JsStr, JsStrVariant},
};
use std::fmt::Write;
use std::{
alloc::{alloc, dealloc, Layout},
cell::Cell,
Expand Down Expand Up @@ -150,6 +153,17 @@ impl CodePoint {
}
}

impl std::fmt::Display for CodePoint {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CodePoint::Unicode(c) => f.write_char(*c),
CodePoint::UnpairedSurrogate(c) => {
write!(f, "\\u{c:04X}")
}
}
}
}

/// A `usize` that contains a flag and the length of the Latin1/UTF-16 data.
/// ```text
/// ┌────────────────────────────────────┐
Expand Down Expand Up @@ -528,10 +542,7 @@ impl JsString {
/// Gets an iterator of all the Unicode codepoints of a [`JsString`].
///
/// Delegates to the `code_points` method of the borrowed [`JsStr`] view, so
/// the decoding logic lives in a single place. Unpaired surrogates are
/// yielded as [`CodePoint::UnpairedSurrogate`].
#[inline]
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
    self.as_str().code_points()
}

/// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )`
Expand Down Expand Up @@ -935,6 +946,15 @@ impl JsString {
UnwrappedTagged::Tag(_inner) => None,
}
}

/// Gets a displayable escaped string. This may be faster and has less
/// allocations than `format!("{}", str.to_string_escaped())` when
/// displaying.
#[inline]
#[must_use]
pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
JsStrDisplayEscaped::from(self.as_str())
}
}

impl Clone for JsString {
Expand Down Expand Up @@ -1036,10 +1056,7 @@ impl Drop for JsString {
impl ToStringEscaped for JsString {
    /// Builds an owned `String` from the escaped display form of this string
    /// (see [`JsString::display_escaped`]).
    #[inline]
    fn to_string_escaped(&self) -> String {
        self.display_escaped().to_string()
    }
}

Expand Down Expand Up @@ -1232,11 +1249,6 @@ pub(crate) trait ToStringEscaped {
impl ToStringEscaped for [u16] {
    /// Converts the UTF-16 code units into a [`JsString`] and returns that
    /// string's escaped rendering, so both implementations share one code path.
    #[inline]
    fn to_string_escaped(&self) -> String {
        JsString::from(self).to_string_escaped()
    }
}
12 changes: 11 additions & 1 deletion core/string/src/str.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{is_trimmable_whitespace, is_trimmable_whitespace_latin1, Iter};
use crate::{is_trimmable_whitespace, is_trimmable_whitespace_latin1, CodePoint, Iter};
use std::{
hash::{Hash, Hasher},
slice::SliceIndex,
Expand Down Expand Up @@ -234,6 +234,16 @@ impl<'a> JsStr<'a> {
let (m, n) = (self.len(), needle.len());
m >= n && needle == self.get(m - n..).expect("already checked size")
}

/// Gets an iterator of all the Unicode codepoints of a [`JsStr`].
///
/// The string is always decoded as UTF-16 code units; there is no
/// Latin1-specific fast path.
#[inline]
pub(crate) fn code_points(self) -> impl Iterator<Item = CodePoint> + Clone + 'a {
    char::decode_utf16(self.iter()).map(|decoded| {
        decoded.map_or_else(
            |err| CodePoint::UnpairedSurrogate(err.unpaired_surrogate()),
            CodePoint::Unicode,
        )
    })
}
}

impl Hash for JsStr<'_> {
Expand Down
22 changes: 21 additions & 1 deletion core/string/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use std::hash::{BuildHasher, BuildHasherDefault, Hash};

use crate::{JsStr, JsString, StaticJsString, StaticJsStrings};
use crate::{JsStr, JsString, StaticJsString, StaticJsStrings, ToStringEscaped};

use rustc_hash::FxHasher;

Expand Down Expand Up @@ -174,6 +174,26 @@ fn conversion_to_known_static_js_string() {
assert!(string.unwrap().as_str().is_latin1());
}

#[test]
fn to_string_escaped() {
    // Well-formed text, whether astral or plain ASCII, must come back unchanged.
    for text in ["Hello, \u{1D49E} world!", "Hello, world!"] {
        assert_eq!(JsString::from(text).to_string_escaped(), text);
    }

    // A lone low surrogate followed by a lone high surrogate must both be
    // escaped, while the control character U+0015 is emitted verbatim.
    let unpaired_surrogates: [u16; 3] = [0xDC58, 0xD83C, 0x0015];
    let escaped = JsString::from(&unpaired_surrogates).to_string_escaped();
    assert_eq!(escaped, "\\uDC58\\uD83C\u{15}");
}

#[test]
fn from_static_js_string() {
static STATIC_HELLO_WORLD: StaticJsString =
Expand Down

0 comments on commit 5534ec2

Please sign in to comment.