Skip to content

Prefer inline representation over static #278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions integration-tests/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ fn main() {
"a",
"b",
"address",
"defaults",
"area",
"body",
"font-weight",
"br",
"html",
"head",
"id",
"❤",
"❤💯",
"❤💯❤💯",
])
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs"))
.unwrap()
Expand Down
8 changes: 4 additions & 4 deletions integration-tests/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ bench_all!([eq ne lt clone_string]
for longer_string = super::longer_dynamic_a, super::longer_dynamic_b);

bench_all!([eq ne intern as_ref clone is_static lt]
for static_atom = test_atom!("a"), test_atom!("b"));
for static_atom = test_atom!("defaults"), test_atom!("font-weight"));

bench_all!([intern as_ref clone is_inline]
for short_inline_atom = mk("e"), mk("f"));
Expand All @@ -168,13 +168,13 @@ bench_all!([eq ne intern as_ref clone is_dynamic lt]
for longer_dynamic_atom = mk(super::longer_dynamic_a), mk(super::longer_dynamic_b));

bench_all!([intern as_ref clone is_static]
for static_at_runtime = mk("a"), mk("b"));
for static_at_runtime = mk("defaults"), mk("font-weight"));

bench_all!([ne lt x_static y_inline]
for static_vs_inline = test_atom!("a"), mk("f"));
for static_vs_inline = test_atom!("defaults"), mk("f"));

bench_all!([ne lt x_static y_dynamic]
for static_vs_dynamic = test_atom!("a"), mk(super::longer_dynamic_b));
for static_vs_dynamic = test_atom!("defaults"), mk(super::longer_dynamic_b));

bench_all!([ne lt x_inline y_dynamic]
for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b));
Expand Down
25 changes: 18 additions & 7 deletions integration-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ fn test_as_slice() {
#[test]
fn test_types() {
assert!(Atom::from("").is_static());
assert!(Atom::from("id").is_static());
assert!(Atom::from("body").is_static());
assert!(Atom::from("a").is_static());
assert!(Atom::from("defaults").is_static());
assert!(Atom::from("font-weight").is_static());
assert!(Atom::from("id").is_inline());
assert!(Atom::from("body").is_inline());
assert!(Atom::from("a").is_inline());
assert!(Atom::from("address").is_inline());
assert!(Atom::from("c").is_inline());
assert!(Atom::from("zz").is_inline());
assert!(Atom::from("zzz").is_inline());
Expand Down Expand Up @@ -168,11 +171,13 @@ fn repr() {
// static atom table, the tag values, etc.

// Static atoms
check_static("a", test_atom!("a"));
check_static("address", test_atom!("address"));
check_static("area", test_atom!("area"));
check_static("defaults", test_atom!("defaults"));
check_static("font-weight", test_atom!("font-weight"));

// Inline atoms
check("a", 0x0000_0000_0000_6111);
check("address", 0x7373_6572_6464_6171);
check("area", 0x0000_0061_6572_6141);
check("e", 0x0000_0000_0000_6511);
check("xyzzy", 0x0000_797A_7A79_7851);
check("xyzzy01", 0x3130_797A_7A79_7871);
Expand All @@ -193,8 +198,13 @@ fn test_threads() {

// Checks that each `test_atom!` expansion compares equal to the atom
// produced at runtime by `Atom::from` for the same string.
#[test]
fn atom_macro() {
// Strings of at most 7 bytes; the code generator keeps these out of the
// static set and emits them as inline constants.
assert_eq!(test_atom!("a"), Atom::from("a"));
assert_eq!(test_atom!("body"), Atom::from("body"));
assert_eq!(test_atom!("address"), Atom::from("address"));
// Non-ASCII input: "❤" is 3 bytes and "❤💯" is 7 bytes in UTF-8, so both
// are still within the 7-byte limit.
assert_eq!(test_atom!("❤"), Atom::from("❤"));
assert_eq!(test_atom!("❤💯"), Atom::from("❤💯"));
// Strings longer than 7 bytes, which the code generator places in the
// static set.
assert_eq!(test_atom!("font-weight"), Atom::from("font-weight"));
assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯"));
}

#[test]
Expand Down Expand Up @@ -292,7 +302,8 @@ fn test_from_string() {

#[test]
fn test_try_static() {
assert!(Atom::try_static("head").is_some());
assert!(Atom::try_static("defaults").is_some());
assert!(Atom::try_static("head").is_none());
assert!(Atom::try_static("not in the static table").is_none());
}

Expand Down
53 changes: 37 additions & 16 deletions src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,25 @@ impl<Static> Atom<Static> {
}
}

/// Builds an inline atom from pre-packed string bytes. Intended for the
/// generated `atom!()` macros rather than for direct use.
///
/// `n` holds the string data in its upper 7 bytes and must have its lowest
/// byte zeroed; `len` is the length of the string in bytes. The lowest byte
/// of the result receives the inline tag and the length.
#[inline(always)]
#[doc(hidden)]
pub const fn pack_inline(n: u64, len: u8) -> Self {
    // On big-endian targets the 7 string bytes are mirrored so that they sit
    // in the opposite order within the word. The byte swap moves the zero
    // low byte to the top, and the shift by 8 pushes it back out so the
    // lowest byte is zero again and free for the tag/length bits.
    let payload = if cfg!(target_endian = "big") {
        n.to_le() << 8
    } else {
        n
    };

    let header = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);

    // SAFETY: the tag bits contributed by INLINE_TAG keep the packed word
    // from ever being zero.
    let unsafe_data = unsafe { NonZeroU64::new_unchecked(header | payload) };

    Self {
        unsafe_data,
        phantom: PhantomData,
    }
}

/// Returns the bits of the packed representation selected by `TAG_MASK`,
/// narrowed to a `u8`.
fn tag(&self) -> u8 {
    let word = self.unsafe_data.get();
    (word & TAG_MASK) as u8
}
Expand Down Expand Up @@ -186,20 +205,22 @@ impl<Static: StaticAtomSet> Hash for Atom<Static> {

impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
fn from(string_to_add: Cow<'a, str>) -> Self {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let len = string_to_add.len();
if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes())
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
let len = string_to_add.len();
if len == 0 {
Self::pack_static(Static::empty_string_index())
} else if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes());
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
let data = ptr.as_ptr() as u64;
debug_assert!(0 == data & TAG_MASK);
Expand All @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
}
})
})
}
}
}

Expand Down
51 changes: 41 additions & 10 deletions string-cache-codegen/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,19 @@ impl AtomType {
// which would cause divisions by zero in rust-phf.
self.atoms.insert(String::new());

let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
let hash_state = phf_generator::generate_hash(&atoms);
// Strings over 7 bytes + empty string added to static set.
// Otherwise stored inline.
let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self
.atoms
.iter()
.map(String::as_str)
.partition(|s| s.len() > 7 || s.is_empty());

// Static strings
let hash_state = phf_generator::generate_hash(&static_strs);
let phf_generator::HashState { key, disps, map } = hash_state;
let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect();
let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect();
let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
let indices = 0..atoms.len() as u32;

Expand Down Expand Up @@ -228,16 +236,33 @@ impl AtomType {
let macro_name = new_term(&*self.macro_name);
let module = module.parse::<proc_macro2::TokenStream>().unwrap();
let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase());
let const_names: Vec<_> = atoms
let new_const_name = |atom: &str| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
}
new_term(&name)
};
let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect();

// Inline strings
let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs
.iter()
.map(|atom| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
.map(|s| {
let const_name = new_const_name(s);

let mut value = 0u64;
for (index, c) in s.bytes().enumerate() {
value = value | ((c as u64) << (index * 8 + 8));
}
new_term(&name)

let len = s.len() as u8;

(const_name, (value, len))
})
.collect();
.unzip();
let (inline_values, inline_lengths): (Vec<_>, Vec<_>) =
inline_values_and_lengths.into_iter().unzip();

quote! {
#atom_doc
Expand Down Expand Up @@ -265,13 +290,19 @@ impl AtomType {
#(
pub const #const_names: #type_name = #type_name::pack_static(#indices);
)*
#(
pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths);
)*

#macro_doc
#[macro_export]
macro_rules! #macro_name {
#(
(#atoms) => { #module::#const_names };
)*
#(
(#inline_strs) => { #module::#inline_const_names };
)*
}
}
}
Expand Down
Loading