Skip to content

Commit 658b26d

Browse files
authored
refactor(es/preset-env): Use phf for corejs3 entry (#10712)
**Description:** This is a follow-up to #10684. I picked a single example (the core-js 3 entry table) to experiment with. Some simple benchmarking suggests there is no significant regression in single-lookup performance:

```
// before
es/preset-env/entry/import time: [531.47 ns 532.42 ns 533.55 ns]
// after
es/preset-env/entry/import time: [549.62 ns 551.02 ns 552.55 ns]
```

Because strings are deduplicated when they are packed, the generated data is more compact than the original JSON, which also reduces the binary size:

```
// before
16397965 ../../target/release/deps/polyfills-d11a2ed3340dd897
// after
16070261 ../../target/release/deps/polyfills-4b49b166ca0322eb
```

Since `../../data/core-js-compat/entries.json` is about 500 KB, I believe this also saves close to 500 KB of memory.
1 parent c32569d commit 658b26d

File tree

6 files changed

+201
-23
lines changed

6 files changed

+201
-23
lines changed

Cargo.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ resolver = "2"
125125
wasmer = { version = "6.0.0", default-features = false }
126126
wasmer-wasix = { version = "0.600.0", default-features = false }
127127

128+
precomputed-map = "0.2"
129+
foldhash = "0.1"
130+
128131
[workspace.metadata.cargo-shear]
129132
# `serde` is used when #[ast_node] is expanded
130133
# the rests are used for enabling features
@@ -139,7 +142,6 @@ resolver = "2"
139142
"swc_ecma_utils",
140143
]
141144

142-
143145
[profile.release]
144146
lto = true
145147

crates/swc_ecma_preset_env/Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ serde = { workspace = true, features = ["derive"], optional = true }
2323
serde_json = { workspace = true }
2424

2525
preset_env_base = { version = "4.0.1", path = "../preset_env_base" }
26+
27+
precomputed-map = { workspace = true }
28+
foldhash = { workspace = true }
29+
2630
rustc-hash = { workspace = true }
2731
string_enum = { version = "1.0.2", path = "../string_enum" }
2832
swc_atoms = { version = "6.0.1", path = "../swc_atoms" }
@@ -35,6 +39,12 @@ swc_ecma_transforms = { version = "23.0.1", path = "../swc_ecma_transforms", fea
3539
swc_ecma_utils = { version = "18.0.3", path = "../swc_ecma_utils" }
3640
swc_ecma_visit = { version = "13.0.0", path = "../swc_ecma_visit" }
3741

42+
[build-dependencies]
43+
anyhow = { workspace = true }
44+
serde_json = { workspace = true }
45+
precomputed-map = { workspace = true, features = ["builder"] }
46+
foldhash = { workspace = true }
47+
3848
[dev-dependencies]
3949
codspeed-criterion-compat = { workspace = true }
4050
pretty_assertions = { workspace = true }
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
use std::{
2+
collections::{BTreeMap, HashMap},
3+
fs,
4+
io::{self, Write},
5+
path::Path,
6+
};
7+
8+
use anyhow::Context;
9+
10+
fn main() -> anyhow::Result<()> {
11+
es_preset_env_corejs3_entry()
12+
}
13+
14+
fn es_preset_env_corejs3_entry() -> anyhow::Result<()> {
15+
const SEED: u64 = 16416001479773392852;
16+
17+
let crate_dir = std::env::var("CARGO_MANIFEST_DIR")?;
18+
let crate_dir = Path::new(&crate_dir);
19+
20+
let out_dir = std::env::var("OUT_DIR").unwrap();
21+
let out_dir = Path::new(&out_dir);
22+
let out_dir = out_dir.join("corejs3_entries");
23+
24+
let entry_path = crate_dir.join("data/core-js-compat/entries.json");
25+
println!("cargo::rerun-if-changed={}", entry_path.display());
26+
27+
let entry_data = fs::read_to_string(entry_path)?;
28+
let entry_data: BTreeMap<&str, Vec<&str>> =
29+
serde_json::from_str(&entry_data).context("failed to parse entries.json from core js 3")?;
30+
let (keys, values): (Vec<_>, Vec<_>) = entry_data.into_iter().unzip();
31+
32+
let mut strpool = StrPool::default();
33+
let mut values_strid = Vec::new();
34+
let mut values_index = Vec::new();
35+
for list in values {
36+
let start: u32 = values_strid.len().try_into().unwrap();
37+
38+
for s in list {
39+
values_strid.push(strpool.insert(s));
40+
}
41+
let end: u32 = values_strid.len().try_into().unwrap();
42+
values_index.push(start..end);
43+
}
44+
45+
let mapout = precomputed_map::builder::MapBuilder::<&str>::new()
46+
.set_seed(SEED)
47+
.set_hash(&|seed, &v| {
48+
use std::hash::{Hash, Hasher};
49+
50+
let mut hasher =
51+
foldhash::fast::FoldHasher::with_seed(seed, foldhash::SharedSeed::global_fixed());
52+
v.as_bytes().hash(&mut hasher);
53+
hasher.finish()
54+
})
55+
.set_next_seed(|seed, c| seed + c)
56+
.build(&keys)?;
57+
58+
if let Some(seed) = mapout.seed().filter(|&seed| seed != SEED) {
59+
println!(
60+
"cargo::warning=The seed has changed, please update the seed to {seed} for faster \
61+
builds"
62+
);
63+
}
64+
65+
// clean file
66+
{
67+
fs::remove_dir_all(&out_dir).or_else(|err| match err.kind() {
68+
io::ErrorKind::NotFound => Ok(()),
69+
_ => Err(err),
70+
})?;
71+
fs::create_dir(&out_dir)?;
72+
}
73+
74+
let mut u8seq = precomputed_map::builder::U8SeqWriter::new(
75+
"PrecomputedU8Seq".into(),
76+
out_dir.join("u8.bin"),
77+
);
78+
let mut u32seq = precomputed_map::builder::U32SeqWriter::new(
79+
"PrecomputedU32Seq".into(),
80+
out_dir.join("u32.bin"),
81+
);
82+
83+
let mut builder = precomputed_map::builder::CodeBuilder::new(
84+
"Corejs3Entries".into(),
85+
"SwcFold".into(),
86+
&mut u8seq,
87+
&mut u32seq,
88+
);
89+
90+
let k = builder.create_bytes_position_seq("EntryKeys".into(), mapout.reorder(&keys))?;
91+
builder.create_u32_seq("EntryValuesStringId".into(), values_strid.iter().copied())?;
92+
mapout.create_map("ENTRY_INDEX".into(), k, &mut builder)?;
93+
94+
let mut codeout = fs::File::create(out_dir.join("lib.rs"))?;
95+
builder.codegen(&mut codeout)?;
96+
u8seq.codegen(&mut codeout)?;
97+
u32seq.codegen(&mut codeout)?;
98+
99+
fs::write(out_dir.join("str.bin"), strpool.pool.as_bytes())?;
100+
101+
writeln!(
102+
codeout,
103+
"static ENTRY_VALUES_STRING_STORE: &str = include_str!(\"str.bin\");
104+
static ENTRY_VALUES_LIST: &[Range<u32>] = &["
105+
)?;
106+
for range in mapout.reorder(&values_index) {
107+
writeln!(codeout, "{}..{},", range.start, range.end)?;
108+
}
109+
writeln!(codeout, "];")?;
110+
111+
Ok(())
112+
}
113+
114+
/// Deduplicating string pool used to pack the module names.
///
/// Every distinct string is appended once to `pool`; `map` remembers the
/// packed id that was handed out for it.
#[derive(Default)]
struct StrPool<'s> {
    /// Concatenation of every distinct string inserted so far.
    pool: String,
    /// Packed id previously returned for each inserted string.
    map: HashMap<&'s str, u32>,
}

impl<'s> StrPool<'s> {
    /// Number of low bits of a packed id that hold the byte offset; the
    /// remaining high 8 bits hold the byte length.
    const OFFSET_BITS: u32 = 24;

    /// Interns `s` and returns its packed id: `offset | (len << 24)`, where
    /// `offset` is the byte offset of `s` inside `pool` and `len` its byte
    /// length. Inserting a string that is already present returns the id it
    /// was first given and leaves the pool untouched.
    ///
    /// # Panics
    ///
    /// Panics if `s` is longer than 255 bytes, or if its offset does not fit
    /// in 24 bits. Both are checked before the pool is modified.
    pub fn insert(&mut self, s: &'s str) -> u32 {
        // Split the borrow so the closure only needs `pool` while `map` is
        // mutably borrowed by the entry API.
        let Self { pool, map } = self;

        *map.entry(s).or_insert_with(|| {
            // The offset must be strictly below 2^24: a value of exactly 2^24
            // would spill into the length bits and corrupt the id.
            let offset = u32::try_from(pool.len())
                .ok()
                .filter(|&offset| offset < (1 << Self::OFFSET_BITS))
                .expect("string pool offset does not fit in 24 bits");
            let len =
                u8::try_from(s.len()).expect("string is longer than 255 bytes and cannot be packed");

            pool.push_str(s);

            offset | (u32::from(len) << Self::OFFSET_BITS)
        })
    }
}

crates/swc_ecma_preset_env/src/corejs3/entry.rs

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,45 @@
1-
use std::sync::Arc;
1+
use std::{ops::Range, sync::Arc};
22

33
use indexmap::IndexSet;
4-
use once_cell::sync::Lazy;
54
use preset_env_base::{
65
version::{should_enable, Version},
76
Versions,
87
};
9-
use rustc_hash::{FxBuildHasher, FxHashMap};
8+
use rustc_hash::FxBuildHasher;
109
use swc_atoms::atom;
1110
use swc_common::DUMMY_SP;
1211
use swc_ecma_ast::*;
1312
use swc_ecma_visit::VisitMut;
1413

1514
use super::{compat::DATA as CORE_JS_COMPAT_DATA, data::MODULES_BY_VERSION};
15+
use crate::util::SwcFold;
1616

17-
static ENTRIES: Lazy<FxHashMap<String, Vec<&'static str>>> = Lazy::new(|| {
18-
serde_json::from_str::<FxHashMap<String, Vec<String>>>(include_str!(
19-
"../../data/core-js-compat/entries.json"
20-
))
21-
.expect("failed to parse entries.json from core js 3")
22-
.into_iter()
23-
.map(|(k, v)| {
24-
(
25-
k,
26-
v.into_iter()
27-
.map(|s: String| &*Box::leak(s.into_boxed_str()))
28-
.collect::<Vec<_>>(),
29-
)
30-
})
31-
.collect()
32-
});
17+
include!(concat!(env!("OUT_DIR"), "/corejs3_entries/lib.rs"));
18+
19+
/// Half-open range of positions in the generated `EntryValuesStringId`
/// sequence; each position holds the packed string id of one feature.
pub struct FeatureSet(Range<u32>);
20+
21+
pub fn entries_get(name: &str) -> Option<FeatureSet> {
22+
{
23+
let index = ENTRY_INDEX.get(name.as_bytes())?;
24+
ENTRY_VALUES_LIST.get(index).cloned().map(FeatureSet)
25+
}
26+
}
27+
28+
impl FeatureSet {
29+
pub fn iter(&self) -> impl ExactSizeIterator<Item = &'static str> {
30+
use precomputed_map::store::AccessSeq;
31+
32+
self.0
33+
.clone()
34+
.map(|idx| EntryValuesStringId::index(idx as usize).unwrap())
35+
.map(|id| {
36+
let offset = id & ((1 << 24) - 1);
37+
let len = id >> 24;
38+
39+
&ENTRY_VALUES_STRING_STORE[(offset as usize)..][..(len as usize)]
40+
})
41+
}
42+
}
3343

3444
#[derive(Debug)]
3545
pub struct Entry {
@@ -68,9 +78,9 @@ impl Entry {
6878
return true;
6979
}
7080

71-
if let Some(features) = ENTRIES.get(src) {
81+
if let Some(features) = entries_get(src) {
7282
self.imports.extend(features.iter().filter(|f| {
73-
let feature = CORE_JS_COMPAT_DATA.get(&***f);
83+
let feature = CORE_JS_COMPAT_DATA.get(*f);
7484

7585
if !*is_any_target {
7686
if let Some(feature) = feature {
@@ -80,7 +90,7 @@ impl Entry {
8090
}
8191
}
8292

83-
if let Some(version) = MODULES_BY_VERSION.get(**f) {
93+
if let Some(version) = MODULES_BY_VERSION.get(f) {
8494
return version <= corejs_version;
8595
}
8696

crates/swc_ecma_preset_env/src/util.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,3 +687,16 @@ impl<T> DataMapExt<T> for DataMap<T> {
687687
self
688688
}
689689
}
690+
691+
pub(crate) struct SwcFold;
692+
693+
impl precomputed_map::phf::HashOne for SwcFold {
694+
fn hash_one<T: std::hash::Hash>(k: u64, v: T) -> u64 {
695+
use std::hash::Hasher;
696+
697+
let mut hasher =
698+
foldhash::fast::FoldHasher::with_seed(k, foldhash::SharedSeed::global_fixed());
699+
v.hash(&mut hasher);
700+
hasher.finish()
701+
}
702+
}

0 commit comments

Comments
 (0)