Skip to content

Commit 45d28df

Browse files
authored
Turbopack Persistent Caching: Use SmallVec to avoid allocations for small values written to DB (#78136)
### What? Use SmallVec to avoid allocations for small values written to DB
1 parent ddca498 commit 45d28df

16 files changed

+361
-95
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

turbopack/crates/turbo-persistence/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ quick_cache = { version = "0.6.9" }
2222
rayon = { workspace = true }
2323
rustc-hash = { workspace = true }
2424
serde = { workspace = true }
25+
smallvec = { workspace = true}
2526
thread_local = { workspace = true }
2627
twox-hash = { version = "2.0.1", features = ["xxhash64"] }
2728
zstd = { version = "0.13.2", features = ["zdict_builder"] }

turbopack/crates/turbo-persistence/src/collector.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::{
44
DATA_THRESHOLD_PER_INITIAL_FILE, MAX_ENTRIES_PER_INITIAL_FILE, MAX_SMALL_VALUE_SIZE,
55
},
66
key::{hash_key, StoreKey},
7+
ValueBuffer,
78
};
89

910
/// A collector accumulates entries that should be eventually written to a file. It keeps track of
@@ -36,15 +37,19 @@ impl<K: StoreKey> Collector<K> {
3637
}
3738

3839
/// Adds a normal key-value pair to the collector.
39-
pub fn put(&mut self, key: K, value: Vec<u8>) {
40+
pub fn put(&mut self, key: K, value: ValueBuffer) {
4041
let key = EntryKey {
4142
hash: hash_key(&key),
4243
data: key,
4344
};
4445
let value = if value.len() > MAX_SMALL_VALUE_SIZE {
45-
CollectorEntryValue::Medium { value }
46+
CollectorEntryValue::Medium {
47+
value: value.into_vec(),
48+
}
4649
} else {
47-
CollectorEntryValue::Small { value }
50+
CollectorEntryValue::Small {
51+
value: value.into_small_vec(),
52+
}
4853
};
4954
self.total_key_size += key.len();
5055
self.total_value_size += value.len();

turbopack/crates/turbo-persistence/src/collector_entry.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::cmp::Ordering;
22

3+
use smallvec::SmallVec;
4+
35
use crate::{
46
key::StoreKey,
57
static_sorted_file_builder::{Entry, EntryValue},
@@ -11,7 +13,7 @@ pub struct CollectorEntry<K: StoreKey> {
1113
}
1214

1315
pub enum CollectorEntryValue {
14-
Small { value: Vec<u8> },
16+
Small { value: SmallVec<[u8; 16]> },
1517
Medium { value: Vec<u8> },
1618
Large { blob: u32 },
1719
Deleted,

turbopack/crates/turbo-persistence/src/key.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ use std::{cmp::min, hash::Hasher};
44
pub trait KeyBase {
55
/// Returns the length of the key in bytes.
66
fn len(&self) -> usize;
7+
fn is_empty(&self) -> bool {
8+
self.len() == 0
9+
}
710
/// Hashes the key. It should not include the structure of the key, only the data. E.g. `([1,
811
/// 2], [3, 4])` should hash the same as `[1, 2, 3, 4]`.
912
fn hash<H: Hasher>(&self, state: &mut H);
@@ -14,6 +17,10 @@ impl KeyBase for &'_ [u8] {
1417
<[u8]>::len(self)
1518
}
1619

20+
fn is_empty(&self) -> bool {
21+
<[u8]>::is_empty(self)
22+
}
23+
1724
fn hash<H: Hasher>(&self, state: &mut H) {
1825
for item in *self {
1926
state.write_u8(*item);
@@ -23,7 +30,11 @@ impl KeyBase for &'_ [u8] {
2330

2431
impl<const N: usize> KeyBase for [u8; N] {
2532
fn len(&self) -> usize {
26-
self[..].len()
33+
N
34+
}
35+
36+
fn is_empty(&self) -> bool {
37+
N > 0
2738
}
2839

2940
fn hash<H: Hasher>(&self, state: &mut H) {
@@ -38,6 +49,10 @@ impl KeyBase for Vec<u8> {
3849
self.len()
3950
}
4051

52+
fn is_empty(&self) -> bool {
53+
self.is_empty()
54+
}
55+
4156
fn hash<H: Hasher>(&self, state: &mut H) {
4257
for item in self {
4358
state.write_u8(*item);
@@ -50,6 +65,10 @@ impl KeyBase for u8 {
5065
1
5166
}
5267

68+
fn is_empty(&self) -> bool {
69+
false
70+
}
71+
5372
fn hash<H: Hasher>(&self, state: &mut H) {
5473
state.write_u8(*self);
5574
}
@@ -61,6 +80,11 @@ impl<A: KeyBase, B: KeyBase> KeyBase for (A, B) {
6180
a.len() + b.len()
6281
}
6382

83+
fn is_empty(&self) -> bool {
84+
let (a, b) = self;
85+
a.is_empty() && b.is_empty()
86+
}
87+
6488
fn hash<H: Hasher>(&self, state: &mut H) {
6589
let (a, b) = self;
6690
KeyBase::hash(a, state);
@@ -73,6 +97,10 @@ impl<T: KeyBase> KeyBase for &'_ T {
7397
(*self).len()
7498
}
7599

100+
fn is_empty(&self) -> bool {
101+
(*self).is_empty()
102+
}
103+
76104
fn hash<H: Hasher>(&self, state: &mut H) {
77105
(*self).hash(state)
78106
}

turbopack/crates/turbo-persistence/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ mod write_batch;
1717

1818
#[cfg(test)]
1919
mod tests;
20+
mod value_buf;
2021

2122
pub use arc_slice::ArcSlice;
2223
pub use db::TurboPersistence;
23-
pub use key::{QueryKey, StoreKey};
24+
pub use key::{KeyBase, QueryKey, StoreKey};
25+
pub use value_buf::ValueBuffer;
2426
pub use write_batch::WriteBatch;

turbopack/crates/turbo-persistence/src/value_buf.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use std::{borrow::Cow, ops::Deref};
2+
3+
use smallvec::SmallVec;
4+
5+
pub enum ValueBuffer<'l> {
6+
Borrowed(&'l [u8]),
7+
Vec(Vec<u8>),
8+
SmallVec(SmallVec<[u8; 16]>),
9+
}
10+
11+
impl ValueBuffer<'_> {
12+
pub fn into_vec(self) -> Vec<u8> {
13+
match self {
14+
ValueBuffer::Borrowed(b) => b.to_vec(),
15+
ValueBuffer::Vec(v) => v,
16+
ValueBuffer::SmallVec(sv) => sv.into_vec(),
17+
}
18+
}
19+
20+
pub fn into_small_vec(self) -> SmallVec<[u8; 16]> {
21+
match self {
22+
ValueBuffer::Borrowed(b) => SmallVec::from_slice(b),
23+
ValueBuffer::Vec(v) => SmallVec::from_vec(v),
24+
ValueBuffer::SmallVec(sv) => sv,
25+
}
26+
}
27+
}
28+
29+
impl<'l> From<&'l [u8]> for ValueBuffer<'l> {
30+
fn from(b: &'l [u8]) -> Self {
31+
ValueBuffer::Borrowed(b)
32+
}
33+
}
34+
35+
impl From<Vec<u8>> for ValueBuffer<'_> {
36+
fn from(v: Vec<u8>) -> Self {
37+
ValueBuffer::Vec(v)
38+
}
39+
}
40+
41+
/// Takes ownership of a `SmallVec<[u8; 16]>` as is.
impl From<SmallVec<[u8; 16]>> for ValueBuffer<'_> {
    fn from(sv: SmallVec<[u8; 16]>) -> Self {
        Self::SmallVec(sv)
    }
}
46+
47+
impl<'l> From<Cow<'l, [u8]>> for ValueBuffer<'l> {
48+
fn from(c: Cow<'l, [u8]>) -> Self {
49+
match c {
50+
Cow::Borrowed(b) => ValueBuffer::Borrowed(b),
51+
Cow::Owned(o) => ValueBuffer::Vec(o),
52+
}
53+
}
54+
}
55+
56+
impl Deref for ValueBuffer<'_> {
57+
type Target = [u8];
58+
59+
fn deref(&self) -> &Self::Target {
60+
match self {
61+
ValueBuffer::Borrowed(b) => b,
62+
ValueBuffer::Vec(v) => v,
63+
ValueBuffer::SmallVec(sv) => sv,
64+
}
65+
}
66+
}

turbopack/crates/turbo-persistence/src/write_batch.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::{
2-
borrow::Cow,
32
cell::UnsafeCell,
43
fs::File,
54
io::Write,
@@ -20,7 +19,7 @@ use thread_local::ThreadLocal;
2019

2120
use crate::{
2221
collector::Collector, collector_entry::CollectorEntry, constants::MAX_MEDIUM_VALUE_SIZE,
23-
key::StoreKey, static_sorted_file_builder::StaticSortedFileBuilder,
22+
key::StoreKey, static_sorted_file_builder::StaticSortedFileBuilder, ValueBuffer,
2423
};
2524

2625
/// The thread local state of a `WriteBatch`.
@@ -107,11 +106,11 @@ impl<K: StoreKey + Send + Sync, const FAMILIES: usize> WriteBatch<K, FAMILIES> {
107106
}
108107

109108
/// Puts a key-value pair into the write batch.
110-
pub fn put(&self, family: usize, key: K, value: Cow<'_, [u8]>) -> Result<()> {
109+
pub fn put(&self, family: usize, key: K, value: ValueBuffer<'_>) -> Result<()> {
111110
let state = self.thread_local_state();
112111
let collector = self.collector_mut(state, family)?;
113112
if value.len() <= MAX_MEDIUM_VALUE_SIZE {
114-
collector.put(key, value.into_owned());
113+
collector.put(key, value);
115114
} else {
116115
let (blob, file) = self.create_blob(&value)?;
117116
collector.put_blob(key, blob);

turbopack/crates/turbo-tasks-backend/src/database/fresh_db_optimization.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::{
2-
borrow::Cow,
32
fs,
43
path::Path,
54
sync::atomic::{AtomicBool, Ordering},
@@ -9,7 +8,9 @@ use anyhow::Result;
98

109
use crate::database::{
1110
key_value_database::{KeySpace, KeyValueDatabase},
12-
write_batch::{BaseWriteBatch, ConcurrentWriteBatch, SerialWriteBatch, WriteBatch},
11+
write_batch::{
12+
BaseWriteBatch, ConcurrentWriteBatch, SerialWriteBatch, WriteBatch, WriteBuffer,
13+
},
1314
};
1415

1516
pub fn is_fresh(path: &Path) -> bool {
@@ -124,23 +125,28 @@ impl<'a, B: BaseWriteBatch<'a>> BaseWriteBatch<'a> for FreshDbOptimizationWriteB
124125
}
125126

126127
impl<'a, B: SerialWriteBatch<'a>> SerialWriteBatch<'a> for FreshDbOptimizationWriteBatch<'a, B> {
127-
fn put(&mut self, key_space: KeySpace, key: Cow<[u8]>, value: Cow<[u8]>) -> Result<()> {
128+
fn put(
129+
&mut self,
130+
key_space: KeySpace,
131+
key: WriteBuffer<'_>,
132+
value: WriteBuffer<'_>,
133+
) -> Result<()> {
128134
self.write_batch.put(key_space, key, value)
129135
}
130136

131-
fn delete(&mut self, key_space: KeySpace, key: Cow<[u8]>) -> Result<()> {
137+
fn delete(&mut self, key_space: KeySpace, key: WriteBuffer<'_>) -> Result<()> {
132138
self.write_batch.delete(key_space, key)
133139
}
134140
}
135141

136142
impl<'a, B: ConcurrentWriteBatch<'a>> ConcurrentWriteBatch<'a>
137143
for FreshDbOptimizationWriteBatch<'a, B>
138144
{
139-
fn put(&self, key_space: KeySpace, key: Cow<[u8]>, value: Cow<[u8]>) -> Result<()> {
145+
fn put(&self, key_space: KeySpace, key: WriteBuffer<'_>, value: WriteBuffer<'_>) -> Result<()> {
140146
self.write_batch.put(key_space, key, value)
141147
}
142148

143-
fn delete(&self, key_space: KeySpace, key: Cow<[u8]>) -> Result<()> {
149+
fn delete(&self, key_space: KeySpace, key: WriteBuffer<'_>) -> Result<()> {
144150
self.write_batch.delete(key_space, key)
145151
}
146152
}

turbopack/crates/turbo-tasks-backend/src/database/lmdb/mod.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::{borrow::Cow, fs::create_dir_all, path::Path, thread::available_parallelism};
1+
use std::{fs::create_dir_all, path::Path, thread::available_parallelism};
22

33
use anyhow::{Context, Result};
44
use lmdb::{
@@ -8,7 +8,7 @@ use lmdb::{
88

99
use crate::database::{
1010
key_value_database::{KeySpace, KeyValueDatabase},
11-
write_batch::{BaseWriteBatch, SerialWriteBatch, WriteBatch},
11+
write_batch::{BaseWriteBatch, SerialWriteBatch, WriteBatch, WriteBuffer},
1212
};
1313

1414
mod extended_key;
@@ -164,7 +164,12 @@ impl<'a> BaseWriteBatch<'a> for LmbdWriteBatch<'a> {
164164
}
165165

166166
impl<'a> SerialWriteBatch<'a> for LmbdWriteBatch<'a> {
167-
fn put(&mut self, key_space: KeySpace, key: Cow<[u8]>, value: Cow<[u8]>) -> Result<()> {
167+
fn put(
168+
&mut self,
169+
key_space: KeySpace,
170+
key: WriteBuffer<'_>,
171+
value: WriteBuffer<'_>,
172+
) -> Result<()> {
168173
extended_key::put(
169174
&mut self.tx,
170175
self.this.db(key_space),
@@ -175,7 +180,7 @@ impl<'a> SerialWriteBatch<'a> for LmbdWriteBatch<'a> {
175180
Ok(())
176181
}
177182

178-
fn delete(&mut self, key_space: KeySpace, key: Cow<[u8]>) -> Result<()> {
183+
fn delete(&mut self, key_space: KeySpace, key: WriteBuffer<'_>) -> Result<()> {
179184
extended_key::delete(
180185
&mut self.tx,
181186
self.this.db(key_space),

turbopack/crates/turbo-tasks-backend/src/database/noop_kv.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
use std::borrow::Cow;
2-
31
use anyhow::Result;
42

53
use crate::database::{
64
key_value_database::{KeySpace, KeyValueDatabase},
7-
write_batch::{BaseWriteBatch, ConcurrentWriteBatch, SerialWriteBatch, WriteBatch},
5+
write_batch::{
6+
BaseWriteBatch, ConcurrentWriteBatch, SerialWriteBatch, WriteBatch, WriteBuffer,
7+
},
88
};
99

1010
pub struct NoopKvDb;
@@ -78,21 +78,31 @@ impl<'a> BaseWriteBatch<'a> for NoopWriteBatch {
7878
}
7979

8080
impl SerialWriteBatch<'_> for NoopWriteBatch {
81-
fn put(&mut self, _key_space: KeySpace, _key: Cow<[u8]>, _value: Cow<[u8]>) -> Result<()> {
81+
fn put(
82+
&mut self,
83+
_key_space: KeySpace,
84+
_key: WriteBuffer<'_>,
85+
_value: WriteBuffer<'_>,
86+
) -> Result<()> {
8287
Ok(())
8388
}
8489

85-
fn delete(&mut self, _key_space: KeySpace, _key: Cow<[u8]>) -> Result<()> {
90+
fn delete(&mut self, _key_space: KeySpace, _key: WriteBuffer<'_>) -> Result<()> {
8691
Ok(())
8792
}
8893
}
8994

9095
impl ConcurrentWriteBatch<'_> for NoopWriteBatch {
91-
fn put(&self, _key_space: KeySpace, _key: Cow<[u8]>, _value: Cow<[u8]>) -> Result<()> {
96+
fn put(
97+
&self,
98+
_key_space: KeySpace,
99+
_key: WriteBuffer<'_>,
100+
_value: WriteBuffer<'_>,
101+
) -> Result<()> {
92102
Ok(())
93103
}
94104

95-
fn delete(&self, _key_space: KeySpace, _key: Cow<[u8]>) -> Result<()> {
105+
fn delete(&self, _key_space: KeySpace, _key: WriteBuffer<'_>) -> Result<()> {
96106
Ok(())
97107
}
98108
}

0 commit comments

Comments
 (0)