Skip to content

Commit 95e3f2c

Browse files
committed
handle keys larger than 511 bytes
1 parent 024c2f1 commit 95e3f2c

File tree

4 files changed

+113
-5
lines changed

4 files changed

+113
-5
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

turbopack/crates/turbo-tasks-backend/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ anyhow = { workspace = true }
1717
async-trait = { workspace = true }
1818
auto-hash-map = { workspace = true }
1919
bincode = "1.3.3"
20+
byteorder = "1.5.0"
2021
dashmap = { workspace = true }
2122
indexmap = { workspace = true }
2223
lmdb = "0.8.0"

turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
mod extended_key;
2+
13
use std::{
24
collections::{hash_map::Entry, HashMap},
35
error::Error,
@@ -118,7 +120,8 @@ impl BackingStorage for LmdbBackingStorage {
118120
let task_id = **task_id;
119121
let task_type_bytes = bincode::serialize(&task_type)
120122
.with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?;
121-
tx.put(
123+
extended_key::put(
124+
&mut tx,
122125
self.forward_task_cache_db,
123126
&task_type_bytes,
124127
&task_id.to_be_bytes(),
@@ -204,8 +207,7 @@ impl BackingStorage for LmdbBackingStorage {
204207
fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option<TaskId> {
205208
let tx = self.env.begin_ro_txn().ok()?;
206209
let task_type = bincode::serialize(task_type).ok()?;
207-
let result = tx
208-
.get(self.forward_task_cache_db, &task_type)
210+
let result = extended_key::get(&tx, self.forward_task_cache_db, &task_type)
209211
.ok()
210212
.and_then(|v| v.try_into().ok())
211213
.map(|v| TaskId::from(u32::from_be_bytes(v)));
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
use std::hash::{Hash, Hasher};
2+
3+
use byteorder::ByteOrder;
4+
use lmdb::{Database, RoTransaction, RwTransaction, Transaction, WriteFlags};
5+
use rustc_hash::FxHasher;
6+
7+
const MAX_KEY_SIZE: usize = 511;
8+
const SHARED_KEY: usize = MAX_KEY_SIZE - 8;
9+
10+
pub fn get<'tx>(
11+
tx: &'tx RoTransaction<'tx>,
12+
database: Database,
13+
key: &[u8],
14+
) -> lmdb::Result<&'tx [u8]> {
15+
if key.len() > MAX_KEY_SIZE - 1 {
16+
let hashed_key = hashed_key(key);
17+
let data = tx.get(database, &hashed_key)?;
18+
let mut iter = ExtendedValueIter::new(data);
19+
while let Some((k, v)) = iter.next() {
20+
if k == key {
21+
return Ok(v);
22+
}
23+
}
24+
Err(lmdb::Error::NotFound)
25+
} else {
26+
tx.get(database, &key)
27+
}
28+
}
29+
30+
pub fn put(
31+
tx: &mut RwTransaction<'_>,
32+
database: Database,
33+
key: &[u8],
34+
value: &[u8],
35+
flags: WriteFlags,
36+
) -> lmdb::Result<()> {
37+
if key.len() > MAX_KEY_SIZE - 1 {
38+
let hashed_key = hashed_key(key);
39+
40+
let size = key.len() - SHARED_KEY + value.len() + 8;
41+
let old = tx.get(database, &hashed_key);
42+
let old_size = old.map_or(0, |v| v.len());
43+
let mut data = Vec::with_capacity(old_size + size);
44+
data.extend_from_slice(&((key.len() - SHARED_KEY) as u32).to_be_bytes());
45+
data.extend_from_slice(&(value.len() as u32).to_be_bytes());
46+
data.extend_from_slice(&key[SHARED_KEY..]);
47+
data.extend_from_slice(value);
48+
if let Ok(old) = old {
49+
let mut iter = ExtendedValueIter::new(old);
50+
while let Some((k, v)) = iter.next() {
51+
if k != &key[SHARED_KEY..] {
52+
data.extend_from_slice(&(k.len() as u32).to_be_bytes());
53+
data.extend_from_slice(&(v.len() as u32).to_be_bytes());
54+
data.extend_from_slice(k);
55+
data.extend_from_slice(v);
56+
}
57+
}
58+
};
59+
60+
tx.put(database, &hashed_key, &data, flags)?;
61+
Ok(())
62+
} else {
63+
tx.put(database, &key, &value, flags)
64+
}
65+
}
66+
67+
fn hashed_key(key: &[u8]) -> [u8; MAX_KEY_SIZE] {
68+
let mut result = [0; MAX_KEY_SIZE];
69+
let mut hash = FxHasher::default();
70+
key.hash(&mut hash);
71+
byteorder::BigEndian::write_u64(&mut result, hash.finish());
72+
result[8..].copy_from_slice(&key[0..SHARED_KEY]);
73+
result
74+
}
75+
76+
/// Iterator over the `(key, value)` entries packed into a single byte buffer.
///
/// Each entry is encoded as a big-endian `u32` key length, a big-endian `u32`
/// value length, the key bytes and then the value bytes. Entries follow each
/// other without padding.
///
/// Iteration ends at the end of the buffer. It also ends, instead of
/// panicking, at the first entry that is truncated or whose lengths point
/// past the end of the buffer.
struct ExtendedValueIter<'a> {
    data: &'a [u8],
    pos: usize,
}

impl<'a> Iterator for ExtendedValueIter<'a> {
    type Item = (&'a [u8], &'a [u8]);

    fn next(&mut self) -> Option<Self::Item> {
        let entry = self.read_entry();
        if entry.is_none() {
            // A failed read may have consumed part of an entry; jump to the
            // end so every later call keeps returning `None`.
            self.pos = self.data.len();
        }
        entry
    }
}

impl<'a> ExtendedValueIter<'a> {
    fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    /// Reads one complete entry, or `None` if the remaining bytes do not hold one.
    fn read_entry(&mut self) -> Option<(&'a [u8], &'a [u8])> {
        let key_len = self.read_len()?;
        let value_len = self.read_len()?;
        let key = self.read_bytes(key_len)?;
        let value = self.read_bytes(value_len)?;
        Some((key, value))
    }

    /// Reads a big-endian `u32` length prefix and advances past it.
    fn read_len(&mut self) -> Option<usize> {
        match *self.read_bytes(4)? {
            // Widening cast: a `u32` always fits into `usize` on supported targets.
            [a, b, c, d] => Some(u32::from_be_bytes([a, b, c, d]) as usize),
            _ => None,
        }
    }

    /// Returns the next `len` bytes and advances past them, or `None`
    /// (without advancing) if fewer than `len` bytes remain.
    fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
        let end = self.pos.checked_add(len)?;
        let bytes = self.data.get(self.pos..end)?;
        self.pos = end;
        Some(bytes)
    }
}

0 commit comments

Comments
 (0)