-
Notifications
You must be signed in to change notification settings - Fork 63
Use inline storage for small hashes #47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
20c5c89
Use inline storage for small hashes
rklaehn 4e0632b
Clippy
rklaehn 0559bed
Rename copy_from_slice to just from_slice
rklaehn 02cf114
Explicity store the bytes size
rklaehn 9ab33d9
Add comment about the rationale for the 38 byte limit.
rklaehn 5a91f48
PR feedback
rklaehn ec44135
Add quickcheck tests for from_slices
rklaehn b729685
Add check_invariants to make sure we don't create heap storage for sm…
rklaehn 3456e07
PR feedback
rklaehn 62692e2
Make debug instance useful
rklaehn File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
use std::sync::Arc; | ||
|
||
/// MAX_INLINE is the maximum size in bytes of a multihash that can be stored inline. | ||
/// | ||
/// We want the currently most common multihashes, those using 256-bit hashes, to be stored inline. | ||
/// These hashes are 34 bytes long. An inline capacity of 38 bytes seems like a good compromise: it | ||
/// allows storing any 256-bit hash with some room to spare and gives an overall size for `Storage` | ||
/// of 40 bytes, which is a multiple of 8. The 2 extra bytes are one for the length and one for the | ||
/// enum discriminant. | ||
const MAX_INLINE: usize = 38; | ||
rklaehn marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// Backing storage for the raw bytes of a multihash. | ||
#[derive(Clone)] | ||
pub(crate) enum Storage { | ||
/// hash is stored inline if it is at most MAX_INLINE bytes long. The `u8` is the number of | ||
/// leading bytes of the array that are actually in use. | ||
Inline(u8, [u8; MAX_INLINE]), | ||
/// hash is stored on the heap. this must only be used if the hash is actually larger than | ||
/// MAX_INLINE bytes to ensure a unique representation. | ||
Heap(Arc<[u8]>), | ||
} | ||
|
||
impl Storage { | ||
/// The raw bytes. | ||
pub fn bytes(&self) -> &[u8] { | ||
match self { | ||
Storage::Inline(len, bytes) => &bytes[..(*len as usize)], | ||
Storage::Heap(data) => &data, | ||
} | ||
} | ||
|
||
/// creates storage from a vec. For a size up to MAX_INLINE, this will not allocate. | ||
pub fn from_slice(slice: &[u8]) -> Self { | ||
let len = slice.len(); | ||
if len <= MAX_INLINE { | ||
let mut data: [u8; MAX_INLINE] = [0; MAX_INLINE]; | ||
data[..len].copy_from_slice(slice); | ||
Storage::Inline(len as u8, data) | ||
} else { | ||
Storage::Heap(slice.into()) | ||
} | ||
} | ||
|
||
/// creates storage from multiple slices. For a size up to MAX_INLINE, this will not allocate. | ||
pub fn from_slices(slices: &[&[u8]]) -> Self { | ||
let n = slices.iter().fold(0usize, |a, s| a.saturating_add(s.len())); | ||
if n <= MAX_INLINE { | ||
let s = slices | ||
.iter() | ||
.fold(([0; MAX_INLINE], 0), |(mut array, i), s| { | ||
array[i..i + s.len()].copy_from_slice(s); | ||
(array, i + s.len()) | ||
}); | ||
Storage::Inline(n as u8, s.0) | ||
} else { | ||
let mut v = Vec::with_capacity(n); | ||
for s in slices { | ||
v.extend_from_slice(s) | ||
} | ||
Storage::Heap(v.into()) | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::{Storage, MAX_INLINE};
    use quickcheck::quickcheck;

    /// `Storage` is expected to occupy exactly 40 bytes.
    #[test]
    fn struct_size() {
        // this should be true for both 32 and 64 bit archs
        assert_eq!(std::mem::size_of::<Storage>(), 40);
    }

    /// Deterministically walks every length around the inline/heap boundary.
    #[test]
    fn roundtrip() {
        // check that .bytes() returns whatever the storage was created with, and that the
        // representation chosen for each length respects the inline/heap invariant
        for i in 0..((MAX_INLINE + 10) as u8) {
            let data = (0..i).collect::<Vec<u8>>();
            let storage = Storage::from_slice(&data);
            assert_eq!(data, storage.bytes());
            assert!(check_invariants(&storage));
        }
    }

    /// Returns true if `storage` uses the representation required for its length: inline for at
    /// most MAX_INLINE bytes, heap only for more than MAX_INLINE bytes.
    fn check_invariants(storage: &Storage) -> bool {
        match storage {
            Storage::Inline(len, _) => usize::from(*len) <= MAX_INLINE,
            Storage::Heap(arc) => arc.len() > MAX_INLINE,
        }
    }

    quickcheck! {
        fn roundtrip_check(data: Vec<u8>) -> bool {
            let storage = Storage::from_slice(&data);
            storage.bytes() == data.as_slice() && check_invariants(&storage)
        }

        fn from_slices_roundtrip_check(data: Vec<Vec<u8>>) -> bool {
            // from_slices must behave like concatenating all parts in order
            let slices: Vec<&[u8]> = data.iter().map(Vec::as_slice).collect();
            let expected = data.concat();
            let storage = Storage::from_slices(&slices);
            storage.bytes() == expected.as_slice() && check_invariants(&storage)
        }
    }
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.