Skip to content

Commit d080fb1

Browse files
Liubov Dmitrieva authored and facebook-github-bot committed
cas: implement split algo [part #2]
Summary: implement split algo
Reviewed By: muirdm
Differential Revision: D62855251
fbshipit-source-id: 2e24b94eb2fa681cfb46907d8abba0aaf8075724
1 parent 7d80e8b commit d080fb1

File tree

1 file changed

+75
-0
lines changed
  • eden/scm/lib/cas-client/re-cas-common/src

1 file changed

+75
-0
lines changed

eden/scm/lib/cas-client/re-cas-common/src/lib.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,78 @@ impl $crate::CasClient for $struct {
8888
}
8989
};
9090
}
91+
92+
/// Splits `digests` into consecutive batches whose summed `size` does not exceed `max_bytes`.
///
/// Batches are yielded lazily, in input order, as sub-slices borrowed from `digests`;
/// together they cover every input element exactly once. A batch is closed right before
/// the digest that would push its running total above `max_bytes`.
///
/// A digest that is larger than `max_bytes` on its own is still yielded, as a batch
/// containing just that digest, so every batch holds at least one element and the
/// iterator always makes progress. An empty input yields no batches.
///
/// The running total is computed with checked arithmetic: if adding a digest would
/// overflow `u64`, that digest is treated as exceeding the limit and starts the next
/// batch instead of panicking (debug) or wrapping around (release).
pub fn split_up_to_max_bytes(
    digests: &[CasDigest],
    max_bytes: u64,
) -> impl Iterator<Item = &[CasDigest]> {
    /// Iterator state: the digests not yet handed out, plus the per-batch byte limit.
    struct Iter<'a> {
        digests: &'a [CasDigest],
        max_bytes: u64,
    }

    impl<'a> Iterator for Iter<'a> {
        type Item = &'a [CasDigest];

        fn next(&mut self) -> Option<Self::Item> {
            if self.digests.is_empty() {
                return None;
            }
            let mut split_at = 0;
            let mut sum: u64 = 0;
            for (i, digest) in self.digests.iter().enumerate() {
                match sum.checked_add(digest.size) {
                    // Still within the limit: extend the batch to include this digest.
                    Some(total) if total <= self.max_bytes => {
                        sum = total;
                        split_at = i + 1;
                    }
                    // Either over the limit or the total overflowed u64: stop here.
                    _ => break,
                }
            }
            if split_at == 0 {
                // We didn't find a split point meaning that there is a single file above
                // the threshold, so just return this first item.
                split_at = 1;
            }
            let (batch, rest) = self.digests.split_at(split_at);
            self.digests = rest;
            Some(batch)
        }
    }

    Iter { digests, max_bytes }
}
129+
130+
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Renders the batches produced by `split_up_to_max_bytes` as text: the sizes inside
    /// one batch are joined with "," and the batches themselves are joined with "|".
    fn split_and_format_results(digests: &[CasDigest], max_bytes: u64) -> String {
        let mut rendered_batches: Vec<String> = Vec::new();
        for batch in split_up_to_max_bytes(digests, max_bytes) {
            let sizes: Vec<String> = batch
                .iter()
                .map(|digest| digest.size.to_string())
                .collect();
            rendered_batches.push(sizes.join(","));
        }
        rendered_batches.join("|")
    }

    #[test]
    fn test_split_up_to_max_bytes() {
        // The hash value is irrelevant to splitting; every digest shares the same one.
        let hash =
            Blake3::from_str("2078b4229b5353de0268efc7f64b68f3c99fb8829e9c052117b4e1e090b2603a")
                .unwrap();
        let digests: Vec<CasDigest> = [200u64, 200, 400]
            .iter()
            .map(|&size| CasDigest { hash, size })
            .collect();

        // (byte limit, expected batches)
        let cases: [(u64, &str); 5] = [
            (200, "200|200|400"),
            (400, "200,200|400"),
            (500, "200,200|400"),
            (5000, "200,200,400"),
            (10, "200|200|400"),
        ];
        for (max_bytes, expected) in cases.iter() {
            assert_eq!(split_and_format_results(&digests, *max_bytes), *expected);
        }
    }
}

0 commit comments

Comments
 (0)