@@ -8,14 +8,8 @@ use anyhow::Context;
8
8
///
9
9
/// The last line in a chunk potentially reads over the chunk byte boundary to find the line end.
10
10
/// In the same way the first line searches the line end.
11
- pub fn chunks ( path : PathBuf ) -> anyhow:: Result < Chunks < BufReader < File > > > {
11
+ pub fn chunks ( path : PathBuf ) -> anyhow:: Result < Chunks < FileChunks > > {
12
12
let size = File :: open ( & path) ?. metadata ( ) ?. len ( ) ;
13
- let it = ( 0 ..usize:: MAX ) . map ( move |_| {
14
- File :: open ( & path)
15
- . map ( BufReader :: new)
16
- . with_context ( || "Failed" )
17
- } ) ;
18
-
19
13
let cpus = num_cpus:: get ( ) as u64 ;
20
14
let chunk_size = MAX_CHUNK_SIZE . min ( size / cpus / 10 ) . max ( MIN_CHUNK_SIZE ) ;
21
15
let chunks = if chunk_size == 0 {
@@ -24,7 +18,7 @@ pub fn chunks(path: PathBuf) -> anyhow::Result<Chunks<BufReader<File>>> {
24
18
size / chunk_size + 1 . min ( size % chunk_size)
25
19
} as usize ;
26
20
Ok ( Chunks {
27
- chunk_data : Box :: new ( it ) ,
21
+ source : FileChunks { path } ,
28
22
position : 0 ,
29
23
count : 0 ,
30
24
chunks,
@@ -33,20 +27,17 @@ pub fn chunks(path: PathBuf) -> anyhow::Result<Chunks<BufReader<File>>> {
33
27
} )
34
28
}
35
29
36
- pub struct Chunks < T > {
37
- chunk_data : Box < dyn Iterator < Item = anyhow :: Result < T > > + Send > ,
30
+ pub struct Chunks < S : ChunkSource > {
31
+ source : S ,
38
32
position : u64 ,
39
33
count : usize ,
40
34
chunks : usize ,
41
35
chunk_size : u64 ,
42
36
size : u64 ,
43
37
}
44
38
45
- impl < T > Iterator for Chunks < T >
46
- where
47
- T : BufRead + Seek ,
48
- {
49
- type Item = Chunk < T > ;
39
+ impl < S : ChunkSource > Iterator for Chunks < S > {
40
+ type Item = Chunk < S :: Item > ;
50
41
51
42
fn next ( & mut self ) -> Option < Self :: Item > {
52
43
if self . count == self . chunks {
55
46
56
47
let start = ( self . count as u64 ) * self . chunk_size ;
57
48
self . count += 1 ;
58
- let f = self . chunk_data . next ( ) ? . ok ( ) ?;
49
+ let f = self . source . call ( ) . ok ( ) ?;
59
50
let ( chunk, position) =
60
51
Chunk :: new ( f, self . chunk_size , self . position , start, self . size ) . ok ( ) ?;
61
52
self . position = position;
@@ -128,6 +119,26 @@ where
128
119
}
129
120
}
130
121
122
+ pub trait ChunkSource : Sized {
123
+ type Item : Seek + BufRead ;
124
+
125
+ fn call ( & self ) -> anyhow:: Result < Self :: Item > ;
126
+ }
127
+
128
+ pub struct FileChunks {
129
+ path : PathBuf ,
130
+ }
131
+
132
+ impl ChunkSource for FileChunks {
133
+ type Item = BufReader < File > ;
134
+
135
+ fn call ( & self ) -> anyhow:: Result < Self :: Item > {
136
+ File :: open ( & self . path )
137
+ . map ( BufReader :: new)
138
+ . with_context ( || "Failed" )
139
+ }
140
+ }
141
+
131
142
const MIN_CHUNK_SIZE : u64 = 512 * 1024 ;
132
143
const MAX_CHUNK_SIZE : u64 = 64 * 1024 * 1024 ;
133
144
@@ -137,15 +148,14 @@ mod tests {
137
148
use quickcheck:: TestResult ;
138
149
use std:: io:: Cursor ;
139
150
140
- fn mem_chunks < ' a > ( mem : Vec < u8 > , chunk_size : u64 , size : u64 ) -> Chunks < impl BufRead + Seek > {
141
- let it = ( 0 ..usize:: MAX ) . map ( move |_| Ok ( Cursor :: new ( mem. clone ( ) ) ) ) ;
151
+ fn mem_chunks < ' a > ( bytes : Vec < u8 > , chunk_size : u64 , size : u64 ) -> Chunks < MemoryChunks > {
142
152
let chunks = if chunk_size == 0 {
143
153
0
144
154
} else {
145
155
size / chunk_size + 1 . min ( size % chunk_size)
146
156
} as usize ;
147
157
Chunks {
148
- chunk_data : Box :: new ( it ) ,
158
+ source : MemoryChunks { bytes } ,
149
159
position : 0 ,
150
160
count : 0 ,
151
161
chunks,
@@ -192,4 +202,16 @@ mod tests {
192
202
. max_tests ( 300 )
193
203
. quickcheck ( test_split_buf as fn ( _, _) -> TestResult ) ;
194
204
}
205
+
206
+ struct MemoryChunks {
207
+ bytes : Vec < u8 > ,
208
+ }
209
+
210
+ impl ChunkSource for MemoryChunks {
211
+ type Item = Cursor < Vec < u8 > > ;
212
+
213
+ fn call ( & self ) -> anyhow:: Result < Self :: Item > {
214
+ Ok ( Cursor :: new ( self . bytes . clone ( ) ) )
215
+ }
216
+ }
195
217
}
0 commit comments