9
9
// History: Adapted from posixutils/compress/zopen.cc, which was in turn
10
10
// adapted from FreeBSD's zopen.c.
11
11
//
12
- // TODO:
13
- // - FIXME: file tail truncated (data corruption)
14
- //
15
12
16
13
use std:: io:: { self , Error , ErrorKind , Read } ;
17
14
@@ -26,37 +23,105 @@ const CLEAR: i32 = 256;
26
23
27
24
const RMASK : [ i32 ; 9 ] = [ 0x00 , 0x01 , 0x03 , 0x07 , 0x0f , 0x1f , 0x3f , 0x7f , 0xff ] ;
28
25
26
/// Thin wrapper around the boxed [`Read`] source that supplies the
/// input byte stream.
struct CompReader {
    inner_rdr: Box<dyn Read>,
}

impl CompReader {
    /// Wraps `rdr` so that all reads go through [`CompReader::read_exact`].
    fn new(rdr: Box<dyn Read>) -> Self {
        Self { inner_rdr: rdr }
    }

    /// Reads into `buf` until it is full or the underlying reader reports
    /// end of input, and returns the number of bytes actually stored.
    ///
    /// Despite the name, this differs from [`Read::read_exact`]: a short
    /// read at end of input is *not* an error. The caller receives the
    /// partial count so that the trailing bytes of the stream are not lost.
    /// An empty `buf` yields `Ok(0)` without touching the reader.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::UnexpectedEof` if `buf` is non-empty and the reader
    ///   reports end of input before a single byte is read.
    /// * Any other I/O error from the underlying reader, except
    ///   `ErrorKind::Interrupted`, which is retried.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Nothing was requested, so reading zero bytes is not an EOF condition.
        if buf.is_empty() {
            return Ok(0);
        }

        let mut total_read = 0;

        while total_read < buf.len() {
            match self.inner_rdr.read(&mut buf[total_read..]) {
                // A zero-length read on a non-empty slice signals end of input.
                Ok(0) => break,
                Ok(n) => total_read += n,
                // Interrupted reads are transient: try again.
                Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }

        if total_read == 0 {
            return Err(io::Error::new(ErrorKind::UnexpectedEof, "Unexpected EOF"));
        }

        Ok(total_read)
    }
}
63
+
29
64
/// Returns the largest code value that fits in `n_bits` bits,
/// i.e. `2^n_bits - 1`.
fn max_code(n_bits: u32) -> u32 {
    let code_count: u32 = 1 << n_bits;
    code_count - 1
}
32
67
33
68
pub struct UnixLZWReader {
34
- rdr : Box < dyn Read > ,
69
+ /// the reader of the compressed file or the file to be compressed
70
+ rdr : CompReader ,
71
+
72
+ /// if the compressed file has header or not
35
73
have_hdr : bool ,
74
+
75
+ /// if the eof has been reached or not
36
76
eof : bool ,
37
77
78
+ /// the max no of bits for the code(maxmaxcode is derived from this)
38
79
maxbits : u32 ,
80
+
81
+ /// the current max no of bits for the code
39
82
n_bits : u32 ,
83
+
84
+ /// if BLOCK_MASK is enabled in the compressed file or not
40
85
block_compress : bool ,
86
+
87
+ /// It indicates if the buffer has to be cleared or not
41
88
clear : bool ,
89
+
42
90
code : i32 ,
91
+
92
+ /// the previously recognized code
43
93
oldcode : i32 ,
94
+
44
95
incode : i32 ,
96
+
97
+ /// the max no of codes that can be created of n_bits
45
98
maxcode : i32 ,
99
+
100
+ /// the max value of maxcode
46
101
maxmaxcode : i32 ,
102
+
103
+ /// the next free entry on the table
47
104
free_ent : i32 ,
105
+
48
106
finchar : i32 ,
107
+
108
+ /// It's the current read offset
49
109
roffset : i32 ,
110
+
50
111
size : i32 ,
112
+
113
+ /// Buffer to fill as we go on read from the read stream
51
114
gbuf : [ u8 ; BITS as usize ] ,
115
+
52
116
tab_suffix : [ i32 ; HSIZE ] ,
117
+
53
118
tab_prefix : [ u16 ; HSIZE ] ,
54
119
}
55
120
56
121
impl UnixLZWReader {
57
122
pub fn new ( rdr : Box < dyn Read > ) -> UnixLZWReader {
58
123
UnixLZWReader {
59
- rdr,
124
+ rdr : CompReader :: new ( rdr ) ,
60
125
have_hdr : false ,
61
126
eof : false ,
62
127
maxbits : 0 ,
@@ -80,29 +145,37 @@ impl UnixLZWReader {
80
145
81
146
fn getcode ( & mut self ) -> i32 {
82
147
if self . clear || self . roffset >= self . size || self . free_ent > self . maxcode {
148
+ // as free_ent represents the index of the next available entry that can be made
149
+ // on the table, so if its more than the self.maxcode (i.e max allowed no of codes),
150
+ // which is derived from the current no of bits of code, then we need to expand
151
+ // our entry by increasing the n_bits by 1 and then updating the max_code
152
+ // from that
83
153
if self . free_ent > self . maxcode {
84
- self . n_bits = self . n_bits + 1 ;
85
- if self . n_bits == self . maxcode as u32 {
86
- self . maxcode = self . maxmaxcode ;
154
+ self . n_bits += 1 ;
155
+ self . maxcode = if self . n_bits == self . maxbits {
156
+ self . maxmaxcode
87
157
} else {
88
- self . maxcode = max_code ( self . n_bits ) as i32 ;
89
- }
158
+ max_code ( self . n_bits ) as i32
159
+ } ;
90
160
}
91
161
162
+ // reset the table entry back to smallest one
92
163
if self . clear {
93
164
self . n_bits = INIT_BITS ;
94
165
self . maxcode = max_code ( self . n_bits ) as i32 ;
95
166
self . clear = false ;
96
167
}
97
168
169
+ // the buffer for current max n of bits
98
170
let gbuf = & mut self . gbuf [ 0 ..self . n_bits as usize ] ;
99
171
100
- let res = self . rdr . read_exact ( gbuf) ;
101
- if res. is_err ( ) {
102
- return -1 ;
172
+ match self . rdr . read_exact ( gbuf) {
173
+ Ok ( n) => {
174
+ self . size = n as i32 ;
175
+ }
176
+ Err ( _) => return -1 ,
103
177
}
104
178
105
- self . size = gbuf. len ( ) as i32 ;
106
179
self . roffset = 0 ;
107
180
self . size = ( self . size << 3 ) - ( self . n_bits - 1 ) as i32 ;
108
181
}
@@ -132,11 +205,12 @@ impl UnixLZWReader {
132
205
gcode
133
206
}
134
207
208
+ /// Read from the compressed stream of file
135
209
pub fn read ( & mut self ) -> io:: Result < Vec < u8 > > {
136
210
let mut outbytes: Vec < u8 > = Vec :: new ( ) ;
137
211
138
212
if !self . have_hdr {
139
- // 3-byte header. 2 byte magic, 1 byte a bitmask of options.
213
+ // 3-byte header: 2 byte magic, 1 byte a bitmask of options.
140
214
let mut header = [ 0 ; 3 ] ;
141
215
self . rdr . read_exact ( & mut header) ?;
142
216
@@ -149,17 +223,33 @@ impl UnixLZWReader {
149
223
) ) ;
150
224
}
151
225
226
+ // the third byte has bitmask of options
227
+ // (Eg) if it has 10011111
228
+ // that means the first bit represents the BLOCK_MASK i.e block_compress
229
+ // has to be enabled or not
230
+ //
231
+ // the bit that we get onwards represent the bit position of the value we want as
232
+ // max no bits
233
+ // if it's at 5th position then it means 2^4 = 16
152
234
let options = header[ 2 ] ;
235
+
153
236
self . maxbits = ( options & HDR_BIT_MASK ) as u32 ;
154
237
self . block_compress = ( options & HDR_BLOCK_MASK ) != 0 ;
155
238
156
239
if self . maxbits > BITS {
157
240
return Err ( Error :: new ( ErrorKind :: Other , "invalid file header: bits" ) ) ;
158
241
}
159
242
243
+ // the max value that self.maxcode can have, which is derived
244
+ // from the maxbits that codes can have
245
+ // hence, 2^(self.maxbits)
160
246
self . maxmaxcode = 1 << self . maxbits ;
161
- self . n_bits = INIT_BITS ;
162
- self . maxcode = max_code ( self . n_bits ) as i32 ;
247
+
248
+ // the no of bits of code that we start with
249
+ // btw, this no of bits also represent the fact that there can be
250
+ // 2 ^ (self.n_bits) entries in the table initially
251
+ self . n_bits = INIT_BITS ; // 9
252
+ self . maxcode = max_code ( self . n_bits ) as i32 ; // 511
163
253
164
254
for code in ( 0 ..=255 ) . rev ( ) {
165
255
let idx: usize = code as usize ;
@@ -169,6 +259,8 @@ impl UnixLZWReader {
169
259
}
170
260
171
261
if self . block_compress {
262
+ // TODO: understand why we need to skip one index (i.e 256) (initial guess is that
263
+ // we need that index reserved for CLEAR)
172
264
self . free_ent = FIRST ;
173
265
} else {
174
266
self . free_ent = 256 ;
@@ -196,6 +288,7 @@ impl UnixLZWReader {
196
288
}
197
289
198
290
if ( self . code == CLEAR ) && self . block_compress {
291
+ // clear the table and fill it again with value
199
292
for code in ( 0 ..=255 ) . rev ( ) {
200
293
let idx: usize = code as usize ;
201
294
self . code = code;
@@ -210,6 +303,7 @@ impl UnixLZWReader {
210
303
break ;
211
304
}
212
305
}
306
+
213
307
self . incode = self . code ;
214
308
215
309
if self . code >= self . free_ent {
0 commit comments