Skip to content

Commit 6c02096

Browse files
committed
Add test of DeltaBitPackDecoder padding
1 parent 145a1dc commit 6c02096

File tree

1 file changed

+88
-1
lines changed

1 file changed

+88
-1
lines changed

parquet/src/encodings/decoding.rs

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,14 @@ where
652652
.bit_reader
653653
.get_batch(&mut buffer[read..read + batch_to_read], bit_width);
654654

655+
if batch_read != batch_to_read {
656+
return Err(general_err!(
657+
"Expected to read {} values from miniblock got {}",
658+
batch_to_read,
659+
batch_read
660+
));
661+
}
662+
655663
// At this point we have read the deltas to `buffer` we now need to offset
656664
// these to get back to the original values that were encoded
657665
for v in &mut buffer[read..read + batch_read] {
@@ -927,7 +935,9 @@ mod tests {
927935
ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType,
928936
};
929937
use crate::util::{
930-
bit_util::set_array_bit, memory::MemTracker, test_common::RandGen,
938+
bit_util::set_array_bit,
939+
memory::{BufferPtr, MemTracker},
940+
test_common::RandGen,
931941
};
932942

933943
#[test]
@@ -1325,6 +1335,83 @@ mod tests {
13251335
assert_eq!(result, vec![29, 43, 89]);
13261336
}
13271337

1338+
#[test]
1339+
fn test_delta_bit_packed_padding() {
1340+
// Page header
1341+
let header = vec![
1342+
// Page Header
1343+
1344+
// Block Size - 256
1345+
128,
1346+
2,
1347+
// Miniblocks in block - 4
1348+
4,
1349+
// Total value count - 419
1350+
128 + 35,
1351+
3,
1352+
// First value - 7
1353+
7,
1354+
];
1355+
1356+
// Block 1 header
1357+
let block1_header = vec![
1358+
0, // Min delta
1359+
0, 1, 0, 0, // Bit widths
1360+
];
1361+
1362+
// Mini-block 1 - bit width 0 => 0 bytes
1363+
// Mini-block 2 - bit width 1 => 8 bytes
1364+
// Mini-block 3 - bit width 0 => 0 bytes
1365+
// Mini-block 4 - bit width 0 => 0 bytes
1366+
let block1 = vec![0xFF; 8];
1367+
1368+
// Block 2 header
1369+
let block2_header = vec![
1370+
0, // Min delta
1371+
0, 1, 2, 0xFF, // Bit widths, including non-zero padding
1372+
];
1373+
1374+
// Mini-block 1 - bit width 0 => 0 bytes
1375+
// Mini-block 2 - bit width 1 => 8 bytes
1376+
// Mini-block 3 - bit width 2 => 16 bytes
1377+
// Mini-block 4 - padding => no bytes
1378+
let block2 = vec![0xFF; 24];
1379+
1380+
let data: Vec<u8> = header
1381+
.into_iter()
1382+
.chain(block1_header)
1383+
.chain(block1)
1384+
.chain(block2_header)
1385+
.chain(block2)
1386+
.collect();
1387+
1388+
let length = data.len();
1389+
1390+
let ptr = BufferPtr::new(data);
1391+
let mut reader = BitReader::new(ptr.clone());
1392+
assert_eq!(reader.get_vlq_int().unwrap(), 256);
1393+
assert_eq!(reader.get_vlq_int().unwrap(), 4);
1394+
assert_eq!(reader.get_vlq_int().unwrap(), 419);
1395+
assert_eq!(reader.get_vlq_int().unwrap(), 7);
1396+
1397+
// Test output buffer larger than the 419 encoded values but smaller than the
// 513 values (first value + two full 256-value blocks) the encoded blocks span
1398+
let mut output = vec![0_i32; 420];
1399+
1400+
let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
1401+
decoder.set_data(ptr.clone(), 0).unwrap();
1402+
assert_eq!(decoder.get(&mut output).unwrap(), 419);
1403+
assert_eq!(decoder.get_offset(), length);
1404+
1405+
// Test with truncated buffer
1406+
decoder.set_data(ptr.range(0, 12), 0).unwrap();
1407+
let err = decoder.get(&mut output).unwrap_err().to_string();
1408+
assert!(
1409+
err.contains("Expected to read 64 values from miniblock got 8"),
1410+
"{}",
1411+
err
1412+
);
1413+
}
1414+
13281415
#[test]
13291416
fn test_delta_byte_array_same_arrays() {
13301417
let data = vec![

0 commit comments

Comments
 (0)