Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,19 @@ where
self.next_block()
}
}

/// Ensures a mini block's bit width fits in the integer type being decoded.
///
/// Returns an error when `bit_width` exceeds the number of bits in `T::T`;
/// a width exactly equal to the size of the type is accepted.
#[inline]
fn check_bit_width(&self, bit_width: usize) -> Result<()> {
    // Number of bits in the physical integer type for this decoder.
    let max_bit_width = std::mem::size_of::<T::T>() * 8;
    if bit_width <= max_bit_width {
        return Ok(());
    }
    Err(general_err!(
        "Invalid delta bit width {} which is larger than expected {} ",
        bit_width,
        max_bit_width
    ))
}
}

impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T>
Expand Down Expand Up @@ -726,6 +739,7 @@ where
}

let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
self.check_bit_width(bit_width)?;
let batch_to_read = self.mini_block_remaining.min(to_read - read);

let batch_read = self
Expand Down Expand Up @@ -796,6 +810,7 @@ where
}

let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
self.check_bit_width(bit_width)?;
let mini_block_to_skip = self.mini_block_remaining.min(to_skip - skip);
let mini_block_should_skip = mini_block_to_skip;

Expand Down Expand Up @@ -2091,4 +2106,51 @@ mod tests {
v
}
}

#[test]
// Allow initializing a vector and pushing to it for clarity in this test
#[allow(clippy::vec_init_then_push)]
fn test_delta_bit_packed_invalid_bit_width() {
// Manually craft a buffer with an invalid bit width
let mut buffer = vec![];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the documentation here, I think adding #[allow(clippy::vec_init_then_push)] is warranted.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an `#[allow(clippy::vec_init_then_push)]` attribute and pushed a commit.

// block_size = 128
buffer.push(128);
buffer.push(1);
// mini_blocks_per_block = 4
buffer.push(4);
// num_values = 32
buffer.push(32);
// first_value = 0
buffer.push(0);
// min_delta = 0
buffer.push(0);
// bit_widths, one for each of the 4 mini blocks
buffer.push(33); // Invalid bit width
buffer.push(0);
buffer.push(0);
buffer.push(0);

let corrupted_buffer = Bytes::from(buffer);

let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
decoder.set_data(corrupted_buffer.clone(), 32).unwrap();
let mut read_buffer = vec![0; 32];
let err = decoder.get(&mut read_buffer).unwrap_err();
assert!(
err.to_string()
.contains("Invalid delta bit width 33 which is larger than expected 32"),
"{}",
err
);

let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
decoder.set_data(corrupted_buffer, 32).unwrap();
let err = decoder.skip(32).unwrap_err();
assert!(
err.to_string()
.contains("Invalid delta bit width 33 which is larger than expected 32"),
"{}",
err
);
}
}
Loading