Skip to content

Commit baabe63

Browse files
authored
Merge pull request #32 from njames93/vbr-encoding
Add support for encoding LLVM style variable bit rate integers
2 parents 8ace00f + f601054 commit baabe63

File tree

4 files changed

+219
-0
lines changed

4 files changed

+219
-0
lines changed

src/read.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,34 @@ use super::{
2525

2626
use core::convert::TryInto;
2727

28+
/// Error indicating that a variable bit rate (VBR) read produced a
/// value too large for the requested integer type.
#[derive(Copy, Clone, Debug)]
pub(crate) struct VariableWidthOverflow;
31+
32+
impl core::fmt::Display for VariableWidthOverflow {
33+
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
34+
"variable bit rate overflowed".fmt(f)
35+
}
36+
}
37+
38+
// Marker impl: the default `Error` methods suffice since this unit struct wraps no source error.
impl core::error::Error for VariableWidthOverflow {}
39+
40+
impl From<VariableWidthOverflow> for io::Error {
    /// Converts the overflow marker into an I/O error carrying the
    /// message `variable bit rate overflow`.
    ///
    /// The error kind is `StorageFull` when the `std` feature is enabled
    /// and `Other` otherwise.
    fn from(_: VariableWidthOverflow) -> Self {
        // NOTE(review): `StorageFull` is an unusual kind for a decode
        // overflow -- confirm whether `InvalidData` was intended.
        #[cfg(feature = "std")]
        let kind = io::ErrorKind::StorageFull;
        #[cfg(not(feature = "std"))]
        let kind = io::ErrorKind::Other;

        io::Error::new(kind, "variable bit rate overflow")
    }
}
55+
2856
/// A trait for anything that can read a variable number of
2957
/// potentially un-aligned values from an input stream
3058
pub trait BitRead {
@@ -874,6 +902,97 @@ pub trait BitRead {
874902
T::from_bits(|| self.read_bit())
875903
}
876904

905+
/// Reads a number using a variable using a variable-width integer.
906+
/// This optimises the case when the number is small.
907+
///
908+
/// The integer is mapped to an unsigned value using zigzag encoding.
909+
/// For an integer X:
910+
/// - if X >= 0 -> 2X
911+
/// - else -> -2X + 1
912+
///
913+
/// # Errors
914+
///
915+
/// Passes along any I/O error from the underlying stream.
916+
/// Returns an error if the data read would overflow the size of the result
917+
///
918+
/// # Example
919+
/// ```
920+
/// use bitstream_io::{BitReader, BitRead, BigEndian};
921+
///
922+
/// let bytes: &[u8] = &[0b0111_1100, 0b1100_0001];
923+
/// let mut r = BitReader::endian(bytes, BigEndian);
924+
/// assert_eq!(r.read_unsigned_vbr::<4, u32>().unwrap(), 7);
925+
/// assert_eq!(r.read_unsigned_vbr::<4, u32>().unwrap(), 100);
926+
/// ```
927+
/// ```
928+
/// use bitstream_io::{BitReader, BitRead, BigEndian};
929+
///
930+
/// let bytes: &[u8] = &[0b1111_1111, 0b0011_1000, 0b1000_0100, 0b1000_1000, 0b1000_0000];
931+
/// let mut r = BitReader::endian(bytes, BigEndian);
932+
/// assert_eq!(r.read_unsigned_vbr::<4, u8>().unwrap(), 255); // Tries to read <011><111><111>
933+
/// assert!(r.read_unsigned_vbr::<4, u8>().is_err()); // Tries to read a value of <100><000><000>
934+
/// assert!(r.read_unsigned_vbr::<4, u8>().is_err()); // Tries to read a value of <000><000><000><000>
935+
/// ```
936+
fn read_unsigned_vbr<const FIELD_SIZE: u32, U: UnsignedInteger>(&mut self) -> io::Result<U> {
937+
const { assert!(FIELD_SIZE >= 2 && FIELD_SIZE < U::BITS_SIZE) };
938+
let payload_bits = FIELD_SIZE - 1;
939+
let mut value = U::ZERO;
940+
let mut shift = 0u32;
941+
loop {
942+
let (data, continuation) = self.read_unsigned::<FIELD_SIZE, U>().map(|item| {
943+
(
944+
item & ((U::ONE << payload_bits) - U::ONE),
945+
(item >> payload_bits) != U::ZERO,
946+
)
947+
})?;
948+
let shifted = data << shift;
949+
value |= shifted;
950+
if !continuation {
951+
if (data << shift) >> shift == data {
952+
break Ok(value);
953+
} else {
954+
break Err(VariableWidthOverflow {}.into());
955+
}
956+
}
957+
shift += payload_bits;
958+
if shift >= U::BITS_SIZE {
959+
break Err(VariableWidthOverflow {}.into());
960+
}
961+
}
962+
}
963+
964+
/// Reads a number using a variable using a variable-width integer.
965+
/// This optimises the case when the number is small.
966+
///
967+
/// The integer is mapped to an unsigned value using zigzag encoding.
968+
/// For an integer X:
969+
/// - if X >= 0 -> 2X
970+
/// - else -> -2X + 1
971+
///
972+
/// # Errors
973+
///
974+
/// Passes along any I/O error from the underlying stream.
975+
/// Returns an error if the data read would overflow the size of the result
976+
///
977+
/// # Example
978+
/// ```
979+
/// use bitstream_io::{BitReader, BitRead, BigEndian};
980+
///
981+
/// let bytes: &[u8] = &[0b0110_1011, 0b1100_0001];
982+
/// let mut r = BitReader::endian(bytes, BigEndian);
983+
/// assert_eq!(r.read_signed_vbr::<4, i32>().unwrap(), 3);
984+
/// assert_eq!(r.read_signed_vbr::<4, i32>().unwrap(), -50);
985+
/// ```
986+
fn read_signed_vbr<const FIELD_SIZE: u32, I: SignedInteger>(&mut self) -> io::Result<I> {
987+
self.read_unsigned_vbr::<FIELD_SIZE, I::Unsigned>()
988+
.map(|zig_zag| {
989+
let shifted = zig_zag >> 1;
990+
let complimented = zig_zag & <I::Unsigned as crate::Numeric>::ONE;
991+
let neg = I::ZERO - complimented.as_non_negative();
992+
shifted.as_non_negative() ^ neg
993+
})
994+
}
995+
877996
/// Creates a "by reference" adaptor for this `BitRead`
878997
///
879998
/// The returned adapter also implements `BitRead`

src/write.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,78 @@ pub trait BitWrite {
962962
T::to_bits(value, |b| self.write_bit(b))
963963
}
964964

965+
/// Writes a number using a variable using a variable-width integer.
966+
/// This optimises the case when the number is small.
967+
///
968+
/// Given a 4-bit VBR field, any 3-bit value (0 through 7) is encoded directly, with the high bit set to zero.
969+
/// Values larger than N-1 bits emit their bits in a series of N-1 bit chunks, where all but the last set the high bit.
970+
///
971+
/// # Errors
972+
///
973+
/// Passes along any I/O error from the underlying stream.
974+
///
975+
/// # Example
976+
/// ```
977+
/// use std::io::Write;
978+
/// use bitstream_io::{BigEndian, BitWriter, BitWrite};
979+
/// let mut writer = BitWriter::endian(Vec::new(), BigEndian);
980+
/// writer.write_unsigned_vbr::<4,_>(7u32);
981+
/// writer.write_unsigned_vbr::<4,_>(100u32);
982+
/// assert_eq!(writer.into_writer(), [0b0111_1100, 0b1100_0001]);
983+
/// ```
984+
fn write_unsigned_vbr<const FIELD_SIZE: u32, U: UnsignedInteger>(
985+
&mut self,
986+
value: U,
987+
) -> io::Result<()> {
988+
const { assert!(FIELD_SIZE >= 2 && FIELD_SIZE < U::BITS_SIZE) };
989+
let payload_bits = FIELD_SIZE - 1;
990+
let continuation_bit = U::ONE.shl(payload_bits);
991+
let payload_mask = continuation_bit.sub(U::ONE);
992+
let mut value = value;
993+
994+
loop {
995+
let payload = value & payload_mask;
996+
value >>= payload_bits;
997+
if value != U::ZERO {
998+
self.write_unsigned::<FIELD_SIZE, U>(payload | continuation_bit)?;
999+
} else {
1000+
self.write_unsigned::<FIELD_SIZE, U>(payload)?;
1001+
break;
1002+
}
1003+
}
1004+
Ok(())
1005+
}
1006+
1007+
/// Writes a number using a variable using a variable-width integer.
1008+
/// This optimises the case when the number is small.
1009+
///
1010+
/// The integer is mapped to an unsigned value using zigzag encoding.
1011+
/// For an integer X:
1012+
/// - if X >= 0 -> 2X
1013+
/// - else -> -2X + 1
1014+
///
1015+
/// # Errors
1016+
///
1017+
/// Passes along any I/O error from the underlying stream.
1018+
///
1019+
/// # Example
1020+
/// ```
1021+
/// use std::io::Write;
1022+
/// use bitstream_io::{BigEndian, BitWriter, BitWrite};
1023+
/// let mut writer = BitWriter::endian(Vec::new(), BigEndian);
1024+
/// writer.write_signed_vbr::<4,_>(3);
1025+
/// writer.write_signed_vbr::<4,_>(-50);
1026+
/// assert_eq!(writer.into_writer(), [0b0110_1011, 0b1100_0001]);
1027+
/// ```
1028+
#[inline]
1029+
fn write_signed_vbr<const FIELD_SIZE: u32, I: SignedInteger>(
1030+
&mut self,
1031+
value: I,
1032+
) -> io::Result<()> {
1033+
let zig_zag = value.shl(1).bitxor(value.shr(I::BITS_SIZE - 1));
1034+
self.write_unsigned_vbr::<FIELD_SIZE, _>(zig_zag.as_non_negative())
1035+
}
1036+
9651037
/// Creates a "by reference" adaptor for this `BitWrite`
9661038
///
9671039
/// The returned adapter also implements `BitWrite`

tests/read.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,20 @@ fn test_reader_be() {
110110
assert_eq!(r.read_unary::<1>().unwrap(), 3);
111111
assert_eq!(r.read_unary::<1>().unwrap(), 0);
112112

113+
// reading unsigned vbr
114+
let mut r = BitReader::endian(actual_data.as_slice(), BigEndian);
115+
assert_eq!(r.read_unsigned_vbr::<4, u8>().unwrap(), 11);
116+
assert_eq!(r.read_unsigned_vbr::<4, u8>().unwrap(), 238);
117+
assert_eq!(r.read_unsigned_vbr::<4, u8>().unwrap(), 99);
118+
assert!(r.read_unsigned_vbr::<4, u8>().is_err());
119+
120+
// reading signed vbr
121+
let mut r = BitReader::endian(actual_data.as_slice(), BigEndian);
122+
assert_eq!(r.read_signed_vbr::<4, i8>().unwrap(), -6);
123+
assert_eq!(r.read_signed_vbr::<4, i8>().unwrap(), 119);
124+
assert_eq!(r.read_signed_vbr::<4, i8>().unwrap(), -50);
125+
assert!(r.read_signed_vbr::<4, i8>().is_err());
126+
113127
// byte aligning
114128
let mut r = BitReader::endian(actual_data.as_slice(), BigEndian);
115129
assert_eq!(r.read_var::<u32>(3).unwrap(), 5);

tests/write.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,20 @@ fn test_writer_be() {
134134
w.write_unary::<1>(5).unwrap();
135135
assert_eq!(w.into_writer().as_slice(), &final_data);
136136

137+
// writing unsigned vbr
138+
let mut w = BitWriter::endian(Vec::with_capacity(4), BigEndian);
139+
w.write_unsigned_vbr::<4, _>(11u8).unwrap(); // 001 011 -> <1>011 <0>001
140+
w.write_unsigned_vbr::<4, _>(238u8).unwrap(); // 011 101 110 -> <1>110 <1>101 <0>011
141+
w.write_unsigned_vbr::<4, _>(99u8).unwrap(); // 001 100 011 -> <1>011 <1>100 <0>001
142+
assert_eq!(w.into_writer().as_slice(), &final_data);
143+
144+
// writing signed vbr
145+
let mut w = BitWriter::endian(Vec::with_capacity(4), BigEndian);
146+
w.write_signed_vbr::<4, _>(-6i16).unwrap(); // 001 011 -> <1>011 <0>001
147+
w.write_signed_vbr::<4, _>(119i16).unwrap(); // 011 101 110 -> <1>110 <1>101 <0>011
148+
w.write_signed_vbr::<4, _>(-50i16).unwrap(); // 001 100 011 -> <1>011 <1>100 <0>001
149+
assert_eq!(w.into_writer().as_slice(), &final_data);
150+
137151
// byte aligning
138152
let aligned_data = [0xA0, 0xE0, 0x3B, 0xC0];
139153
let mut w = BitWriter::endian(Vec::with_capacity(4), BigEndian);

0 commit comments

Comments
 (0)