Skip to content

Simplify deflate window #272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 19 additions & 36 deletions zlib-rs/src/allocate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use core::{
alloc::Layout,
ffi::{c_uint, c_void},
marker::PhantomData,
ptr::NonNull,
};

#[cfg(feature = "rust-allocator")]
Expand Down Expand Up @@ -245,33 +246,21 @@ impl Allocator<'_> {
ptr
}

pub fn allocate_raw<T>(&self) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::new::<T>());

if ptr.is_null() {
None
} else {
Some(ptr as *mut T)
}
pub fn allocate_raw<T>(&self) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::new::<T>()).cast())
}

pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::array::<T>(len).ok()?);

if ptr.is_null() {
None
} else {
Some(ptr.cast())
}
pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::array::<T>(len).ok()?).cast())
}

pub fn allocate_zeroed(&self, len: usize) -> *mut u8 {
pub fn allocate_zeroed(&self, len: usize) -> Option<NonNull<u8>> {
#[cfg(feature = "rust-allocator")]
if self.zalloc == Allocator::RUST.zalloc {
// internally, we want to align allocations to 64 bytes (in part for SIMD reasons)
let layout = Layout::from_size_align(len, 64).unwrap();

return unsafe { std::alloc::System.alloc_zeroed(layout) };
return NonNull::new(unsafe { std::alloc::System.alloc_zeroed(layout) });
}

#[cfg(feature = "c-allocator")]
Expand All @@ -285,24 +274,18 @@ impl Allocator<'_> {

let ptr = alloc.allocate_layout(Layout::array::<u8>(len).ok().unwrap());

if ptr.is_null() {
return core::ptr::null_mut();
}

return ptr.cast();
return NonNull::new(ptr.cast());
}

// create the allocation (contents are uninitialized)
let ptr = self.allocate_layout(Layout::array::<u8>(len).ok().unwrap());

if ptr.is_null() {
return core::ptr::null_mut();
}
let ptr = NonNull::new(ptr)?;

// zero all contents (thus initializing the buffer)
unsafe { core::ptr::write_bytes(ptr, 0, len) };
unsafe { core::ptr::write_bytes(ptr.as_ptr(), 0, len) };

ptr.cast()
Some(ptr.cast())
}

/// # Panics
Expand Down Expand Up @@ -373,11 +356,11 @@ mod tests {
_marker: PhantomData,
};

let ptr = allocator.allocate_raw::<T>().unwrap();
let ptr = allocator.allocate_raw::<T>().unwrap().as_ptr();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 1) }

let ptr = allocator.allocate_slice_raw::<T>(10).unwrap();
let ptr = allocator.allocate_slice_raw::<T>(10).unwrap().as_ptr();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 10) }
}
Expand Down Expand Up @@ -428,15 +411,15 @@ mod tests {

fn test_allocate_zeroed_help(allocator: Allocator) {
let len = 42;
let buf = allocator.allocate_zeroed(len);
let Some(buf) = allocator.allocate_zeroed(len) else {
return;
};

if !buf.is_null() {
let slice = unsafe { core::slice::from_raw_parts_mut(buf, len) };
let slice = unsafe { core::slice::from_raw_parts_mut(buf.as_ptr(), len) };

assert_eq!(slice.iter().sum::<u8>(), 0);
}
assert_eq!(slice.iter().sum::<u8>(), 0);

unsafe { allocator.deallocate(buf, len) };
unsafe { allocator.deallocate(buf.as_ptr(), len) };
}

#[test]
Expand Down
35 changes: 16 additions & 19 deletions zlib-rs/src/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,27 +282,29 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
pending.drop_in(&alloc);
}
if let Some(head) = head {
alloc.deallocate(head, 1)
alloc.deallocate(head.as_ptr(), 1)
}
if let Some(prev) = prev {
alloc.deallocate(prev, w_size)
alloc.deallocate(prev.as_ptr(), w_size)
}
if let Some(mut window) = window {
window.drop_in(&alloc);
}

alloc.deallocate(state_allocation, 1);
alloc.deallocate(state_allocation.as_ptr(), 1);
}

return ReturnCode::MemError;
}
};

// zero initialize the memory
let prev = prev.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { prev.write_bytes(0, w_size) };
let prev = unsafe { WeakSliceMut::from_raw_parts_mut(prev, w_size) };

// zero out head's first element
let head = head.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { head.write_bytes(0, 1) };
let head = unsafe { WeakArrayMut::<u16, HASH_SIZE>::from_ptr(head) };

Expand Down Expand Up @@ -374,8 +376,9 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
hash_calc_variant: HashCalcVariant::Standard,
};

unsafe { state_allocation.write(state) };
stream.state = state_allocation as *mut internal_state;
unsafe { state_allocation.as_ptr().write(state) }; // FIXME: write is stable for NonNull since 1.80.0

stream.state = state_allocation.as_ptr() as *mut internal_state;

let Some(stream) = (unsafe { DeflateStream::from_stream_mut(stream) }) else {
if cfg!(debug_assertions) {
Expand Down Expand Up @@ -596,28 +599,31 @@ pub fn copy<'a>(
pending.drop_in(alloc);
}
if let Some(head) = head {
alloc.deallocate(head, HASH_SIZE)
alloc.deallocate(head.as_ptr(), HASH_SIZE)
}
if let Some(prev) = prev {
alloc.deallocate(prev, source_state.w_size)
alloc.deallocate(prev.as_ptr(), source_state.w_size)
}
if let Some(mut window) = window {
window.drop_in(alloc);
}

alloc.deallocate(state_allocation, 1);
alloc.deallocate(state_allocation.as_ptr(), 1);
}

return ReturnCode::MemError;
}
};

let prev = unsafe {
let prev = prev.as_ptr();
prev.copy_from_nonoverlapping(source_state.prev.as_ptr(), source_state.prev.len());
WeakSliceMut::from_raw_parts_mut(prev, source_state.prev.len())
};

// FIXME: write_bytes is stable for NonNull since 1.80.0
let head = unsafe {
let head = head.as_ptr();
head.write_bytes(0, 1);
head.cast::<u16>().write(source_state.head.as_slice()[0]);
WeakArrayMut::from_ptr(head)
Expand Down Expand Up @@ -673,11 +679,11 @@ pub fn copy<'a>(
};

// write the cloned state into state_ptr
unsafe { state_allocation.write(dest_state) };
unsafe { state_allocation.as_ptr().write(dest_state) }; // FIXME: write is stable for NonNull since 1.80.0

// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) };

// update the gzhead field (it contains a mutable reference so we need to be careful
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.gzhead) };
Expand Down Expand Up @@ -1650,7 +1656,6 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the crc32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };

Expand All @@ -1660,15 +1665,13 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the adler32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };

let data = &stream.state.window.filled()[offset..][..len];
stream.adler = adler32(stream.adler as u32, data) as _;
} else {
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
}
Expand Down Expand Up @@ -1742,7 +1745,6 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
// explicitly initialize them with zeros.
//
// see also the "fill_window_out_of_bounds" test.
state.window.initialize_at_least(2 * wsize);
state.window.filled_mut().copy_within(wsize..2 * wsize, 0);

if state.match_start >= wsize {
Expand Down Expand Up @@ -1811,11 +1813,6 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
}
}

// initialize some memory at the end of the (filled) window, so SIMD operations can go "out of
// bounds" without violating any requirements. The window allocation is already slightly bigger
// to allow for this.
stream.state.window.initialize_out_of_bounds();

assert!(
stream.state.strstart <= stream.state.window_size - MIN_LOOKAHEAD,
"not enough room for search"
Expand Down
2 changes: 1 addition & 1 deletion zlib-rs/src/deflate/pending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl<'a> Pending<'a> {
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };

Some(Self {
buf,
Expand Down
85 changes: 7 additions & 78 deletions zlib-rs/src/deflate/window.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,22 @@
use crate::{allocate::Allocator, weak_slice::WeakSliceMut};
use core::mem::MaybeUninit;

#[derive(Debug)]
pub struct Window<'a> {
// the full window allocation. This is longer than w_size so that operations don't need to
// perform bounds checks.
buf: WeakSliceMut<'a, MaybeUninit<u8>>,

// number of initialized bytes
filled: usize,
buf: WeakSliceMut<'a, u8>,

window_bits: usize,

high_water: usize,
}

impl<'a> Window<'a> {
pub fn new_in(alloc: &Allocator<'a>, window_bits: usize) -> Option<Self> {
let len = 2 * ((1 << window_bits) + Self::padding());
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
let ptr = alloc.allocate_zeroed(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };

Some(Self {
buf,
filled: 0,
window_bits,
high_water: 0,
})
Some(Self { buf, window_bits })
}

pub fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
Expand All @@ -37,8 +26,6 @@ impl<'a> Window<'a> {
.buf
.as_mut_slice()
.copy_from_slice(self.buf.as_slice());
clone.filled = self.filled;
clone.high_water = self.high_water;

Some(clone)
}
Expand All @@ -61,14 +48,14 @@ impl<'a> Window<'a> {
#[inline]
pub fn filled(&self) -> &[u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.filled) }
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.buf.len()) }
}

/// Returns a mutable reference to the filled portion of the buffer.
#[inline]
pub fn filled_mut(&mut self) -> &mut [u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.filled) }
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.buf.len()) }
}

/// # Safety
Expand All @@ -77,66 +64,8 @@ impl<'a> Window<'a> {
pub unsafe fn copy_and_initialize(&mut self, range: core::ops::Range<usize>, src: *const u8) {
let (start, end) = (range.start, range.end);

let dst = self.buf.as_mut_slice()[range].as_mut_ptr() as *mut u8;
let dst = self.buf.as_mut_slice()[range].as_mut_ptr();
unsafe { core::ptr::copy_nonoverlapping(src, dst, end - start) };

if start >= self.filled {
self.filled = Ord::max(self.filled, end);
}

self.high_water = Ord::max(self.high_water, self.filled);
}

// this library has many functions that operate in a chunked fashion on memory. For
// performance, we want to minimize bounds checks. Therefore we reserve and initialize some extra
// memory at the end of the window so that chunked operations can use the whole buffer. If they
// go slightly over `self.capacity` that's okay, we account for that here by making sure the
// memory there is initialized!
pub fn initialize_out_of_bounds(&mut self) {
const WIN_INIT: usize = crate::deflate::STD_MAX_MATCH;

// If the WIN_INIT bytes after the end of the current data have never been
// written, then zero those bytes in order to avoid memory check reports of
// the use of uninitialized (or uninitialised as Julian writes) bytes by
// the longest match routines. Update the high water mark for the next
// time through here. WIN_INIT is set to STD_MAX_MATCH since the longest match
// routines allow scanning to strstart + STD_MAX_MATCH, ignoring lookahead.
if self.high_water < self.capacity() {
let curr = self.filled().len();

if self.high_water < curr {
// Previous high water mark below current data -- zero WIN_INIT
// bytes or up to end of window, whichever is less.
let init = Ord::min(self.capacity() - curr, WIN_INIT);

self.buf.as_mut_slice()[curr..][..init].fill(MaybeUninit::new(0));

self.high_water = curr + init;

self.filled += init;
} else if self.high_water < curr + WIN_INIT {
// High water mark at or above current data, but below current data
// plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
// to end of window, whichever is less.
let init = Ord::min(
curr + WIN_INIT - self.high_water,
self.capacity() - self.high_water,
);

self.buf.as_mut_slice()[self.high_water..][..init].fill(MaybeUninit::new(0));

self.high_water += init;
self.filled += init;
}
}
}

pub fn initialize_at_least(&mut self, at_least: usize) {
let end = at_least.clamp(self.high_water, self.buf.len());
self.buf.as_mut_slice()[self.high_water..end].fill(MaybeUninit::new(0));

self.high_water = end;
self.filled = end;
}

// padding required so that SIMD operations going out-of-bounds are not a problem
Expand Down
Loading
Loading