Skip to content

Commit

Permalink
avoid unnecessary reservations in std::io::Take::read_to_end
Browse files Browse the repository at this point in the history
Prevously the `read_to_end` implementation for `std::io::Take` used its
own `limit` as a cap on the `reservation_size`. However, that could
still result in an over-allocation like this:

1. Call `reader.take(5).read_to_end(&mut vec)`.
2. `read_to_end_with_reservation` reserves 5 bytes and calls `read`.
3. `read` writes 5 bytes.
4. `read_to_end_with_reservation` reserves 5 bytes and calls `read`.
5. `read` writes 0 bytes.
6. The read loop ends with `vec` having length 5 and capacity 10.

The reservation of 5 bytes was correct for the read at step 2 but
unnecessary for the read at step 4. By that second read, `Take::limit`
is 0, but the `read_to_end_with_reservation` loop is still using the
same `reservation_size` it started with.

Solve this by having `read_to_end_with_reservation` take a closure,
which lets it get a fresh `reservation_size` for each read. This is an
implementation detail which doesn't affect any public API.
  • Loading branch information
oconnor663 committed Aug 6, 2019
1 parent 8996328 commit edb5214
Showing 1 changed file with 58 additions and 8 deletions.
66 changes: 58 additions & 8 deletions src/libstd/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,20 +353,25 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
// Because we're extending the buffer with uninitialized data for trusted
// readers, we need to make sure to truncate that if any of this panics.
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
read_to_end_with_reservation(r, buf, 32)
read_to_end_with_reservation(r, buf, |_| 32)
}

fn read_to_end_with_reservation<R: Read + ?Sized>(r: &mut R,
buf: &mut Vec<u8>,
reservation_size: usize) -> Result<usize>
fn read_to_end_with_reservation<R, F>(
r: &mut R,
buf: &mut Vec<u8>,
mut reservation_size: F,
) -> Result<usize>
where
R: Read + ?Sized,
F: FnMut(&R) -> usize,
{
let start_len = buf.len();
let mut g = Guard { len: buf.len(), buf: buf };
let ret;
loop {
if g.len == g.buf.len() {
unsafe {
g.buf.reserve(reservation_size);
g.buf.reserve(reservation_size(r));
let capacity = g.buf.capacity();
g.buf.set_len(capacity);
r.initializer().initialize(&mut g.buf[g.len..]);
Expand Down Expand Up @@ -2253,9 +2258,10 @@ impl<T: Read> Read for Take<T> {
}

fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
let reservation_size = cmp::min(self.limit, 32) as usize;

read_to_end_with_reservation(self, buf, reservation_size)
// Pass in a reservation_size closure that respects the current value
// of limit for each read. If we hit the read limit, this prevents the
// final zero-byte read from allocating again.
read_to_end_with_reservation(self, buf, |self_| cmp::min(self_.limit, 32) as usize)
}
}

Expand Down Expand Up @@ -2378,6 +2384,7 @@ impl<B: BufRead> Iterator for Lines<B> {

#[cfg(test)]
mod tests {
use crate::cmp;
use crate::io::prelude::*;
use super::{Cursor, SeekFrom, repeat};
use crate::io::{self, IoSlice, IoSliceMut};
Expand Down Expand Up @@ -2651,6 +2658,49 @@ mod tests {
Ok(())
}

// A simple example reader which uses the default implementation of
// read_to_end.
struct ExampleSliceReader<'a> {
slice: &'a [u8],
}

impl<'a> Read for ExampleSliceReader<'a> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let len = cmp::min(self.slice.len(), buf.len());
buf[..len].copy_from_slice(&self.slice[..len]);
self.slice = &self.slice[len..];
Ok(len)
}
}

#[test]
fn test_read_to_end_capacity() -> io::Result<()> {
let input = &b"foo"[..];

// read_to_end() generally needs to over-allocate, both for efficiency
// and so that it can distinguish EOF. Assert that this is the case
// with this simple ExampleSliceReader struct, which uses the default
// implementation of read_to_end. Even though vec1 is allocated with
// exactly enough capacity for the read, read_to_end will allocate more
// space here.
let mut vec1 = Vec::with_capacity(input.len());
ExampleSliceReader { slice: input }.read_to_end(&mut vec1)?;
assert_eq!(vec1.len(), input.len());
assert!(vec1.capacity() > input.len(), "allocated more");

// However, std::io::Take includes an implementation of read_to_end
// that will not allocate when the limit has already been reached. In
// this case, vec2 never grows.
let mut vec2 = Vec::with_capacity(input.len());
ExampleSliceReader { slice: input }
.take(input.len() as u64)
.read_to_end(&mut vec2)?;
assert_eq!(vec2.len(), input.len());
assert_eq!(vec2.capacity(), input.len(), "did not allocate more");

Ok(())
}

#[test]
fn io_slice_mut_advance() {
let mut buf1 = [1; 8];
Expand Down

0 comments on commit edb5214

Please sign in to comment.