Skip to content

Commit

Permalink
Limit read size in File::read_to_end loop
Browse files Browse the repository at this point in the history
This works around performance issues on  Windows by limiting reads the size of reads when the expected size is known.
  • Loading branch information
ChrisDenton committed Apr 21, 2023
1 parent fa4cc63 commit f74fe8b
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 24 deletions.
40 changes: 22 additions & 18 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ pub struct DirBuilder {
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
fn inner(path: &Path) -> io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let mut bytes = Vec::with_capacity(size as usize);
io::default_read_to_end(&mut file, &mut bytes)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
io::default_read_to_end(&mut file, &mut bytes, size)?;
Ok(bytes)
}
inner(path.as_ref())
Expand Down Expand Up @@ -289,9 +289,9 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
fn inner(path: &Path) -> io::Result<String> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let mut string = String::with_capacity(size as usize);
io::default_read_to_string(&mut file, &mut string)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut string = String::with_capacity(size.unwrap_or(0));
io::default_read_to_string(&mut file, &mut string, size)?;
Ok(string)
}
inner(path.as_ref())
Expand Down Expand Up @@ -732,12 +732,12 @@ impl fmt::Debug for File {
}

/// Indicates how much extra capacity is needed to read the rest of the file.
fn buffer_capacity_required(mut file: &File) -> usize {
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let pos = file.stream_position().unwrap_or(0);
fn buffer_capacity_required(mut file: &File) -> Option<usize> {
let size = file.metadata().map(|m| m.len()).ok()?;
let pos = file.stream_position().ok()?;
// Don't worry about `usize` overflow because reading will fail regardless
// in that case.
size.saturating_sub(pos) as usize
Some(size.saturating_sub(pos) as usize)
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -761,14 +761,16 @@ impl Read for File {

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_end(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_end(self, buf, size)
}

// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_string(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_string(self, buf, size)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -817,14 +819,16 @@ impl Read for &File {

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_end(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_end(self, buf, size)
}

// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_string(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_string(self, buf, size)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
24 changes: 19 additions & 5 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,17 +357,30 @@ where
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
// time is 4,500 times (!) slower than a default reservation size of 32 if the
// reader has a very small amount of data to return.
pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
r: &mut R,
buf: &mut Vec<u8>,
size_hint: Option<usize>,
) -> Result<usize> {
let start_len = buf.len();
let start_cap = buf.capacity();
// Optionally limit the maximum bytes read on each iteration.
// This adds an arbitrary fiddle factor to allow for more data than we expect.
let max_read_size =
size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));

let mut initialized = 0; // Extra initialized bytes from previous loop iteration
loop {
if buf.len() == buf.capacity() {
buf.reserve(32); // buf is full, need more space
}

let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();
let mut spare = buf.spare_capacity_mut();
if let Some(size) = max_read_size {
let len = cmp::min(spare.len(), size);
spare = &mut spare[..len]
}
let mut read_buf: BorrowedBuf<'_> = spare.into();

// SAFETY: These bytes were initialized but not filled in the previous loop
unsafe {
Expand Down Expand Up @@ -419,6 +432,7 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
pub(crate) fn default_read_to_string<R: Read + ?Sized>(
r: &mut R,
buf: &mut String,
size_hint: Option<usize>,
) -> Result<usize> {
// Note that we do *not* call `r.read_to_end()` here. We are passing
// `&mut Vec<u8>` (the raw contents of `buf`) into the `read_to_end`
Expand All @@ -429,7 +443,7 @@ pub(crate) fn default_read_to_string<R: Read + ?Sized>(
// To prevent extraneously checking the UTF-8-ness of the entire buffer
// we pass it to our hardcoded `default_read_to_end` implementation which
// we know is guaranteed to only read data into the end of the buffer.
unsafe { append_to_string(buf, |b| default_read_to_end(r, b)) }
unsafe { append_to_string(buf, |b| default_read_to_end(r, b, size_hint)) }
}

pub(crate) fn default_read_vectored<F>(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result<usize>
Expand Down Expand Up @@ -709,7 +723,7 @@ pub trait Read {
/// [`std::fs::read`]: crate::fs::read
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
default_read_to_end(self, buf)
default_read_to_end(self, buf, None)
}

/// Read all bytes until EOF in this source, appending them to `buf`.
Expand Down Expand Up @@ -752,7 +766,7 @@ pub trait Read {
/// [`std::fs::read_to_string`]: crate::fs::read_to_string
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
default_read_to_string(self, buf)
default_read_to_string(self, buf, None)
}

/// Read the exact number of bytes required to fill `buf`.
Expand Down
2 changes: 1 addition & 1 deletion library/std/src/io/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ fn bench_read_to_end(b: &mut test::Bencher) {
b.iter(|| {
let mut lr = repeat(1).take(10000000);
let mut vec = Vec::with_capacity(1024);
super::default_read_to_end(&mut lr, &mut vec)
super::default_read_to_end(&mut lr, &mut vec, None)
});
}

Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@
#![feature(float_next_up_down)]
#![feature(hasher_prefixfree_extras)]
#![feature(hashmap_internals)]
#![feature(int_roundings)]
#![feature(ip)]
#![feature(ip_in_core)]
#![feature(maybe_uninit_slice)]
Expand Down

0 comments on commit f74fe8b

Please sign in to comment.