Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit read size in File::read_to_end loop #110655

Merged
merged 1 commit into from
Apr 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ pub struct DirBuilder {
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
fn inner(path: &Path) -> io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let mut bytes = Vec::with_capacity(size as usize);
io::default_read_to_end(&mut file, &mut bytes)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
io::default_read_to_end(&mut file, &mut bytes, size)?;
Ok(bytes)
}
inner(path.as_ref())
Expand Down Expand Up @@ -289,9 +289,9 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
fn inner(path: &Path) -> io::Result<String> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let mut string = String::with_capacity(size as usize);
io::default_read_to_string(&mut file, &mut string)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut string = String::with_capacity(size.unwrap_or(0));
io::default_read_to_string(&mut file, &mut string, size)?;
Ok(string)
}
inner(path.as_ref())
Expand Down Expand Up @@ -732,12 +732,12 @@ impl fmt::Debug for File {
}

/// Indicates how much extra capacity is needed to read the rest of the file.
fn buffer_capacity_required(mut file: &File) -> usize {
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
let pos = file.stream_position().unwrap_or(0);
fn buffer_capacity_required(mut file: &File) -> Option<usize> {
let size = file.metadata().map(|m| m.len()).ok()?;
let pos = file.stream_position().ok()?;
// Don't worry about `usize` overflow because reading will fail regardless
// in that case.
size.saturating_sub(pos) as usize
Some(size.saturating_sub(pos) as usize)
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -761,14 +761,16 @@ impl Read for File {

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_end(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_end(self, buf, size)
}

// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_string(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_string(self, buf, size)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -817,14 +819,16 @@ impl Read for &File {

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_end(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_end(self, buf, size)
}

// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
io::default_read_to_string(self, buf)
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
io::default_read_to_string(self, buf, size)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
24 changes: 19 additions & 5 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,17 +357,30 @@ where
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
// time is 4,500 times (!) slower than a default reservation size of 32 if the
// reader has a very small amount of data to return.
pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
r: &mut R,
buf: &mut Vec<u8>,
size_hint: Option<usize>,
) -> Result<usize> {
let start_len = buf.len();
let start_cap = buf.capacity();
// Optionally limit the maximum bytes read on each iteration.
// This adds an arbitrary fiddle factor to allow for more data than we expect.
joshtriplett marked this conversation as resolved.
Show resolved Hide resolved
let max_read_size =
size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));

let mut initialized = 0; // Extra initialized bytes from previous loop iteration
loop {
if buf.len() == buf.capacity() {
buf.reserve(32); // buf is full, need more space
}

let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();
let mut spare = buf.spare_capacity_mut();
if let Some(size) = max_read_size {
let len = cmp::min(spare.len(), size);
spare = &mut spare[..len]
}
let mut read_buf: BorrowedBuf<'_> = spare.into();

// SAFETY: These bytes were initialized but not filled in the previous loop
unsafe {
Expand Down Expand Up @@ -419,6 +432,7 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
pub(crate) fn default_read_to_string<R: Read + ?Sized>(
r: &mut R,
buf: &mut String,
size_hint: Option<usize>,
) -> Result<usize> {
// Note that we do *not* call `r.read_to_end()` here. We are passing
// `&mut Vec<u8>` (the raw contents of `buf`) into the `read_to_end`
Expand All @@ -429,7 +443,7 @@ pub(crate) fn default_read_to_string<R: Read + ?Sized>(
// To prevent extraneously checking the UTF-8-ness of the entire buffer
// we pass it to our hardcoded `default_read_to_end` implementation which
// we know is guaranteed to only read data into the end of the buffer.
unsafe { append_to_string(buf, |b| default_read_to_end(r, b)) }
unsafe { append_to_string(buf, |b| default_read_to_end(r, b, size_hint)) }
}

pub(crate) fn default_read_vectored<F>(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result<usize>
Expand Down Expand Up @@ -709,7 +723,7 @@ pub trait Read {
/// [`std::fs::read`]: crate::fs::read
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
default_read_to_end(self, buf)
default_read_to_end(self, buf, None)
}

/// Read all bytes until EOF in this source, appending them to `buf`.
Expand Down Expand Up @@ -752,7 +766,7 @@ pub trait Read {
/// [`std::fs::read_to_string`]: crate::fs::read_to_string
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
default_read_to_string(self, buf)
default_read_to_string(self, buf, None)
}

/// Read the exact number of bytes required to fill `buf`.
Expand Down
2 changes: 1 addition & 1 deletion library/std/src/io/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ fn bench_read_to_end(b: &mut test::Bencher) {
b.iter(|| {
let mut lr = repeat(1).take(10000000);
let mut vec = Vec::with_capacity(1024);
super::default_read_to_end(&mut lr, &mut vec)
super::default_read_to_end(&mut lr, &mut vec, None)
});
}

Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@
#![feature(float_next_up_down)]
#![feature(hasher_prefixfree_extras)]
#![feature(hashmap_internals)]
#![feature(int_roundings)]
#![feature(ip)]
#![feature(ip_in_core)]
#![feature(maybe_uninit_slice)]
Expand Down