diff --git a/crates/next-core/src/next_app/metadata/image.rs b/crates/next-core/src/next_app/metadata/image.rs index 65e071c51975e..992b32471622c 100644 --- a/crates/next-core/src/next_app/metadata/image.rs +++ b/crates/next-core/src/next_app/metadata/image.rs @@ -29,7 +29,7 @@ async fn hash_file_content(path: Vc) -> Result { Ok(match &*original_file_content { FileContent::Content(content) => { - let content = content.content().to_bytes()?; + let content = content.content().to_bytes(); hash_xxh3_hash64(&*content) } FileContent::NotFound => { diff --git a/crates/next-core/src/next_app/metadata/route.rs b/crates/next-core/src/next_app/metadata/route.rs index 5a7c0a780e760..64187b6163b69 100644 --- a/crates/next-core/src/next_app/metadata/route.rs +++ b/crates/next-core/src/next_app/metadata/route.rs @@ -117,7 +117,7 @@ async fn get_base64_file_content(path: Vc) -> Result { Ok(match &*original_file_content { FileContent::Content(content) => { - let content = content.content().to_bytes()?; + let content = content.content().to_bytes(); Base64Display::new(&content, &STANDARD).to_string() } FileContent::NotFound => { diff --git a/crates/next-core/src/next_font/local/font_fallback.rs b/crates/next-core/src/next_font/local/font_fallback.rs index f59b5565f45ed..72d962927570a 100644 --- a/crates/next-core/src/next_font/local/font_fallback.rs +++ b/crates/next-core/src/next_font/local/font_fallback.rs @@ -108,7 +108,7 @@ async fn get_font_adjustment( FileContent::Content(file) => file.content(), }; - let font_file_binary = font_file_rope.to_bytes()?; + let font_file_binary = font_file_rope.to_bytes(); let scope = allsorts::binary::read::ReadScope::new(&font_file_binary); let mut font = Font::new(scope.read::()?.table_provider(0)?)?.context(format!( "Unable to read font metrics from font file at {}", diff --git a/turbopack/crates/turbo-tasks-fs/src/rope.rs b/turbopack/crates/turbo-tasks-fs/src/rope.rs index cc42bf8bbec22..526ef222f5ec8 100644 --- a/turbopack/crates/turbo-tasks-fs/src/rope.rs +++ b/turbopack/crates/turbo-tasks-fs/src/rope.rs @@ -2,7 +2,7 @@ use std::{ borrow::Cow, cmp::{Ordering, min}, fmt, - io::{BufRead, Read, Result as IoResult, Write}, + io::{BufRead, Cursor, Read, Result as IoResult, Write}, mem, ops::{AddAssign, Deref}, pin::Pin, @@ -11,14 +11,13 @@ use std::{ use RopeElem::{Local, Shared}; use anyhow::{Context, Result}; -use bytes::{Buf, Bytes}; +use bytes::Buf; use futures::Stream; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_bytes::ByteBuf; use tokio::io::{AsyncRead, ReadBuf}; use triomphe::Arc; use turbo_tasks_hash::{DeterministicHash, DeterministicHasher}; -use unsize::{CoerceUnsize, Coercion}; static EMPTY_BUF: &[u8] = &[]; @@ -42,13 +41,13 @@ pub struct Rope { /// An Arc container for ropes. This indirection allows for easily sharing the /// contents between Ropes (and also RopeBuilders/RopeReaders). #[derive(Clone, Debug)] -struct InnerRope(Arc<[RopeElem]>); +struct InnerRope(Arc>); /// Differentiates the types of stored bytes in a rope. #[derive(Clone, Debug)] enum RopeElem { /// Local bytes are owned directly by this rope. - Local(Bytes), + Local(Cow<'static, [u8]>), /// Shared holds the Arc container of another rope. Shared(InnerRope), @@ -81,12 +80,6 @@ enum Uncommitted { #[default] None, - /// Stores our attempt to push static lifetime bytes into the rope. If we - /// build the Rope or concatenate another Rope, we can commit a static - /// Bytes reference and save memory. If not, we'll concatenate this into - /// writable bytes to be committed later. - Static(&'static [u8]), - /// Mutable bytes collection where non-static/non-shared bytes are written. /// This builds until the next time a static or shared bytes is /// appended, in which case we split the buffer and commit. Finishing @@ -114,35 +107,42 @@ impl Rope { } /// Returns a slice of all bytes - pub fn to_bytes(&self) -> Result> { + pub fn to_bytes(&self) -> Cow<'_, [u8]> { self.data.to_bytes(self.length) } + + pub fn into_bytes(self) -> Cow<'static, [u8]> { + self.data.into_bytes(self.length) + } } impl From> for Rope { fn from(mut bytes: Vec) -> Self { bytes.shrink_to_fit(); - Rope::from(Bytes::from(bytes)) + Rope::from(Cow::Owned(bytes)) } } impl From for Rope { - fn from(mut bytes: String) -> Self { - bytes.shrink_to_fit(); - Rope::from(Bytes::from(bytes)) + fn from(bytes: String) -> Self { + Rope::from(bytes.into_bytes()) + } +} + +impl From<&'static str> for Rope { + fn from(bytes: &'static str) -> Self { + Rope::from(Cow::Borrowed(bytes.as_bytes())) } } -impl> From for Rope { - default fn from(bytes: T) -> Self { - let bytes = bytes.into(); - // We can't have an InnerRope which contains an empty Local section. +impl From> for Rope { + fn from(bytes: Cow<'static, [u8]>) -> Self { if bytes.is_empty() { Default::default() } else { Rope { length: bytes.len(), - data: InnerRope(Arc::from([Local(bytes)]).unsize(Coercion::to_slice())), + data: InnerRope(Arc::new(vec![Local(bytes)])), } } } @@ -170,18 +170,11 @@ impl RopeBuilder { return; } - // If the string is smaller than the cost of a Bytes reference (4 usizes), then - // it's more efficient to own the bytes in a new buffer. We may be able to reuse - // that buffer when more bytes are pushed. - if bytes.len() < mem::size_of::() { - return self.uncommitted.push_static_bytes(bytes); - } - // We may have pending bytes from a prior push. self.finish(); self.length += bytes.len(); - self.committed.push(Local(Bytes::from_static(bytes))); + self.committed.push(Local(Cow::Borrowed(bytes))); } /// Concatenate another Rope instance into our builder. @@ -280,7 +273,6 @@ impl Uncommitted { fn len(&self) -> usize { match self { Uncommitted::None => 0, - Uncommitted::Static(s) => s.len(), Uncommitted::Owned(v) => v.len(), } } @@ -291,39 +283,15 @@ impl Uncommitted { debug_assert!(!bytes.is_empty(), "must not push empty uncommitted bytes"); match self { Self::None => *self = Self::Owned(bytes.to_vec()), - Self::Static(s) => { - // If we'd previously pushed static bytes, we instead concatenate those bytes - // with the new bytes in an attempt to use less memory rather than committing 2 - // Bytes references (2 * 4 usizes). - let v = [s, bytes].concat(); - *self = Self::Owned(v); - } Self::Owned(v) => v.extend(bytes), } } - /// Pushes static lifetime bytes, but only if the current representation is - /// None. Else, it coverts to an Owned. - fn push_static_bytes(&mut self, bytes: &'static [u8]) { - debug_assert!(!bytes.is_empty(), "must not push empty uncommitted bytes"); - match self { - // If we've not already pushed static bytes, we attempt to store the bytes for later. If - // we push owned bytes or another static bytes, then this attempt will fail and we'll - // instead concatenate into a single owned Bytes. But if we don't push anything (build - // the Rope), or concatenate another Rope (we can't join our bytes with the InnerRope of - // another Rope), we'll be able to commit a static Bytes reference and save overall - // memory (a small static Bytes reference is better than a small owned Bytes reference). - Self::None => *self = Self::Static(bytes), - _ => self.push_bytes(bytes), - } - } - - /// Converts the current uncommitted bytes into a Bytes, resetting our + /// Converts the current uncommitted bytes into a `Cow<'static, [u8]>`, resetting our /// representation to None. - fn finish(&mut self) -> Option { + fn finish(&mut self) -> Option> { match mem::take(self) { Self::None => None, - Self::Static(s) => Some(Bytes::from_static(s)), Self::Owned(mut v) => { v.shrink_to_fit(); Some(v.into()) @@ -336,14 +304,7 @@ impl fmt::Debug for Uncommitted { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Uncommitted::None => f.write_str("None"), - Uncommitted::Static(s) => f - .debug_tuple("Static") - .field(&Bytes::from_static(s)) - .finish(), - Uncommitted::Owned(v) => f - .debug_tuple("Owned") - .field(&Bytes::from(v.clone())) - .finish(), + Uncommitted::Owned(v) => f.debug_tuple("Owned").field(&v).finish(), } } } @@ -362,8 +323,7 @@ impl Serialize for Rope { /// deserialization won't deduplicate and share the Arcs (being the only /// possible owner of a individual "shared" data doesn't make sense). fn serialize(&self, serializer: S) -> Result { - use serde::ser::Error; - let bytes = self.to_bytes().map_err(Error::custom)?; + let bytes = self.to_bytes(); match bytes { Cow::Borrowed(b) => serde_bytes::Bytes::new(b).serialize(serializer), Cow::Owned(b) => ByteBuf::from(b).serialize(serializer), @@ -523,24 +483,45 @@ impl InnerRope { } /// Returns a slice of all bytes. - fn to_bytes(&self, len: usize) -> Result> { + fn to_bytes(&self, len: usize) -> Cow<'_, [u8]> { match &self[..] { - [] => Ok(Cow::Borrowed(EMPTY_BUF)), + [] => Cow::Borrowed(EMPTY_BUF), [Shared(inner)] => inner.to_bytes(len), - [Local(bytes)] => Ok(Cow::Borrowed(bytes)), + [Local(bytes)] => Cow::Borrowed(bytes), _ => { let mut read = RopeReader::new(self, 0); let mut buf = Vec::with_capacity(len); - read.read_to_end(&mut buf)?; - Ok(Cow::Owned(buf)) + read.read_to_end(&mut buf) + .expect("read of rope cannot fail"); + Cow::Owned(buf) } } } + + fn into_bytes(mut self, len: usize) -> Cow<'static, [u8]> { + if self.0.is_empty() { + return Cow::Borrowed(EMPTY_BUF); + } else if self.0.len() == 1 { + let data = Arc::try_unwrap(self.0); + match data { + Ok(data) => return data.into_iter().next().unwrap().into_bytes(len), + Err(data) => { + self.0 = data; + } + } + } + + let mut read = RopeReader::new(&self, 0); + let mut buf = Vec::with_capacity(len); + read.read_to_end(&mut buf) + .expect("read of rope cannot fail"); + Cow::Owned(buf) + } } impl Default for InnerRope { fn default() -> Self { - InnerRope(Arc::new([]).unsize(Coercion::to_slice())) + InnerRope(Arc::new(vec![])) } } @@ -579,7 +560,7 @@ impl From> for InnerRope { } impl Deref for InnerRope { - type Target = Arc<[RopeElem]>; + type Target = Arc>; fn deref(&self) -> &Self::Target { &self.0 @@ -610,6 +591,13 @@ impl RopeElem { _ => None, } } + + fn into_bytes(self, len: usize) -> Cow<'static, [u8]> { + match self { + Local(bytes) => bytes, + Shared(inner) => inner.into_bytes(len), + } + } } impl DeterministicHash for RopeElem { @@ -637,7 +625,7 @@ pub struct RopeReader { /// continue onto the next item in the stack. #[derive(Debug)] enum StackElem { - Local(Bytes), + Local(Cursor>), Shared(InnerRope, usize), } @@ -663,11 +651,14 @@ impl RopeReader { Some(b) => b, }; - let amount = min(bytes.len(), remaining); + let pos = bytes.position() as usize; + let len = bytes.get_ref().len() - pos; + + let amount = min(len, remaining); - buf.put_slice(&bytes[0..amount]); + buf.put_slice(&bytes.get_ref()[pos..pos + amount]); - if amount < bytes.len() { + if amount < len { bytes.advance(amount); self.stack.push(StackElem::Local(bytes)) } @@ -679,7 +670,7 @@ impl RopeReader { } impl Iterator for RopeReader { - type Item = Bytes; + type Item = Cursor>; fn next(&mut self) -> Option { // Iterates the rope's elements recursively until we find the next Local @@ -688,7 +679,7 @@ impl Iterator for RopeReader { let (inner, mut index) = match self.stack.pop() { None => return None, Some(StackElem::Local(b)) => { - debug_assert!(!b.is_empty(), "must not have empty Bytes section"); + debug_assert!(!b.get_ref().is_empty(), "must not have empty Bytes section"); return Some(b); } Some(StackElem::Shared(r, i)) => (r, i), @@ -740,12 +731,12 @@ impl BufRead for RopeReader { unreachable!() }; - Ok(bytes) + Ok(&bytes.get_ref()[bytes.position() as usize..]) } fn consume(&mut self, amt: usize) { if let Some(StackElem::Local(b)) = self.stack.last_mut() { - if amt == b.len() { + if amt == b.get_ref().len() - b.position() as usize { self.stack.pop(); } else { // Consume some amount of bytes from the current Bytes instance, ensuring @@ -759,7 +750,7 @@ impl BufRead for RopeReader { impl Stream for RopeReader { // The Result item type is required for this to be streamable into a // [Hyper::Body]. - type Item = Result; + type Item = Result>>; // Returns a "result" of reading the next shared bytes reference. This // differs from [Read::read] by not copying any memory. @@ -772,7 +763,7 @@ impl Stream for RopeReader { impl From for StackElem { fn from(el: RopeElem) -> Self { match el { - Local(bytes) => Self::Local(bytes), + Local(bytes) => Self::Local(Cursor::new(bytes)), Shared(inner) => Self::Shared(inner, 0), } } @@ -794,7 +785,7 @@ mod test { // in order to fully test cases. impl From<&str> for RopeElem { fn from(value: &str) -> Self { - RopeElem::Local(value.to_string().into()) + RopeElem::Local(value.to_string().into_bytes().into()) } } impl From> for RopeElem { @@ -977,7 +968,10 @@ mod test { let shared = Rope::from("def"); let rope = Rope::new(vec!["abc".into(), shared.into(), "ghi".into()]); - let chunks = rope.read().collect::>(); + let chunks = rope + .read() + .map(|v| String::from_utf8(v.into_inner().into_owned()).unwrap()) + .collect::>(); assert_eq!(chunks, vec!["abc", "def", "ghi"]); } @@ -1045,7 +1039,7 @@ mod test { #[test] fn test_to_bytes() -> Result<()> { let rope = Rope::from("abc"); - assert_eq!(rope.to_bytes()?, Cow::Borrowed::<[u8]>(&[0x61, 0x62, 0x63])); + assert_eq!(rope.to_bytes(), Cow::Borrowed::<[u8]>(&[0x61, 0x62, 0x63])); Ok(()) } } diff --git a/turbopack/crates/turbopack-browser/src/ecmascript/content.rs b/turbopack/crates/turbopack-browser/src/ecmascript/content.rs index d970d1eaf17df..70c1f7a6647f6 100644 --- a/turbopack/crates/turbopack-browser/src/ecmascript/content.rs +++ b/turbopack/crates/turbopack-browser/src/ecmascript/content.rs @@ -128,7 +128,7 @@ impl EcmascriptBrowserChunkContent { let mut code = code.build(); if let MinifyType::Minify { mangle } = this.chunking_context.await?.minify_type() { - code = minify(&code, source_maps, mangle)?; + code = minify(code, source_maps, mangle)?; } Ok(code.cell()) diff --git a/turbopack/crates/turbopack-browser/src/ecmascript/evaluate/chunk.rs b/turbopack/crates/turbopack-browser/src/ecmascript/evaluate/chunk.rs index 62bd90851cfb1..7875ae02968cf 100644 --- a/turbopack/crates/turbopack-browser/src/ecmascript/evaluate/chunk.rs +++ b/turbopack/crates/turbopack-browser/src/ecmascript/evaluate/chunk.rs @@ -194,7 +194,7 @@ impl EcmascriptBrowserEvaluateChunk { let mut code = code.build(); if let MinifyType::Minify { mangle } = this.chunking_context.await?.minify_type() { - code = minify(&code, source_maps, mangle)?; + code = minify(code, source_maps, mangle)?; } Ok(code.cell()) diff --git a/turbopack/crates/turbopack-core/src/code_builder.rs b/turbopack/crates/turbopack-core/src/code_builder.rs index 92504dd214ded..693d84a7af3db 100644 --- a/turbopack/crates/turbopack-core/src/code_builder.rs +++ b/turbopack/crates/turbopack-core/src/code_builder.rs @@ -34,6 +34,11 @@ impl Code { pub fn has_source_map(&self) -> bool { !self.mappings.is_empty() } + + /// Take the source code out of the Code. + pub fn into_source_code(self) -> Rope { + self.code + } } /// CodeBuilder provides a mutable container to append source code. diff --git a/turbopack/crates/turbopack-dev-server/src/http.rs b/turbopack/crates/turbopack-dev-server/src/http.rs index 5a76680edf3e2..f83576491440e 100644 --- a/turbopack/crates/turbopack-dev-server/src/http.rs +++ b/turbopack/crates/turbopack-dev-server/src/http.rs @@ -190,7 +190,7 @@ pub async fn process_request_with_content_source( hyper::header::HeaderValue::try_from(content.len().to_string())?, ); - response.body(hyper::Body::wrap_stream(content.read()))? + response.body(hyper::Body::wrap_stream(ReaderStream::new(content.read())))? }; return Ok((response, side_effects)); diff --git a/turbopack/crates/turbopack-ecmascript/src/minify.rs b/turbopack/crates/turbopack-ecmascript/src/minify.rs index a7e85af651ba1..bae7e8f0a8432 100644 --- a/turbopack/crates/turbopack-ecmascript/src/minify.rs +++ b/turbopack/crates/turbopack-ecmascript/src/minify.rs @@ -31,17 +31,17 @@ use turbopack_core::{ use crate::parse::generate_js_source_map; #[instrument(level = Level::INFO, skip_all)] -pub fn minify(code: &Code, source_maps: bool, mangle: Option) -> Result { +pub fn minify(code: Code, source_maps: bool, mangle: Option) -> Result { let source_maps = source_maps .then(|| code.generate_source_map_ref()) .transpose()?; + let source_code = code.into_source_code().into_bytes().into_owned(); + let source_code = String::from_utf8(source_code)?; + let cm = Arc::new(SwcSourceMap::new(FilePathMapping::empty())); let (src, mut src_map_buf) = { - let fm = cm.new_source_file( - FileName::Anon.into(), - code.source_code().to_str()?.into_owned(), - ); + let fm = cm.new_source_file(FileName::Anon.into(), source_code); let lexer = Lexer::new( Syntax::default(), @@ -57,10 +57,7 @@ pub fn minify(code: &Code, source_maps: bool, mangle: Option) -> Res Ok(program) => program, Err(err) => { err.into_diagnostic(handler).emit(); - bail!( - "failed to parse source code\n{}", - code.source_code().to_str()? - ) + bail!("failed to parse source code\n{}", fm.src) } }; let comments = SingleThreadedComments::default(); diff --git a/turbopack/crates/turbopack-image/src/process/mod.rs b/turbopack/crates/turbopack-image/src/process/mod.rs index 07ea6599c926d..48a7b45f5deda 100644 --- a/turbopack/crates/turbopack-image/src/process/mod.rs +++ b/turbopack/crates/turbopack-image/src/process/mod.rs @@ -347,7 +347,7 @@ pub async fn get_meta_data( let FileContent::Content(content) = &*content.await? else { bail!("Input image not found"); }; - let bytes = content.content().to_bytes()?; + let bytes = content.content().to_bytes(); let path_resolved = ident.path().to_resolved().await?; let path = path_resolved.await?; let extension = path.extension_ref(); @@ -430,7 +430,7 @@ pub async fn optimize( let FileContent::Content(content) = &*content.await? else { return Ok(FileContent::NotFound.cell()); }; - let bytes = content.content().to_bytes()?; + let bytes = content.content().to_bytes(); let path = ident.path().to_resolved().await?; let Some((image, format)) = load_image(path, &bytes, ident.path().await?.extension_ref()) diff --git a/turbopack/crates/turbopack-node/src/transforms/webpack.rs b/turbopack/crates/turbopack-node/src/transforms/webpack.rs index 58839b773ce45..31dece53d5f54 100644 --- a/turbopack/crates/turbopack-node/src/transforms/webpack.rs +++ b/turbopack/crates/turbopack-node/src/transforms/webpack.rs @@ -230,7 +230,7 @@ impl WebpackLoadersProcessedAsset { "binary".to_string(), JsonValue::from( base64::engine::general_purpose::STANDARD - .encode(file_content.content().to_bytes().unwrap()), + .encode(file_content.content().to_bytes()), ), )))), }; diff --git a/turbopack/crates/turbopack-nodejs/src/ecmascript/node/content.rs b/turbopack/crates/turbopack-nodejs/src/ecmascript/node/content.rs index 1c38092470959..079f9ba6bcc0a 100644 --- a/turbopack/crates/turbopack-nodejs/src/ecmascript/node/content.rs +++ b/turbopack/crates/turbopack-nodejs/src/ecmascript/node/content.rs @@ -78,7 +78,7 @@ impl EcmascriptBuildNodeChunkContent { let mut code = code.build(); if let MinifyType::Minify { mangle } = this.chunking_context.await?.minify_type() { - code = minify(&code, source_maps, mangle)?; + code = minify(code, source_maps, mangle)?; } Ok(code.cell()) diff --git a/turbopack/crates/turbopack-wasm/src/analysis.rs b/turbopack/crates/turbopack-wasm/src/analysis.rs index 1e405d60b7815..cca2e4f23debf 100644 --- a/turbopack/crates/turbopack-wasm/src/analysis.rs +++ b/turbopack/crates/turbopack-wasm/src/analysis.rs @@ -29,7 +29,7 @@ pub(crate) async fn analyze(source: Vc) -> Result