From 513cf8664ab074d0fa540f1e1661193aeff0d358 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 4 Aug 2022 12:13:16 -0700 Subject: [PATCH 1/3] rustdoc: use a more compact encoding for implementors/trait.*.js The exact amount that this reduces the size of an implementors file depends on whether most of the impls are synthetic or not. For `Send`, it reduces the file from 128K to 116K, while for `Clone` it went from 64K to 52K. --- src/librustdoc/html/render/write_shared.rs | 90 ++++++++++++++++++---- src/librustdoc/html/static/js/main.js | 14 +++- 2 files changed, 83 insertions(+), 21 deletions(-) diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index 6fb41ff327916..246121bd7e3e1 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -1,5 +1,4 @@ use std::ffi::OsStr; -use std::fmt::Write; use std::fs::{self, File}; use std::io::prelude::*; use std::io::{self, BufReader}; @@ -10,7 +9,6 @@ use std::sync::LazyLock as Lazy; use itertools::Itertools; use rustc_data_structures::flock; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use serde::Serialize; use super::{collect_paths_for_type, ensure_trailing_slash, Context, BASIC_KEYWORDS}; use crate::clean::Crate; @@ -284,25 +282,43 @@ pub(super) fn write_shared( cx.write_shared(SharedResource::Unversioned { name }, contents, &options.emit)?; } - fn collect(path: &Path, krate: &str, key: &str) -> io::Result<(Vec, Vec)> { + /// Read a file and return all lines that match the `"{crate}":{data},` format, + /// and return a tuple `(Vec<DataString>, Vec<CrateNameString>)`. + /// + /// This forms the payload of files that look like this: + /// + /// ```javascript + /// var data = { + /// "{crate1}":{data}, + /// "{crate2}":{data} + /// }; + /// use_data(data); + /// ``` + /// + /// The file needs to be formatted so that *only crate data lines start with `"`*. 
+ fn collect(path: &Path, krate: &str) -> io::Result<(Vec, Vec)> { let mut ret = Vec::new(); let mut krates = Vec::new(); if path.exists() { - let prefix = format!(r#"{}["{}"]"#, key, krate); + let prefix = format!("\"{}\"", krate); for line in BufReader::new(File::open(path)?).lines() { let line = line?; - if !line.starts_with(key) { + if !line.starts_with('"') { continue; } if line.starts_with(&prefix) { continue; } - ret.push(line.to_string()); + if line.ends_with(",") { + ret.push(line[..line.len() - 1].to_string()); + } else { + // No comma (it's the case for the last added crate line) + ret.push(line.to_string()); + } krates.push( - line[key.len() + 2..] - .split('"') - .next() + line.split('"') + .find(|s| !s.is_empty()) .map(|s| s.to_owned()) .unwrap_or_else(String::new), ); @@ -311,6 +327,20 @@ pub(super) fn write_shared( Ok((ret, krates)) } + /// Read a file and return all lines that match the <code>"{crate}":{data},\</code> format, + /// and return a tuple `(Vec<DataString>, Vec<CrateNameString>)`. + /// + /// This forms the payload of files that look like this: + /// + /// ```javascript + /// var data = JSON.parse('{\ + /// "{crate1}":{data},\ + /// "{crate2}":{data}\ + /// }'); + /// use_data(data); + /// ``` + /// + /// The file needs to be formatted so that *only crate data lines start with `"`*. 
fn collect_json(path: &Path, krate: &str) -> io::Result<(Vec, Vec)> { let mut ret = Vec::new(); let mut krates = Vec::new(); @@ -526,13 +556,40 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; }, }; - #[derive(Serialize)] struct Implementor { text: String, synthetic: bool, types: Vec, } + impl Implementor { + fn to_js_string(&self) -> String { + fn single_quote_string(s: &str) -> String { + let mut result = String::with_capacity(s.len() + 2); + result.push_str("'"); + for c in s.chars() { + if c == '"' { + result.push_str("\""); + } else { + result.extend(c.escape_default()); + } + } + result.push_str("'"); + result + } + let text_esc = single_quote_string(&self.text); + if self.synthetic { + let types = self.types.iter().map(|type_| single_quote_string(type_)).join(","); + // use `1` to represent a synthetic, because it's fewer bytes than `true` + format!("[{text_esc},1,[{types}]]") + } else { + // The types list is only used for synthetic impls. + // If this changes, `main.js` and `write_shared.rs` both need changed. 
+ format!("[{text_esc}]") + } + } + } + let implementors = imps .iter() .filter_map(|imp| { @@ -563,9 +620,9 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; } let implementors = format!( - r#"implementors["{}"] = {};"#, + r#""{}":[{}]"#, krate.name(cx.tcx()), - serde_json::to_string(&implementors).unwrap() + implementors.iter().map(Implementor::to_js_string).join(",") ); let mut mydst = dst.clone(); @@ -576,16 +633,15 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; mydst.push(&format!("{}.{}.js", remote_item_type, remote_path[remote_path.len() - 1])); let (mut all_implementors, _) = - try_err!(collect(&mydst, krate.name(cx.tcx()).as_str(), "implementors"), &mydst); + try_err!(collect(&mydst, krate.name(cx.tcx()).as_str()), &mydst); all_implementors.push(implementors); // Sort the implementors by crate so the file will be generated // identically even with rustdoc running in parallel. all_implementors.sort(); - let mut v = String::from("(function() {var implementors = {};\n"); - for implementor in &all_implementors { - writeln!(v, "{}", *implementor).unwrap(); - } + let mut v = String::from("(function() {var implementors = {\n"); + v.push_str(&all_implementors.join(",\n")); + v.push_str("\n};"); v.push_str( "if (window.register_implementors) {\ window.register_implementors(implementors);\ diff --git a/src/librustdoc/html/static/js/main.js b/src/librustdoc/html/static/js/main.js index 0702b2b0b7caf..2e05c4be2f3e9 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -501,6 +501,10 @@ function loadCss(cssFileName) { const synthetic_implementors = document.getElementById("synthetic-implementors-list"); const inlined_types = new Set(); + const TEXT_IDX = 0; + const SYNTHETIC_IDX = 1; + const TYPES_IDX = 2; + if (synthetic_implementors) { // This `inlined_types` variable is used to avoid having the same implementation // showing up twice. 
For example "String" in the "Sync" doc page. @@ -536,10 +540,12 @@ function loadCss(cssFileName) { struct_loop: for (const struct of structs) { - const list = struct.synthetic ? synthetic_implementors : implementors; + const list = struct[SYNTHETIC_IDX] ? synthetic_implementors : implementors; - if (struct.synthetic) { - for (const struct_type of struct.types) { + // The types list is only used for synthetic impls. + // If this changes, `main.js` and `write_shared.rs` both need changed. + if (struct[SYNTHETIC_IDX]) { + for (const struct_type of struct[TYPES_IDX]) { if (inlined_types.has(struct_type)) { continue struct_loop; } @@ -548,7 +554,7 @@ function loadCss(cssFileName) { } const code = document.createElement("h3"); - code.innerHTML = struct.text; + code.innerHTML = struct[TEXT_IDX]; addClass(code, "code-header"); addClass(code, "in-band"); From dddc2fd6de3db11eeeb244e0ac5754c49f9ad11b Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 5 Aug 2022 08:56:09 -0700 Subject: [PATCH 2/3] rustdoc: reduce the number of intermediate Strings allocated --- src/librustdoc/html/format.rs | 2 +- src/librustdoc/html/render/write_shared.rs | 33 ++++++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/librustdoc/html/format.rs b/src/librustdoc/html/format.rs index 36a47b05cb9d6..3dee4d1acc819 100644 --- a/src/librustdoc/html/format.rs +++ b/src/librustdoc/html/format.rs @@ -152,7 +152,7 @@ impl Buffer { } } -fn comma_sep( +pub(crate) fn comma_sep( items: impl Iterator, space_after_comma: bool, ) -> impl fmt::Display { diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index 246121bd7e3e1..0b0f5056019bd 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -1,4 +1,5 @@ use std::ffi::OsStr; +use std::fmt; use std::fs::{self, File}; use std::io::prelude::*; use std::io::{self, BufReader}; @@ -563,7 +564,7 @@ if (typeof exports !== 
'undefined') {exports.searchIndex = searchIndex}; } impl Implementor { - fn to_js_string(&self) -> String { + fn to_js_string(&self) -> impl fmt::Display + '_ { fn single_quote_string(s: &str) -> String { let mut result = String::with_capacity(s.len() + 2); result.push_str("'"); @@ -577,16 +578,21 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; result.push_str("'"); result } - let text_esc = single_quote_string(&self.text); - if self.synthetic { - let types = self.types.iter().map(|type_| single_quote_string(type_)).join(","); - // use `1` to represent a synthetic, because it's fewer bytes than `true` - format!("[{text_esc},1,[{types}]]") - } else { - // The types list is only used for synthetic impls. - // If this changes, `main.js` and `write_shared.rs` both need changed. - format!("[{text_esc}]") - } + crate::html::format::display_fn(|f| { + let text_esc = single_quote_string(&self.text); + if self.synthetic { + let types = crate::html::format::comma_sep( + self.types.iter().map(|type_| single_quote_string(type_)), + false, + ); + // use `1` to represent a synthetic, because it's fewer bytes than `true` + write!(f, "[{text_esc},1,[{types}]]") + } else { + // The types list is only used for synthetic impls. + // If this changes, `main.js` and `write_shared.rs` both need changed. 
+ write!(f, "[{text_esc}]") + } + }) } } @@ -622,7 +628,10 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; let implementors = format!( r#""{}":[{}]"#, krate.name(cx.tcx()), - implementors.iter().map(Implementor::to_js_string).join(",") + crate::html::format::comma_sep( + implementors.iter().map(Implementor::to_js_string), + false + ) ); let mut mydst = dst.clone(); From fc31fce670ad76db14044a519a15870347253766 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 5 Aug 2022 16:36:47 -0700 Subject: [PATCH 3/3] rustdoc: use serde, which can escape strings more quickly This means we don't gain as much as we did from using single-quotes, since serde_json can only produce double-quoted strings, but it's still a win. --- src/librustdoc/html/render/write_shared.rs | 50 +++++++--------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index 0b0f5056019bd..f9abb46207d74 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -1,5 +1,4 @@ use std::ffi::OsStr; -use std::fmt; use std::fs::{self, File}; use std::io::prelude::*; use std::io::{self, BufReader}; @@ -10,6 +9,8 @@ use std::sync::LazyLock as Lazy; use itertools::Itertools; use rustc_data_structures::flock; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use serde::ser::SerializeSeq; +use serde::{Serialize, Serializer}; use super::{collect_paths_for_type, ensure_trailing_slash, Context, BASIC_KEYWORDS}; use crate::clean::Crate; @@ -563,36 +564,18 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; types: Vec, } - impl Implementor { - fn to_js_string(&self) -> impl fmt::Display + '_ { - fn single_quote_string(s: &str) -> String { - let mut result = String::with_capacity(s.len() + 2); - result.push_str("'"); - for c in s.chars() { - if c == '"' { - result.push_str("\""); - } else { - 
result.extend(c.escape_default()); - } - } - result.push_str("'"); - result + impl Serialize for Implementor { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut seq = serializer.serialize_seq(None)?; + seq.serialize_element(&self.text)?; + if self.synthetic { + seq.serialize_element(&1)?; + seq.serialize_element(&self.types)?; } - crate::html::format::display_fn(|f| { - let text_esc = single_quote_string(&self.text); - if self.synthetic { - let types = crate::html::format::comma_sep( - self.types.iter().map(|type_| single_quote_string(type_)), - false, - ); - // use `1` to represent a synthetic, because it's fewer bytes than `true` - write!(f, "[{text_esc},1,[{types}]]") - } else { - // The types list is only used for synthetic impls. - // If this changes, `main.js` and `write_shared.rs` both need changed. - write!(f, "[{text_esc}]") - } - }) + seq.end() } } @@ -626,12 +609,9 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; } let implementors = format!( - r#""{}":[{}]"#, + r#""{}":{}"#, krate.name(cx.tcx()), - crate::html::format::comma_sep( - implementors.iter().map(Implementor::to_js_string), - false - ) + serde_json::to_string(&implementors).expect("failed serde conversion"), ); let mut mydst = dst.clone();