Skip to content

Commit 2975083

Browse files
authored
Rollup merge of rust-lang#60971 - rbtcollins:docs-perf, r=rbtcollins,GuillaumeGomez
Add DocFS layer to rustdoc * Move fs::create_dir_all calls into DocFS to provide a clean extension point if async extension there is needed. * Convert callsites of create_dir_all to ensure_dir to reduce syscalls. * Convert fs::write usage to DocFS.write (which also removes a lot of try_err! usage for easier reading) * Convert File::create calls to use Vec buffers and then DocFS.write in order to both consistently reduce syscalls as well as make deferring to threads cleaner. * Convert OpenOptions usage similarly - I could find no discussion on the use of create_new for that one output file vs all the other files render creates; if link redirection attacks are a concern DocFS will provide a good central point to introduce systematic create_new usage. * DocFS::write defers to rayon for IO on Windows producing a modest speedup: before this patch on my development workstation: $ time cargo +mystg1 doc -p winapi:0.3.7 Documenting winapi v0.3.7 Finished dev [unoptimized + debuginfo] target(s) in 6m 11s real 6m11.734s user 0m0.015s sys 0m0.000s Afterwards: $ time cargo +mystg1 doc -p winapi:0.3.7 Compiling winapi v0.3.7 Documenting winapi v0.3.7 Finished dev [unoptimized + debuginfo] target(s) in 49.53s real 0m49.643s user 0m0.000s sys 0m0.015s I haven't measured how much time is in the compilation logic vs in the IO and outputting etc, but this takes it from frustrating to tolerable for me, at least for now.
2 parents 38cd948 + 65f1295 commit 2975083

File tree

5 files changed

+286
-137
lines changed

5 files changed

+286
-137
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3254,6 +3254,7 @@ dependencies = [
32543254
"minifier 0.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
32553255
"parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
32563256
"pulldown-cmark 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
3257+
"rustc-rayon 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
32573258
"tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
32583259
]
32593260

src/librustdoc/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ path = "lib.rs"
1111
[dependencies]
1212
pulldown-cmark = { version = "0.5.2", default-features = false }
1313
minifier = "0.0.30"
14+
rayon = { version = "0.2.0", package = "rustc-rayon" }
1415
tempfile = "3"
1516
parking_lot = "0.7"

src/librustdoc/docfs.rs

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//! Rustdoc's FileSystem abstraction module.
2+
//!
3+
//! On Windows this indirects IO into threads to work around performance issues
4+
//! with Defender (and other similar virus scanners that do blocking operations).
5+
//! On other platforms this is a thin shim to fs.
6+
//!
7+
//! Only calls needed to permit this workaround have been abstracted: thus
8+
//! fs::read is still done directly via the fs module; if in future rustdoc
9+
//! needs to read-after-write from a file, then it would be added to this
10+
//! abstraction.
11+
12+
use errors;
13+
14+
use std::fs;
15+
use std::io;
16+
use std::path::Path;
17+
use std::sync::Arc;
18+
use std::sync::mpsc::{channel, Receiver, Sender};
19+
20+
/// Unwraps an `io::Result`, or returns early from the enclosing function with
/// the I/O error wrapped together with the path it relates to.
///
/// The expansion names a type `E` and calls `E::new(error, $file)`, so the
/// macro can only be used where a type called `E` providing such a constructor
/// is in scope at the call site.
macro_rules! try_err {
    ($e:expr, $file:expr) => {{
        match $e {
            // Failure: bail out of the *calling* function, not just the macro.
            Err(io_err) => return Err(E::new(io_err, $file)),
            // Success: the macro evaluates to the unwrapped value.
            Ok(value) => value,
        }
    }};
}
28+
29+
/// An error type that can be built from an I/O failure plus the path the
/// failing operation was working on.
pub trait PathError {
    /// Builds the error from the underlying `io::Error` and the affected path.
    fn new<P>(e: io::Error, path: P) -> Self
    where
        P: AsRef<Path>;
}
32+
33+
pub struct ErrorStorage {
34+
sender: Option<Sender<Option<String>>>,
35+
receiver: Receiver<Option<String>>,
36+
}
37+
38+
impl ErrorStorage {
39+
pub fn new() -> ErrorStorage {
40+
let (sender, receiver) = channel();
41+
ErrorStorage {
42+
sender: Some(sender),
43+
receiver,
44+
}
45+
}
46+
47+
/// Prints all stored errors. Returns the number of printed errors.
48+
pub fn write_errors(&mut self, diag: &errors::Handler) -> usize {
49+
let mut printed = 0;
50+
// In order to drop the sender part of the channel.
51+
self.sender = None;
52+
53+
for msg in self.receiver.iter() {
54+
if let Some(ref error) = msg {
55+
diag.struct_err(&error).emit();
56+
printed += 1;
57+
}
58+
}
59+
printed
60+
}
61+
}
62+
63+
pub struct DocFS {
64+
sync_only: bool,
65+
errors: Arc<ErrorStorage>,
66+
}
67+
68+
impl DocFS {
69+
pub fn new(errors: &Arc<ErrorStorage>) -> DocFS {
70+
DocFS {
71+
sync_only: false,
72+
errors: Arc::clone(errors),
73+
}
74+
}
75+
76+
pub fn set_sync_only(&mut self, sync_only: bool) {
77+
self.sync_only = sync_only;
78+
}
79+
80+
pub fn create_dir_all<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
81+
// For now, dir creation isn't a huge time consideration, do it
82+
// synchronously, which avoids needing ordering between write() actions
83+
// and directory creation.
84+
fs::create_dir_all(path)
85+
}
86+
87+
pub fn write<P, C, E>(&self, path: P, contents: C) -> Result<(), E>
88+
where
89+
P: AsRef<Path>,
90+
C: AsRef<[u8]>,
91+
E: PathError,
92+
{
93+
if !self.sync_only && cfg!(windows) {
94+
// A possible future enhancement after more detailed profiling would
95+
// be to create the file sync so errors are reported eagerly.
96+
let contents = contents.as_ref().to_vec();
97+
let path = path.as_ref().to_path_buf();
98+
let sender = self.errors.sender.clone().unwrap();
99+
rayon::spawn(move || {
100+
match fs::write(&path, &contents) {
101+
Ok(_) => {
102+
sender.send(None)
103+
.expect(&format!("failed to send error on \"{}\"", path.display()));
104+
}
105+
Err(e) => {
106+
sender.send(Some(format!("\"{}\": {}", path.display(), e)))
107+
.expect(&format!("failed to send non-error on \"{}\"", path.display()));
108+
}
109+
}
110+
});
111+
Ok(())
112+
} else {
113+
Ok(try_err!(fs::write(&path, contents), path))
114+
}
115+
}
116+
}

0 commit comments

Comments
 (0)