Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several fixes for the mdbook-spec preprocessor. #61

Merged
merged 7 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mdbook-spec/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mdbook-spec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
[dependencies]
anyhow = "1.0.79"
mdbook = { version = "0.4.36", default-features = false }
once_cell = "1.19.0"
pathdiff = "0.2.1"
regex = "1.10.3"
semver = "1.0.21"
Expand Down
193 changes: 31 additions & 162 deletions mdbook-spec/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,24 @@ use mdbook::book::{Book, Chapter};
use mdbook::errors::Error;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook::BookItem;
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use semver::{Version, VersionReq};
use std::collections::BTreeMap;
use std::fmt::Write as _;
use std::fs;
use std::io::{self, Write as _};
use std::io;
use std::path::PathBuf;
use std::process::{self, Command};
use std::process;

mod std_links;

/// The Regex for rules like `r[foo]`.
///
/// Multi-line mode (`(?m)`) anchors the pattern to line boundaries, so only
/// a line consisting solely of `r[...]` matches. Capture group 1 is the text
/// between the brackets (one or more characters other than `]`).
static RULE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap());

/// The Regex for the syntax for blockquotes that have a specific CSS class,
/// like `> [!WARNING]`.
///
/// The `admon` group captures the text between `[!` and `]` on the marker
/// line; the `blockquote` group captures the one or more `> `-prefixed lines
/// that immediately follow it. Leading spaces before each `>` are allowed.
static ADMONITION_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)").unwrap()
});

fn main() {
let mut args = std::env::args().skip(1);
Expand Down Expand Up @@ -56,41 +66,15 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
}

/// The preprocessor type; it implements `Preprocessor` and carries the
/// settings its passes read.
struct Spec {
/// Whether or not warnings should be errors (set by SPEC_DENY_WARNINGS
/// environment variable).
deny_warnings: bool,
rule_re: Regex,
admonition_re: Regex,
std_link_re: Regex,
std_link_extract_re: Regex,
}

impl Spec {
/// Creates a `Spec`.
///
/// `deny_warnings` is enabled only when the `SPEC_DENY_WARNINGS`
/// environment variable is set to exactly `1`; any other value, or an
/// unset variable, leaves it disabled.
pub fn new() -> Spec {
// This is roughly a rustdoc intra-doc link definition.
let std_link = r"(?: [a-z]+@ )?
(?: std|core|alloc|proc_macro|test )
(?: ::[A-Za-z_!:<>{}()\[\]]+ )?";
Spec {
deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"),
rule_re: Regex::new(r"(?m)^r\[([^]]+)]$").unwrap(),
admonition_re: Regex::new(
r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)",
)
.unwrap(),
std_link_re: Regex::new(&format!(
r"(?x)
(?:
( \[`[^`]+`\] ) \( ({std_link}) \)
)
| (?:
( \[`{std_link}`\] )
)
"
))
.unwrap(),
std_link_extract_re: Regex::new(
r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#,
)
.unwrap(),
}
}

Expand All @@ -103,7 +87,7 @@ impl Spec {
) -> String {
let source_path = chapter.source_path.clone().unwrap_or_default();
let path = chapter.path.clone().unwrap_or_default();
self.rule_re
RULE_RE
.replace_all(&chapter.content, |caps: &Captures| {
let rule_id = &caps[1];
if let Some((old, _)) =
Expand Down Expand Up @@ -165,7 +149,7 @@ impl Spec {
/// be a CSS class is valid. The actual styling needs to be added in a CSS
/// file.
fn admonitions(&self, chapter: &Chapter) -> String {
self.admonition_re
ADMONITION_RE
.replace_all(&chapter.content, |caps: &Captures| {
let lower = caps["admon"].to_lowercase();
format!(
Expand All @@ -175,122 +159,6 @@ impl Spec {
})
.to_string()
}

/// Converts links to the standard library to the online documentation in
/// a fashion similar to rustdoc intra-doc links.
///
/// The links found in the chapter are written as doc comments into a
/// temporary Rust source file, `rustdoc` is run on that file, and the URLs
/// found in the generated HTML are appended to the chapter as markdown link
/// definitions.
///
/// Returns the chapter content unchanged when it contains no standard
/// library links. Exits the process with status 1 if `rustdoc` reports a
/// failure or the number of generated URLs does not match the number of
/// links.
///
/// # Panics
///
/// Panics if the temporary directory or source file cannot be created, if
/// `rustdoc` cannot be launched, or if the generated `index.html` cannot be
/// read.
fn std_links(&self, chapter: &Chapter) -> String {
    // This is very hacky, but should work well enough.
    //
    // Collect all standard library links.
    //
    // links are tuples of ("[`std::foo`]", None) for links without dest,
    // or ("[`foo`]", "std::foo") with a dest.
    let mut links: Vec<_> = self
        .std_link_re
        .captures_iter(&chapter.content)
        .map(|cap| {
            if let Some(no_dest) = cap.get(3) {
                (no_dest.as_str(), None)
            } else {
                // When group 3 did not match, the other alternative of the
                // regex did, and it always sets groups 1 and 2 together.
                (
                    cap.get(1).unwrap().as_str(),
                    Some(cap.get(2).unwrap().as_str()),
                )
            }
        })
        .collect();
    if links.is_empty() {
        return chapter.content.clone();
    }
    links.sort();
    links.dedup();

    // Only used for diagnostics. Fall back to an empty path instead of
    // unwrapping so that a chapter without a source path cannot turn an
    // error report into an unrelated panic.
    let source_path = chapter.source_path.clone().unwrap_or_default();

    // Write a Rust source file to use with rustdoc to generate intra-doc links.
    let tmp = tempfile::TempDir::with_prefix("mdbook-spec-")
        .expect("failed to create temporary directory");
    let src_path = tmp.path().join("a.rs");
    // Allow redundant since there could some in-scope things that are
    // technically not necessary, but we don't care about (like
    // [`Option`](std::option::Option)).
    let mut src = String::from(
        "#![deny(rustdoc::broken_intra_doc_links)]\n\
         #![allow(rustdoc::redundant_explicit_links)]\n",
    );
    for (link, dest) in &links {
        // Formatting into a `String` cannot fail.
        write!(src, "//! - {link}").unwrap();
        if let Some(dest) = dest {
            write!(src, "({dest})").unwrap();
        }
        src.push('\n');
    }
    src.push_str(
        "extern crate alloc;\n\
         extern crate proc_macro;\n\
         extern crate test;\n\n",
    );
    fs::write(&src_path, &src).expect("failed to write temporary rustdoc source file");
    let output = Command::new("rustdoc")
        .arg("--edition=2021")
        .arg(&src_path)
        .current_dir(tmp.path())
        .output()
        .expect("rustdoc installed");
    if !output.status.success() {
        eprintln!(
            "error: failed to extract std links ({:?}) in chapter {} ({:?})\n",
            output.status, chapter.name, source_path
        );
        io::stderr().write_all(&output.stderr).unwrap();
        process::exit(1);
    }

    // Extract the links from the generated html.
    let generated =
        fs::read_to_string(tmp.path().join("doc/a/index.html")).expect("index.html generated");
    let urls: Vec<_> = self
        .std_link_extract_re
        .captures_iter(&generated)
        .map(|cap| cap.get(1).unwrap().as_str())
        .collect();
    // The links and URLs are paired up positionally below, so a count
    // mismatch would silently attach URLs to the wrong links.
    if urls.len() != links.len() {
        eprintln!(
            "error: expected rustdoc to generate {} links, but found {} in chapter {} ({:?})",
            links.len(),
            urls.len(),
            chapter.name,
            source_path
        );
        process::exit(1);
    }

    // Replace any disambiguated links with just the disambiguation.
    let mut output = self
        .std_link_re
        .replace_all(&chapter.content, |caps: &Captures| {
            if let Some(dest) = caps.get(2) {
                // Replace destination parenthesis with a link definition (square brackets).
                format!("{}[{}]", &caps[1], dest.as_str())
            } else {
                caps[0].to_string()
            }
        })
        .to_string();

    // Append the link definitions to the bottom of the chapter.
    output.push('\n');
    for ((link, dest), url) in links.iter().zip(urls) {
        if let Some(dest) = dest {
            writeln!(output, "[{dest}]: {url}").unwrap();
        } else {
            writeln!(output, "{link}: {url}").unwrap();
        }
    }

    output
}
}

impl Preprocessor for Spec {
Expand All @@ -300,27 +168,28 @@ impl Preprocessor for Spec {

/// Rewrites the content of every non-draft chapter in two passes.
///
/// The first pass applies rule definitions (recording them in
/// `found_rules`), admonitions, and standard library links. The second
/// pass calls `auto_link_references` with the rules collected by the
/// first pass, which is why it cannot be merged into it.
fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
let mut found_rules = BTreeMap::new();
for section in &mut book.sections {
let BookItem::Chapter(ch) = section else {
continue;
book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else {
return;
};
if ch.is_draft_chapter() {
continue;
return;
}
ch.content = self.rule_definitions(&ch, &mut found_rules);
ch.content = self.admonitions(&ch);
ch.content = self.std_links(&ch);
}
for section in &mut book.sections {
let BookItem::Chapter(ch) = section else {
continue;
ch.content = std_links::std_links(&ch);
});
// This is a separate pass because it relies on the modifications of
// the previous passes.
book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else {
return;
};
if ch.is_draft_chapter() {
continue;
return;
}
ch.content = self.auto_link_references(&ch, &found_rules);
}

});
Ok(book)
}
}
Loading