Skip to content

Commit

Permalink
Merge pull request #61 from ehuss/mdbook-spec-fixes
Browse files Browse the repository at this point in the history
Several fixes for the mdbook-spec preprocessor.
  • Loading branch information
ehuss authored Jun 27, 2024
2 parents 46b05e8 + 59586c0 commit f1a2cac
Show file tree
Hide file tree
Showing 4 changed files with 247 additions and 162 deletions.
1 change: 1 addition & 0 deletions mdbook-spec/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mdbook-spec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
[dependencies]
anyhow = "1.0.79"
mdbook = { version = "0.4.36", default-features = false }
once_cell = "1.19.0"
pathdiff = "0.2.1"
regex = "1.10.3"
semver = "1.0.21"
Expand Down
193 changes: 31 additions & 162 deletions mdbook-spec/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,24 @@ use mdbook::book::{Book, Chapter};
use mdbook::errors::Error;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook::BookItem;
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use semver::{Version, VersionReq};
use std::collections::BTreeMap;
use std::fmt::Write as _;
use std::fs;
use std::io::{self, Write as _};
use std::io;
use std::path::PathBuf;
use std::process::{self, Command};
use std::process;

mod std_links;

/// The Regex for rules like `r[foo]`.
///
/// Multi-line mode (`(?m)`) anchors `^` and `$` to line boundaries, so only
/// a line consisting solely of `r[...]` matches. Capture group 1 holds the
/// text between the brackets (one or more characters other than `]`).
///
/// Compiled lazily on first use; the `unwrap` can only fail if the constant
/// pattern itself is invalid.
static RULE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap());

/// The Regex for the syntax for blockquotes that have a specific CSS class,
/// like `> [!WARNING]`.
///
/// The `admon` group captures the text between `[!` and `]` on the marker
/// line. The `blockquote` group captures the one or more `> `-prefixed lines
/// that immediately follow it. Every line may be preceded by spaces and must
/// be terminated by `\n` to be part of the match.
///
/// Compiled lazily on first use; the `unwrap` can only fail if the constant
/// pattern itself is invalid.
static ADMONITION_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)").unwrap()
});

fn main() {
let mut args = std::env::args().skip(1);
Expand Down Expand Up @@ -56,41 +66,15 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
}

/// The preprocessor state: a warning policy plus the compiled regexes used
/// when rewriting chapter content.
struct Spec {
/// Whether or not warnings should be errors (set by SPEC_DENY_WARNINGS
/// environment variable).
deny_warnings: bool,
/// Matches rule definition lines such as `r[foo]`; group 1 is the rule id.
rule_re: Regex,
/// Matches `> [!NAME]` blockquotes; exposes the `admon` and `blockquote`
/// named groups.
admonition_re: Regex,
/// Matches standard library links, either ``[`text`](std::path)`` or
/// ``[`std::path`]``.
std_link_re: Regex,
/// Extracts `https://doc.rust-lang.org/...` hrefs from `<li><a ...>`
/// elements of generated HTML.
std_link_extract_re: Regex,
}

impl Spec {
/// Creates a `Spec`, reading the warning policy from the environment and
/// compiling every regex the preprocessor uses.
///
/// # Panics
///
/// Panics if any of the constant regex patterns fails to compile.
pub fn new() -> Spec {
// This is roughly a rustdoc intra-doc link definition.
// It is spliced into `std_link_re` below, which is compiled in verbose
// (`(?x)`) mode, so the whitespace inside this fragment is ignored.
let std_link = r"(?: [a-z]+@ )?
(?: std|core|alloc|proc_macro|test )
(?: ::[A-Za-z_!:<>{}()\[\]]+ )?";
Spec {
// Enabled only when the variable is set to exactly "1".
deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"),
rule_re: Regex::new(r"(?m)^r\[([^]]+)]$").unwrap(),
admonition_re: Regex::new(
r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)",
)
.unwrap(),
// Two alternatives:
//   groups 1 and 2: link text in backticks, then an explicit
//                   destination in parentheses;
//   group 3:        a bare link whose backticked text is itself the path.
std_link_re: Regex::new(&format!(
r"(?x)
(?:
( \[`[^`]+`\] ) \( ({std_link}) \)
)
| (?:
( \[`{std_link}`\] )
)
"
))
.unwrap(),
// Group 1 is the URL of each list-item link in the generated HTML.
std_link_extract_re: Regex::new(
r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#,
)
.unwrap(),
}
}

Expand All @@ -103,7 +87,7 @@ impl Spec {
) -> String {
let source_path = chapter.source_path.clone().unwrap_or_default();
let path = chapter.path.clone().unwrap_or_default();
self.rule_re
RULE_RE
.replace_all(&chapter.content, |caps: &Captures| {
let rule_id = &caps[1];
if let Some((old, _)) =
Expand Down Expand Up @@ -165,7 +149,7 @@ impl Spec {
/// be a CSS class is valid. The actual styling needs to be added in a CSS
/// file.
fn admonitions(&self, chapter: &Chapter) -> String {
self.admonition_re
ADMONITION_RE
.replace_all(&chapter.content, |caps: &Captures| {
let lower = caps["admon"].to_lowercase();
format!(
Expand All @@ -175,122 +159,6 @@ impl Spec {
})
.to_string()
}

/// Converts links to the standard library to the online documentation in
/// a fashion similar to rustdoc intra-doc links.
///
/// The links found in `chapter.content` are written as doc comments into a
/// temporary Rust source file, `rustdoc` is run on that file, and the URLs
/// it generated are read back out of the resulting HTML. The returned
/// string is the chapter content with explicit `(dest)` destinations
/// rewritten to `[dest]` references and one link definition per unique
/// link appended at the end. If the chapter contains no matching links,
/// the content is returned unchanged.
///
/// # Panics
///
/// Panics if the temporary directory or source file cannot be created, if
/// `rustdoc` cannot be spawned, or if the generated `index.html` cannot be
/// read. The error paths also unwrap `chapter.source_path`.
///
/// # Exits
///
/// Terminates the process with status 1 if `rustdoc` fails or if the
/// number of extracted URLs differs from the number of collected links.
fn std_links(&self, chapter: &Chapter) -> String {
// This is very hacky, but should work well enough.
//
// Collect all standard library links.
//
// links are tuples of ("[`std::foo`]", None) for links without dest,
// or ("[`foo`]", "std::foo") with a dest.
let mut links: Vec<_> = self
.std_link_re
.captures_iter(&chapter.content)
.map(|cap| {
// Group 3 is the bare form; groups 1 and 2 are text plus destination.
if let Some(no_dest) = cap.get(3) {
(no_dest.as_str(), None)
} else {
(
cap.get(1).unwrap().as_str(),
Some(cap.get(2).unwrap().as_str()),
)
}
})
.collect();
if links.is_empty() {
return chapter.content.clone();
}
// Sort and de-duplicate so each unique link is written (and later
// defined) exactly once.
links.sort();
links.dedup();

// Write a Rust source file to use with rustdoc to generate intra-doc links.
let tmp = tempfile::TempDir::with_prefix("mdbook-spec-").unwrap();
let src_path = tmp.path().join("a.rs");
// Allow redundant since there could be some in-scope things that are
// technically not necessary, but we don't care about (like
// [`Option`](std::option::Option)).
let mut src = format!(
"#![deny(rustdoc::broken_intra_doc_links)]\n\
#![allow(rustdoc::redundant_explicit_links)]\n"
);
// One list item per link, in the same order as `links`.
for (link, dest) in &links {
write!(src, "//! - {link}").unwrap();
if let Some(dest) = dest {
write!(src, "({})", dest).unwrap();
}
src.push('\n');
}
// Bring the non-`std`/`core` crates named in the link regex into scope.
writeln!(
src,
"extern crate alloc;\n\
extern crate proc_macro;\n\
extern crate test;\n"
)
.unwrap();
fs::write(&src_path, &src).unwrap();
let output = Command::new("rustdoc")
.arg("--edition=2021")
.arg(&src_path)
.current_dir(tmp.path())
.output()
.expect("rustdoc installed");
if !output.status.success() {
eprintln!(
"error: failed to extract std links ({:?}) in chapter {} ({:?})\n",
output.status,
chapter.name,
chapter.source_path.as_ref().unwrap()
);
// Forward rustdoc's own diagnostics before exiting.
io::stderr().write_all(&output.stderr).unwrap();
process::exit(1);
}

// Extract the links from the generated html.
let generated =
fs::read_to_string(tmp.path().join("doc/a/index.html")).expect("index.html generated");
let urls: Vec<_> = self
.std_link_extract_re
.captures_iter(&generated)
.map(|cap| cap.get(1).unwrap().as_str())
.collect();
// The URLs are paired with `links` by position below, so the counts
// must agree.
if urls.len() != links.len() {
eprintln!(
"error: expected rustdoc to generate {} links, but found {} in chapter {} ({:?})",
links.len(),
urls.len(),
chapter.name,
chapter.source_path.as_ref().unwrap()
);
process::exit(1);
}

// Replace any disambiguated links with just the disambiguation.
let mut output = self
.std_link_re
.replace_all(&chapter.content, |caps: &Captures| {
if let Some(dest) = caps.get(2) {
// Replace destination parenthesis with a link definition (square brackets).
format!("{}[{}]", &caps[1], dest.as_str())
} else {
caps[0].to_string()
}
})
.to_string();

// Append the link definitions to the bottom of the chapter.
// NOTE(review): the zip assumes the extracted URLs appear in the same
// order the links were written to the source file; confirm against
// rustdoc's output.
write!(output, "\n").unwrap();
for ((link, dest), url) in links.iter().zip(urls) {
if let Some(dest) = dest {
write!(output, "[{dest}]: {url}\n").unwrap();
} else {
write!(output, "{link}: {url}\n").unwrap();
}
}

output
}
}

impl Preprocessor for Spec {
Expand All @@ -300,27 +168,28 @@ impl Preprocessor for Spec {

fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
let mut found_rules = BTreeMap::new();
for section in &mut book.sections {
let BookItem::Chapter(ch) = section else {
continue;
book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else {
return;
};
if ch.is_draft_chapter() {
continue;
return;
}
ch.content = self.rule_definitions(&ch, &mut found_rules);
ch.content = self.admonitions(&ch);
ch.content = self.std_links(&ch);
}
for section in &mut book.sections {
let BookItem::Chapter(ch) = section else {
continue;
ch.content = std_links::std_links(&ch);
});
// This is a separate pass because it relies on the modifications of
// the previous passes.
book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else {
return;
};
if ch.is_draft_chapter() {
continue;
return;
}
ch.content = self.auto_link_references(&ch, &found_rules);
}

});
Ok(book)
}
}
Loading

0 comments on commit f1a2cac

Please sign in to comment.