Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several fixes for the mdbook-spec preprocessor. #61

Merged
merged 7 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Move regular expressions to be global statics.
This just makes it a little easier to refer to them.
  • Loading branch information
ehuss committed Jun 27, 2024
commit 499b96b3437a4b20e5d5bde92e03554230dd22e1
1 change: 1 addition & 0 deletions mdbook-spec/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mdbook-spec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0"
[dependencies]
anyhow = "1.0.79"
mdbook = { version = "0.4.36", default-features = false }
once_cell = "1.19.0"
pathdiff = "0.2.1"
regex = "1.10.3"
semver = "1.0.21"
Expand Down
78 changes: 42 additions & 36 deletions mdbook-spec/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use mdbook::book::{Book, Chapter};
use mdbook::errors::Error;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook::BookItem;
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use semver::{Version, VersionReq};
use std::collections::BTreeMap;
Expand All @@ -11,6 +12,40 @@ use std::io::{self, Write as _};
use std::path::PathBuf;
use std::process::{self, Command};

/// Matches a rule marker of the form `r[foo]` that occupies a whole line.
///
/// `(?m)` makes `^` and `$` anchor at line boundaries; capture group 1 is the
/// text between the brackets (the rule id).
static RULE_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(?m)^r\[([^]]+)]$";
    Regex::new(pattern).unwrap()
});

/// Matches a blockquote that carries a specific CSS class, i.e. a tag line
/// such as `> [!WARNING]` followed by one or more `> ...` lines.
///
/// Named groups: `admon` is the tag inside `[!...]`, and `blockquote` is the
/// run of quoted lines after the tag line (each one newline-terminated).
static ADMONITION_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)";
    Regex::new(pattern).unwrap()
});

/// Pattern fragment for a markdown link destination (brackets excluded) that
/// may be a standard-library link in rustdoc's intra-doc notation.
///
/// The fragment contains literal spaces and newlines, so it is only
/// meaningful when spliced into a regex compiled in verbose (`(?x)`) mode,
/// where that whitespace is ignored.
const STD_LINK: &str = concat!(
    // Optional lowercase `word@` prefix.
    r"(?: [a-z]+@ )?",
    "\n",
    // One of the crate names treated as "the standard library".
    r"(?: std|core|alloc|proc_macro|test )",
    "\n",
    // Optional `::`-introduced remainder of the path.
    r"(?: ::[A-Za-z_!:<>{}()\[\]]+ )?",
);

/// Matches a markdown link that may point into the standard library.
///
/// Two alternatives are recognised:
/// - a link with an explicit destination: group 1 is the bracketed, backticked
///   link text and group 2 is the destination (a `STD_LINK` match);
/// - a link without a destination: group 3 is the bracketed, backticked
///   `STD_LINK` match itself.
static STD_LINK_RE: Lazy<Regex> = Lazy::new(|| {
    // Verbose mode (`(?x)`): whitespace in the pattern is not significant.
    let pattern = format!(
        r"(?x)
(?:
( \[`[^`]+`\] ) \( ({STD_LINK}) \)
)
| (?:
( \[`{STD_LINK}`\] )
)
"
    );
    Regex::new(&pattern).unwrap()
});

/// Pulls the resolved std links out of the HTML that rustdoc generates.
///
/// Matches an `<li><a ... href="...">` entry whose URL starts with
/// `https://doc.rust-lang.org/`; capture group 1 is that URL.
static STD_LINK_EXTRACT_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#;
    Regex::new(pattern).unwrap()
});

fn main() {
let mut args = std::env::args().skip(1);
match args.next().as_deref() {
Expand Down Expand Up @@ -56,41 +91,15 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
}

struct Spec {
/// Whether or not warnings should be errors (set by SPEC_DENY_WARNINGS
/// environment variable).
deny_warnings: bool,
rule_re: Regex,
admonition_re: Regex,
std_link_re: Regex,
std_link_extract_re: Regex,
}

impl Spec {
pub fn new() -> Spec {
// This is roughly a rustdoc intra-doc link definition.
let std_link = r"(?: [a-z]+@ )?
(?: std|core|alloc|proc_macro|test )
(?: ::[A-Za-z_!:<>{}()\[\]]+ )?";
Spec {
deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"),
rule_re: Regex::new(r"(?m)^r\[([^]]+)]$").unwrap(),
admonition_re: Regex::new(
r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>(?: *> .*\n)+)",
)
.unwrap(),
std_link_re: Regex::new(&format!(
r"(?x)
(?:
( \[`[^`]+`\] ) \( ({std_link}) \)
)
| (?:
( \[`{std_link}`\] )
)
"
))
.unwrap(),
std_link_extract_re: Regex::new(
r#"<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#,
)
.unwrap(),
}
}

Expand All @@ -103,7 +112,7 @@ impl Spec {
) -> String {
let source_path = chapter.source_path.clone().unwrap_or_default();
let path = chapter.path.clone().unwrap_or_default();
self.rule_re
RULE_RE
.replace_all(&chapter.content, |caps: &Captures| {
let rule_id = &caps[1];
if let Some((old, _)) =
Expand Down Expand Up @@ -165,7 +174,7 @@ impl Spec {
/// be a CSS class is valid. The actual styling needs to be added in a CSS
/// file.
fn admonitions(&self, chapter: &Chapter) -> String {
self.admonition_re
ADMONITION_RE
.replace_all(&chapter.content, |caps: &Captures| {
let lower = caps["admon"].to_lowercase();
format!(
Expand All @@ -185,8 +194,7 @@ impl Spec {
//
// links are tuples of ("[`std::foo`]", None) for links without dest,
// or ("[`foo`]", "std::foo") with a dest.
let mut links: Vec<_> = self
.std_link_re
let mut links: Vec<_> = STD_LINK_RE
.captures_iter(&chapter.content)
.map(|cap| {
if let Some(no_dest) = cap.get(3) {
Expand Down Expand Up @@ -250,8 +258,7 @@ impl Spec {
// Extract the links from the generated html.
let generated =
fs::read_to_string(tmp.path().join("doc/a/index.html")).expect("index.html generated");
let urls: Vec<_> = self
.std_link_extract_re
let urls: Vec<_> = STD_LINK_EXTRACT_RE
.captures_iter(&generated)
.map(|cap| cap.get(1).unwrap().as_str())
.collect();
Expand All @@ -267,8 +274,7 @@ impl Spec {
}

// Replace any disambiguated links with just the disambiguation.
let mut output = self
.std_link_re
let mut output = STD_LINK_RE
.replace_all(&chapter.content, |caps: &Captures| {
if let Some(dest) = caps.get(2) {
// Replace destination parenthesis with a link definition (square brackets).
Expand Down