-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Deny broken intra-doc links in linkchecker #77971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,7 @@ edition = "2018" | |
[[bin]] | ||
name = "linkchecker" | ||
path = "main.rs" | ||
|
||
[dependencies] | ||
regex = "1" | ||
once_cell = "1" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,9 @@ use std::fs; | |
use std::path::{Component, Path, PathBuf}; | ||
use std::rc::Rc; | ||
|
||
use once_cell::sync::Lazy; | ||
use regex::Regex; | ||
|
||
use crate::Redirect::*; | ||
|
||
// Add linkcheck exceptions here | ||
|
@@ -50,6 +53,44 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[ | |
("alloc/collections/btree_set/struct.BTreeSet.html", &["#insert-and-complex-keys"]), | ||
]; | ||
|
||
#[rustfmt::skip] | ||
const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure you want to run this check on html? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, there's no way for the linkchecker to tell if a link will be resolved by rustdoc or not. It has to look at the HTML to see whether the markdown was transformed into a link or left as-is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That sounds really risky. :-/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what you mean by risky. What would be a better way? This inherently wants to look at the generated HTML, not the original, because you could have |
||
// This will never have links that are not in other pages. | ||
// To avoid repeating the exceptions twice, an empty list means all broken links are allowed. | ||
("reference/print.html", &[]), | ||
// All the reference 'links' are actually EBNF highlighted as code | ||
("reference/comments.html", &[ | ||
"/</code> <code>!", | ||
"*</code> <code>!", | ||
]), | ||
("reference/identifiers.html", &[ | ||
"a</code>-<code>z</code> <code>A</code>-<code>Z", | ||
"a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_", | ||
"a</code>-<code>z</code> <code>A</code>-<code>Z</code>] [<code>a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_", | ||
]), | ||
("reference/tokens.html", &[ | ||
"0</code>-<code>1", | ||
"0</code>-<code>7", | ||
"0</code>-<code>9", | ||
"0</code>-<code>9", | ||
"0</code>-<code>9</code> <code>a</code>-<code>f</code> <code>A</code>-<code>F", | ||
]), | ||
("reference/notation.html", &[ | ||
"b</code> <code>B", | ||
"a</code>-<code>z", | ||
]), | ||
// This is being used in the sense of 'inclusive range', not a markdown link | ||
("core/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]), | ||
("std/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]), | ||
("core/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]), | ||
("alloc/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]), | ||
("std/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]), | ||
|
||
]; | ||
|
||
/// Lazily compiled pattern for `[<code>...</code>]`: square brackets left around a code span. | ||
/// Capture group 1 is the text between the opening `<code>` and the closing `</code>`; | ||
/// `.*` is greedy, so it extends to the last `</code>]` in the text being searched. | ||
static BROKEN_INTRA_DOC_LINK: Lazy<Regex> = | ||
Lazy::new(|| Regex::new(r#"\[<code>(.*)</code>\]"#).unwrap()); | ||
|
||
macro_rules! t { | ||
($e:expr) => { | ||
match $e { | ||
|
@@ -138,6 +179,14 @@ fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) { | |
} | ||
} | ||
|
||
/// Reports whether `link` is an allowed broken intra-doc link for `file`. | ||
/// | ||
/// Uses the first `INTRA_DOC_LINK_EXCEPTIONS` entry whose path `file` ends with. | ||
/// Returns `true` if that entry's list is empty or contains `link`; | ||
/// returns `false` if no entry matches `file`. | ||
fn is_intra_doc_exception(file: &Path, link: &str) -> bool { | ||
if let Some(entry) = INTRA_DOC_LINK_EXCEPTIONS.iter().find(|&(f, _)| file.ends_with(f)) { | ||
// An empty exception list means every broken link in this file is allowed. | ||
entry.1.is_empty() || entry.1.contains(&link) | ||
} else { | ||
false | ||
} | ||
} | ||
|
||
fn is_exception(file: &Path, link: &str) -> bool { | ||
if let Some(entry) = LINKCHECK_EXCEPTIONS.iter().find(|&(f, _)| file.ends_with(f)) { | ||
entry.1.contains(&link) | ||
|
@@ -292,6 +341,19 @@ fn check(cache: &mut Cache, root: &Path, file: &Path, errors: &mut bool) -> Opti | |
} | ||
} | ||
}); | ||
|
||
// Search for intra-doc links that rustdoc didn't warn about | ||
// FIXME(#77199, 77200) Rustdoc should just warn about these directly. | ||
// NOTE: only looks at one line at a time; in practice this should find most links | ||
for (i, line) in contents.lines().enumerate() { | ||
for broken_link in BROKEN_INTRA_DOC_LINK.captures_iter(line) { | ||
if !is_intra_doc_exception(file, &broken_link[1]) { | ||
*errors = true; | ||
print!("{}:{}: broken intra-doc link - ", pretty_file.display(), i + 1); | ||
println!("{}", &broken_link[0]); | ||
} | ||
} | ||
} | ||
Some(pretty_file) | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.