Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unclosed html tag lint #77119

Merged
merged 12 commits into from
Oct 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions compiler/rustc_lint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ use rustc_middle::ty::query::Providers;
use rustc_middle::ty::TyCtxt;
use rustc_session::lint::builtin::{
BARE_TRAIT_OBJECTS, BROKEN_INTRA_DOC_LINKS, ELIDED_LIFETIMES_IN_PATHS,
EXPLICIT_OUTLIVES_REQUIREMENTS, INVALID_CODEBLOCK_ATTRIBUTES, MISSING_DOC_CODE_EXAMPLES,
PRIVATE_DOC_TESTS,
EXPLICIT_OUTLIVES_REQUIREMENTS, INVALID_CODEBLOCK_ATTRIBUTES, INVALID_HTML_TAGS,
MISSING_DOC_CODE_EXAMPLES, PRIVATE_DOC_TESTS,
};
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::Span;
Expand Down Expand Up @@ -308,7 +308,8 @@ fn register_builtins(store: &mut LintStore, no_interleave_lints: bool) {
PRIVATE_INTRA_DOC_LINKS,
INVALID_CODEBLOCK_ATTRIBUTES,
MISSING_DOC_CODE_EXAMPLES,
PRIVATE_DOC_TESTS
PRIVATE_DOC_TESTS,
INVALID_HTML_TAGS
);

// Register renamed and removed lints.
Expand Down
11 changes: 11 additions & 0 deletions compiler/rustc_session/src/lint/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,16 @@ declare_lint! {
"detects code samples in docs of private items not documented by rustdoc"
}

declare_lint! {
/// The `invalid_html_tags` lint detects invalid HTML tags. This is a
/// `rustdoc` only lint, see the documentation in the [rustdoc book].
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
///
/// [rustdoc book]: ../../../rustdoc/lints.html#invalid_html_tags
pub INVALID_HTML_TAGS,
Allow,
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
"detects invalid HTML tags in doc comments"
}

declare_lint! {
/// The `where_clauses_object_safety` lint detects for [object safety] of
/// [where clauses].
Expand Down Expand Up @@ -2699,6 +2709,7 @@ declare_lint_pass! {
INVALID_CODEBLOCK_ATTRIBUTES,
MISSING_CRATE_LEVEL_DOCS,
MISSING_DOC_CODE_EXAMPLES,
INVALID_HTML_TAGS,
PRIVATE_DOC_TESTS,
WHERE_CLAUSES_OBJECT_SAFETY,
PROC_MACRO_DERIVE_RESOLUTION_FALLBACK,
Expand Down
35 changes: 35 additions & 0 deletions src/doc/rustdoc/src/lints.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,38 @@ warning: unknown attribute `should-panic`. Did you mean `should_panic`?

In the example above, the correct form is `should_panic`. This helps detect
typo mistakes for some common attributes.

## invalid_html_tags

This lint is **allowed by default** and is **nightly-only**. It detects unclosed
or invalid HTML tags. For example:

```rust
#![warn(invalid_html_tags)]

/// <h1>
/// </script>
pub fn foo() {}
```

Which will give:

```text
warning: unopened HTML tag `script`
--> foo.rs:1:1
|
1 | / /// <h1>
2 | | /// </script>
| |_____________^
|
= note: `#[warn(invalid_html_tags)]` on by default

warning: unclosed HTML tag `h1`
--> foo.rs:1:1
|
1 | / /// <h1>
2 | | /// </script>
| |_____________^

warning: 2 warnings emitted
```
2 changes: 2 additions & 0 deletions src/librustdoc/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ pub fn run_core(
let private_doc_tests = rustc_lint::builtin::PRIVATE_DOC_TESTS.name;
let no_crate_level_docs = rustc_lint::builtin::MISSING_CRATE_LEVEL_DOCS.name;
let invalid_codeblock_attributes_name = rustc_lint::builtin::INVALID_CODEBLOCK_ATTRIBUTES.name;
let invalid_html_tags = rustc_lint::builtin::INVALID_HTML_TAGS.name;
let renamed_and_removed_lints = rustc_lint::builtin::RENAMED_AND_REMOVED_LINTS.name;
let unknown_lints = rustc_lint::builtin::UNKNOWN_LINTS.name;

Expand All @@ -340,6 +341,7 @@ pub fn run_core(
private_doc_tests.to_owned(),
no_crate_level_docs.to_owned(),
invalid_codeblock_attributes_name.to_owned(),
invalid_html_tags.to_owned(),
renamed_and_removed_lints.to_owned(),
unknown_lints.to_owned(),
];
Expand Down
2 changes: 1 addition & 1 deletion src/librustdoc/html/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use pulldown_cmark::{html, BrokenLink, CodeBlockKind, CowStr, Event, Options, Pa
#[cfg(test)]
mod tests;

fn opts() -> Options {
pub(crate) fn opts() -> Options {
Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES | Options::ENABLE_STRIKETHROUGH
}

Expand Down
190 changes: 190 additions & 0 deletions src/librustdoc/passes/html_tags.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
use super::{span_of_attrs, Pass};
use crate::clean::*;
use crate::core::DocContext;
use crate::fold::DocFolder;
use crate::html::markdown::opts;
use core::ops::Range;
use pulldown_cmark::{Event, Parser};
use rustc_feature::UnstableFeatures;
use rustc_session::lint;

pub const CHECK_INVALID_HTML_TAGS: Pass = Pass {
name: "check-invalid-html-tags",
run: check_invalid_html_tags,
description: "detects invalid HTML tags in doc comments",
};

struct InvalidHtmlTagsLinter<'a, 'tcx> {
cx: &'a DocContext<'tcx>,
}

impl<'a, 'tcx> InvalidHtmlTagsLinter<'a, 'tcx> {
fn new(cx: &'a DocContext<'tcx>) -> Self {
InvalidHtmlTagsLinter { cx }
}
}

pub fn check_invalid_html_tags(krate: Crate, cx: &DocContext<'_>) -> Crate {
if !UnstableFeatures::from_environment().is_nightly_build() {
krate
} else {
let mut coll = InvalidHtmlTagsLinter::new(cx);

coll.fold_crate(krate)
}
}

const ALLOWED_UNCLOSED: &[&str] = &[
"area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
"source", "track", "wbr",
];

fn drop_tag(
tags: &mut Vec<(String, Range<usize>)>,
tag_name: String,
range: Range<usize>,
f: &impl Fn(&str, &Range<usize>),
) {
let tag_name_low = tag_name.to_lowercase();
if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
// If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
// be emitted.
let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
let at = at.to_lowercase();
at == "script" || at == "style"
});
for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
if should_not_warn {
continue;
}
let last_tag_name_low = last_tag_name.to_lowercase();
if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) {
continue;
}
// `tags` is used as a queue, meaning that everything after `pos` is included inside it.
// So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still
// have `h3`, meaning the tag wasn't closed as it should have.
f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span);
}
// Remove the `tag_name` that was originally closed
tags.pop();
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
} else {
// It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required
// but it helps for the visualization).
f(&format!("unopened HTML tag `{}`", tag_name), &range);
}
}

fn extract_tag(
tags: &mut Vec<(String, Range<usize>)>,
text: &str,
range: Range<usize>,
f: &impl Fn(&str, &Range<usize>),
) {
let mut iter = text.char_indices().peekable();

while let Some((start_pos, c)) = iter.next() {
if c == '<' {
let mut tag_name = String::new();
let mut is_closing = false;
let mut prev_pos = start_pos;
loop {
let (pos, c) = match iter.peek() {
Some((pos, c)) => (*pos, *c),
// In case we reached the of the doc comment, we want to check that it's an
// unclosed HTML tag. For example "/// <h3".
None => (prev_pos, '\0'),
};
prev_pos = pos;
// Checking if this is a closing tag (like `</a>` for `<a>`).
if c == '/' && tag_name.is_empty() {
jyn514 marked this conversation as resolved.
Show resolved Hide resolved
is_closing = true;
} else if c.is_ascii_alphanumeric() {
tag_name.push(c);
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
} else {
if !tag_name.is_empty() {
let mut r =
Range { start: range.start + start_pos, end: range.start + pos };
if c == '>' {
// In case we have a tag without attribute, we can consider the span to
// refer to it fully.
r.end += 1;
}
if is_closing {
// In case we have "</div >" or even "</div >".
if c != '>' {
if !c.is_whitespace() {
// It seems like it's not a valid HTML tag.
break;
}
GuillaumeGomez marked this conversation as resolved.
Show resolved Hide resolved
let mut found = false;
for (new_pos, c) in text[pos..].char_indices() {
if !c.is_whitespace() {
if c == '>' {
r.end = range.start + new_pos + 1;
found = true;
}
break;
}
Comment on lines +122 to +128
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would be clearer as an if-else:

Suggested change
if !c.is_whitespace() {
if c == '>' {
r.end = range.start + new_pos + 1;
found = true;
}
break;
}
if c == '>' {
r.end = range.start + new_pos + 1;
found = true;
break;
} else if !c.is_whitespace() {
break;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand in which it is more readable. :-/

In my code, if it's not a whitespace, then you enter the block and you check along the way if the character is >. It's not about optimization, I just find it more logical this way.

}
if !found {
break;
}
}
drop_tag(tags, tag_name, r, f);
} else {
tags.push((tag_name, r));
}
}
break;
}
iter.next();
}
}
}
}

impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> {
fn fold_item(&mut self, item: Item) -> Option<Item> {
let hir_id = match self.cx.as_local_hir_id(item.def_id) {
Some(hir_id) => hir_id,
None => {
// If non-local, no need to check anything.
jyn514 marked this conversation as resolved.
Show resolved Hide resolved
return self.fold_item_recur(item);
}
};
let dox = item.attrs.collapsed_doc_value().unwrap_or_default();
if !dox.is_empty() {
let cx = &self.cx;
let report_diag = |msg: &str, range: &Range<usize>| {
let sp = match super::source_span_for_markdown_range(cx, &dox, range, &item.attrs) {
Some(sp) => sp,
None => span_of_attrs(&item.attrs).unwrap_or(item.source.span()),
};
cx.tcx.struct_span_lint_hir(lint::builtin::INVALID_HTML_TAGS, hir_id, sp, |lint| {
lint.build(msg).emit()
});
};

let mut tags = Vec::new();

let p = Parser::new_ext(&dox, opts()).into_offset_iter();

for (event, range) in p {
match event {
Event::Html(text) => extract_tag(&mut tags, &text, range, &report_diag),
_ => {}
}
}

for (tag, range) in tags.iter().filter(|(t, _)| {
let t = t.to_lowercase();
ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none()
}) {
report_diag(&format!("unclosed HTML tag `{}`", tag), range);
}
}

self.fold_item_recur(item)
}
}
5 changes: 5 additions & 0 deletions src/librustdoc/passes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pub use self::check_code_block_syntax::CHECK_CODE_BLOCK_SYNTAX;
mod calculate_doc_coverage;
pub use self::calculate_doc_coverage::CALCULATE_DOC_COVERAGE;

mod html_tags;
pub use self::html_tags::CHECK_INVALID_HTML_TAGS;

/// A single pass over the cleaned documentation.
///
/// Runs in the compiler context, so it has access to types and traits and the like.
Expand Down Expand Up @@ -87,6 +90,7 @@ pub const PASSES: &[Pass] = &[
CHECK_CODE_BLOCK_SYNTAX,
COLLECT_TRAIT_IMPLS,
CALCULATE_DOC_COVERAGE,
CHECK_INVALID_HTML_TAGS,
];

/// The list of passes run by default.
Expand All @@ -100,6 +104,7 @@ pub const DEFAULT_PASSES: &[ConditionalPass] = &[
ConditionalPass::new(STRIP_PRIV_IMPORTS, WhenDocumentPrivate),
ConditionalPass::always(COLLECT_INTRA_DOC_LINKS),
ConditionalPass::always(CHECK_CODE_BLOCK_SYNTAX),
ConditionalPass::always(CHECK_INVALID_HTML_TAGS),
ConditionalPass::always(PROPAGATE_DOC_CFG),
];

Expand Down
Loading