Fix even more print page links. (#963)

ehuss · Dylan-DPC · commit 228e99ba116e · 2019-07-01T17:52:25.000+02:00
diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -33,12 +33,10 @@ impl HtmlHandlebars {
             let content = ch.content.clone();
             let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);
 
-            let string_path = ch.path.parent().unwrap().display().to_string();
-
-            let fixed_content = utils::render_markdown_with_base(
+            let fixed_content = utils::render_markdown_with_path(
                 &ch.content,
                 ctx.html_config.curly_quotes,
-                &string_path,
+                Some(&ch.path),
             );
             print_content.push_str(&fixed_content);
 
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
@@ -8,6 +8,8 @@ use regex::Regex;
 use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag};
 
 use std::borrow::Cow;
+use std::fmt::Write;
+use std::path::Path;
 
 pub use self::string::take_lines;
 
@@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String {
     normalize_id(trimmed)
 }
 
-fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
+/// Fix links to the correct location.
+///
+/// This adjusts links, such as turning `.md` extensions to `.html`.
+///
+/// `path` is the path to the page being rendered relative to the root of the
+/// book. This is used for the `print.html` page so that links on the print
+/// page go to the original location. Normal page rendering sets `path` to
+/// `None`. Ideally, print page links would link to anchors on the print page,
+/// but that is very difficult.
+fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
     lazy_static! {
         static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
         static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
     }
 
-    fn fix<'a>(dest: CowStr<'a>, base: &str) -> CowStr<'a> {
+    fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+        if dest.starts_with('#') {
+            // Fragment-only link.
+            if let Some(path) = path {
+                let mut base = path.display().to_string();
+                if base.ends_with(".md") {
+                    base.replace_range(base.len() - 3.., ".html");
+                }
+                return format!("{}{}", base, dest).into();
+            } else {
+                return dest;
+            }
+        }
         // Don't modify links with schemes like `https`.
         if !SCHEME_LINK.is_match(&dest) {
             // This is a relative link, adjust it as necessary.
             let mut fixed_link = String::new();
-            if !base.is_empty() {
-                fixed_link.push_str(base);
-                fixed_link.push_str("/");
+            if let Some(path) = path {
+                let base = path
+                    .parent()
+                    .expect("path can't be empty")
+                    .to_str()
+                    .expect("utf-8 paths only");
+                if !base.is_empty() {
+                    write!(fixed_link, "{}/", base).unwrap();
+                }
             }
 
             if let Some(caps) = MD_LINK.captures(&dest) {
@@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
         dest
     }
 
+    fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+        // Pass the `href`/`src` value of each raw `<a>`/`<img>` tag through
+        // `fix`, like markdown links. Only double-quoted values are matched.
+        //
+        // This is a terrible hack, but should be reasonably reliable. Nobody
+        // should ever parse a tag with a regex, but I know of nothing in Rust
+        // suited to the partial html fragments pulldown_cmark generates.
+        // Dozens of other HTML tags/attributes contain paths; feel free to
+        // add more if desired, these are just the ones needed right now.
+        lazy_static! {
+            static ref HTML_LINK: Regex =
+                Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
+        }
+
+        HTML_LINK
+            .replace_all(&html, |caps: &regex::Captures<'_>| {
+                let fixed = fix(caps[2].into(), path);
+                format!("{}{}\"", &caps[1], fixed)
+            })
+            .into_owned()
+            .into()
+    }
+
     match event {
         Event::Start(Tag::Link(link_type, dest, title)) => {
-            Event::Start(Tag::Link(link_type, fix(dest, with_base), title))
+            Event::Start(Tag::Link(link_type, fix(dest, path), title))
         }
         Event::Start(Tag::Image(link_type, dest, title)) => {
-            Event::Start(Tag::Image(link_type, fix(dest, with_base), title))
+            Event::Start(Tag::Image(link_type, fix(dest, path), title))
         }
+        Event::Html(html) => Event::Html(fix_html(html, path)),
+        Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
         _ => event,
     }
 }
 
 /// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
 pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
-    render_markdown_with_base(text, curly_quotes, "")
+    render_markdown_with_path(text, curly_quotes, None)
 }
 
 pub fn new_cmark_parser(text: &str) -> Parser<'_> {
@@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
     Parser::new_ext(text, opts)
 }
 
-pub fn render_markdown_with_base(text: &str, curly_quotes: bool, base: &str) -> String {
+pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
     let mut s = String::with_capacity(text.len() * 3 / 2);
     let p = new_cmark_parser(text);
     let mut converter = EventQuoteConverter::new(curly_quotes);
     let events = p
         .map(clean_codeblock_headers)
-        .map(|event| adjust_links(event, base))
+        .map(|event| adjust_links(event, path))
         .map(|event| converter.convert(event));
 
     html::push_html(&mut s, events);
diff --git a/tests/dummy_book/src/second/nested.md b/tests/dummy_book/src/second/nested.md
@@ -3,6 +3,14 @@
 When we link to [the first section](../first/nested.md), it should work on
 both the print page and the non-print page.
 
+A [fragment link](#some-section) should work.
+
 Link [outside](../../std/foo/bar.html).
 
 ![Some image](../images/picture.png)
+
+<a href="../first/markdown.md">HTML Link</a>
+
+<img src="../images/picture.png" alt="raw html">
+
+## Some section
diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs
@@ -124,6 +124,9 @@ fn check_correct_relative_links_in_print_page() {
             r##"<a href="second/../first/nested.html">the first section</a>,"##,
             r##"<a href="second/../../std/foo/bar.html">outside</a>"##,
             r##"<img src="second/../images/picture.png" alt="Some image" />"##,
+            r##"<a href="second/nested.html#some-section">fragment link</a>"##,
+            r##"<a href="second/../first/markdown.html">HTML Link</a>"##,
+            r##"<img src="second/../images/picture.png" alt="raw html">"##,
         ],
     );
 }
diff --git a/tests/searchindex_fixture.json b/tests/searchindex_fixture.json