@@ -8,6 +8,8 @@ use regex::Regex;
88use pulldown_cmark:: { html, CowStr , Event , Options , Parser , Tag } ;
99
1010use std:: borrow:: Cow ;
11+ use std:: fmt:: Write ;
12+ use std:: path:: Path ;
1113
1214pub use self :: string:: take_lines;
1315
@@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String {
6567 normalize_id ( trimmed)
6668}
6769
68- fn adjust_links < ' a > ( event : Event < ' a > , with_base : & str ) -> Event < ' a > {
70+ /// Fix links to the correct location.
71+ ///
72+ /// This adjusts links, such as turning `.md` extensions to `.html`.
73+ ///
74+ /// `path` is the path to the page being rendered relative to the root of the
75+ /// book. This is used for the `print.html` page so that links on the print
76+ /// page go to the original location. Normal page rendering sets `path` to
77+ /// None. Ideally, print page links would link to anchors on the print page,
78+ /// but that is very difficult.
79+ fn adjust_links < ' a > ( event : Event < ' a > , path : Option < & Path > ) -> Event < ' a > {
6980 lazy_static ! {
7081 static ref SCHEME_LINK : Regex = Regex :: new( r"^[a-z][a-z0-9+.-]*:" ) . unwrap( ) ;
7182 static ref MD_LINK : Regex = Regex :: new( r"(?P<link>.*)\.md(?P<anchor>#.*)?" ) . unwrap( ) ;
7283 }
7384
74- fn fix < ' a > ( dest : CowStr < ' a > , base : & str ) -> CowStr < ' a > {
85+ fn fix < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
86+ if dest. starts_with ( '#' ) {
87+ // Fragment-only link.
88+ if let Some ( path) = path {
89+ let mut base = path. display ( ) . to_string ( ) ;
90+ if base. ends_with ( ".md" ) {
91+ base. replace_range ( base. len ( ) - 3 .., ".html" ) ;
92+ }
93+ return format ! ( "{}{}" , base, dest) . into ( ) ;
94+ } else {
95+ return dest;
96+ }
97+ }
7598 // Don't modify links with schemes like `https`.
7699 if !SCHEME_LINK . is_match ( & dest) {
77100 // This is a relative link, adjust it as necessary.
78101 let mut fixed_link = String :: new ( ) ;
79- if !base. is_empty ( ) {
80- fixed_link. push_str ( base) ;
81- fixed_link. push_str ( "/" ) ;
102+ if let Some ( path) = path {
103+ let base = path
104+ . parent ( )
105+ . expect ( "path can't be empty" )
106+ . to_str ( )
107+ . expect ( "utf-8 paths only" ) ;
108+ if !base. is_empty ( ) {
109+ write ! ( fixed_link, "{}/" , base) . unwrap ( ) ;
110+ }
82111 }
83112
84113 if let Some ( caps) = MD_LINK . captures ( & dest) {
@@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
95124 dest
96125 }
97126
127+ fn fix_html < ' a > ( html : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
128+ // This is a terrible hack, but should be reasonably reliable. Nobody
129+ // should ever parse a tag with a regex. However, there isn't anything
130+ // in Rust that I know of that is suitable for handling partial html
131+ // fragments like those generated by pulldown_cmark.
132+ //
133+ // There are dozens of HTML tags/attributes that contain paths, so
134+ // feel free to add more tags if desired; these are the only ones I
135+ // care about right now.
136+ lazy_static ! {
137+ static ref HTML_LINK : Regex =
138+ Regex :: new( r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""# ) . unwrap( ) ;
139+ }
140+
141+ HTML_LINK
142+ . replace_all ( & html, |caps : & regex:: Captures < ' _ > | {
143+ let fixed = fix ( caps[ 2 ] . into ( ) , path) ;
144+ format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
145+ } )
146+ . into_owned ( )
147+ . into ( )
148+ }
149+
98150 match event {
99151 Event :: Start ( Tag :: Link ( link_type, dest, title) ) => {
100- Event :: Start ( Tag :: Link ( link_type, fix ( dest, with_base ) , title) )
152+ Event :: Start ( Tag :: Link ( link_type, fix ( dest, path ) , title) )
101153 }
102154 Event :: Start ( Tag :: Image ( link_type, dest, title) ) => {
103- Event :: Start ( Tag :: Image ( link_type, fix ( dest, with_base ) , title) )
155+ Event :: Start ( Tag :: Image ( link_type, fix ( dest, path ) , title) )
104156 }
157+ Event :: Html ( html) => Event :: Html ( fix_html ( html, path) ) ,
158+ Event :: InlineHtml ( html) => Event :: InlineHtml ( fix_html ( html, path) ) ,
105159 _ => event,
106160 }
107161}
108162
109163/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
///
/// Convenience entry point that renders `text` without a page path/base:
/// `curly_quotes` is forwarded unchanged and the path argument of the
/// underlying renderer is left empty (`None`), so relative links receive no
/// directory prefix.
110164pub fn render_markdown ( text : & str , curly_quotes : bool ) -> String {
111- render_markdown_with_base ( text, curly_quotes, "" )
165+ render_markdown_with_path ( text, curly_quotes, None )
112166}
113167
114168pub fn new_cmark_parser ( text : & str ) -> Parser < ' _ > {
@@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
120174 Parser :: new_ext ( text, opts)
121175}
122176
123- pub fn render_markdown_with_base ( text : & str , curly_quotes : bool , base : & str ) -> String {
177+ pub fn render_markdown_with_path ( text : & str , curly_quotes : bool , path : Option < & Path > ) -> String {
124178 let mut s = String :: with_capacity ( text. len ( ) * 3 / 2 ) ;
125179 let p = new_cmark_parser ( text) ;
126180 let mut converter = EventQuoteConverter :: new ( curly_quotes) ;
127181 let events = p
128182 . map ( clean_codeblock_headers)
129- . map ( |event| adjust_links ( event, base ) )
183+ . map ( |event| adjust_links ( event, path ) )
130184 . map ( |event| converter. convert ( event) ) ;
131185
132186 html:: push_html ( & mut s, events) ;
0 commit comments