Skip to content

Commit

Permalink
fix: fix EPUB transformation of HTML elements spanning multiple blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
max-heller committed Jun 28, 2024
1 parent d130982 commit 604812a
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 6 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ aho-corasick = "1.0.0"
anyhow = "1.0.47"
env_logger = "0.11.0"
genawaiter = { version = "0.99.1", default-features = false }
html5gum = "0.5.7"
log = "0.4.0"
mdbook = { version = "0.4.35", default-features = false }
normpath = "1.0.0"
Expand Down
57 changes: 57 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,63 @@ fn main() {}
"###);
}

#[test]
/// HTML elements whose opening and closing tags land in different blocks
/// (`<details>`, `<summary>` and `<p>` in the chapter below) are kept as raw
/// HTML blocks, and the Markdown between each matched pair of tags is also
/// wrapped in a `Div` whose class is the tag name. Content following the last
/// closing tag (`outside divs`) stays outside of every `Div`.
fn matched_html_tags() {
// Build a single-chapter book with the pandoc configuration; the chapter
// nests Markdown (a heading, plain text, bold text) inside HTML tags.
let output = MDBook::init()
.config(Config::pandoc())
.chapter(Chapter::new(
"Chapter",
"
<details>
<summary>
## Heading
text
</summary>
<p>
more **markdown**
</p>
</details>
outside divs
",
"chapter.md",
))
.build();
// Expected pandoc IR: the `summary` Div is nested inside the `details` Div,
// the `p` Div is a sibling of `summary` within `details`, and the final
// paragraph is a top-level sibling of the `details` Div.
insta::assert_snapshot!(output, @r###"
├─ log output
│ INFO mdbook::book: Running the pandoc backend
│ INFO mdbook_pandoc::pandoc::renderer: Wrote output to book/markdown/pandoc-ir
├─ markdown/pandoc-ir
│ [ RawBlock (Format "html") "<details>\n<summary>\n"
│ , Div
│ ( "" , [ "details" ] , [] )
│ [ Div
│ ( "" , [ "summary" ] , [] )
│ [ Header
│ 2
│ ( "book__markdown__src__chaptermd__heading"
│ , [ "unnumbered" , "unlisted" ]
│ , []
│ )
│ [ Str "Heading" ]
│ , Para [ Str "text" ]
│ ]
│ , RawBlock (Format "html") "</summary>\n<p>\n"
│ , Div
│ ( "" , [ "p" ] , [] )
│ [ Para [ Str "more" , Space , Strong [ Str "markdown" ] ] ]
│ ]
│ , RawBlock (Format "html") "</p>\n</details>\n"
│ , Para [ Str "outside" , Space , Str "divs" ]
│ ]
"###);
}

#[test]
/// Respect enabled/disabled extensions in Pandoc's `from` option
fn extension_overrides() {
Expand Down
3 changes: 3 additions & 0 deletions src/pandoc/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub enum Extension {
Attributes,
GfmAutoIdentifiers,
RawAttribute,
FencedDivs,
// TODO: pandoc's `rebase_relative_paths` extension works for Markdown links and images,
// but not for raw HTML links and images. Switch if/when pandoc supports HTML as well.
/// Treat paths as relative to the chapter containing them
Expand All @@ -27,6 +28,7 @@ impl Extension {
Extension::Attributes => "attributes",
Extension::GfmAutoIdentifiers => "gfm_auto_identifiers",
Extension::RawAttribute => "raw_attribute",
Extension::FencedDivs => "fenced_divs",
Extension::RebaseRelativePaths => "rebase_relative_paths",
}
}
Expand All @@ -41,6 +43,7 @@ impl Extension {
Extension::Attributes => (2, 10, 1),
Extension::GfmAutoIdentifiers => (2, 0, 0),
Extension::RawAttribute => (2, 10, 1),
Extension::FencedDivs => (2, 0, 0),
Extension::RebaseRelativePaths => (2, 14, 0),
};
Version {
Expand Down
75 changes: 69 additions & 6 deletions src/preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ struct PreprocessChapter<'book, 'preprocessor> {
parser: Peekable<pulldown_cmark::OffsetIter<'book, pulldown_cmark::DefaultBrokenLinkCallback>>,
matching_tags: Vec<pulldown_cmark::TagEnd>,
encountered_h1: bool,
open_html_tags: Vec<html5gum::HtmlString>,
}

impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
Expand All @@ -697,6 +698,7 @@ impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
.peekable(),
matching_tags: Default::default(),
encountered_h1: false,
open_html_tags: Vec::new(),
}
}

Expand Down Expand Up @@ -1036,8 +1038,10 @@ impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
// Actually consume the item from the iterator
self.parser.next();
}
html = self.preprocess_contiguous_html(html);
Event::Html(html)
for event in self.preprocess_contiguous_html(html, Event::Html) {
co.yield_((event, None)).await
}
continue 'events;
}
Event::InlineHtml(mut html) => {
while let Some((Event::InlineHtml(more), _)) = self.parser.peek() {
Expand All @@ -1047,8 +1051,10 @@ impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
// Actually consume the item from the iterator
self.parser.next();
}
html = self.preprocess_contiguous_html(html);
Event::InlineHtml(html)
for event in self.preprocess_contiguous_html(html, Event::InlineHtml) {
co.yield_((event, None)).await
}
continue 'events;
}
Event::TaskListMarker(checked) => {
(self.preprocessor.ctx.pandoc).enable_extension(pandoc::Extension::TaskLists);
Expand All @@ -1060,7 +1066,11 @@ impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
}
}

fn preprocess_contiguous_html(&mut self, mut html: CowStr<'book>) -> CowStr<'book> {
fn preprocess_contiguous_html(
&mut self,
mut html: CowStr<'book>,
wrap_html: impl FnOnce(CowStr<'book>) -> pulldown_cmark::Event,
) -> impl Iterator<Item = pulldown_cmark::Event<'book>> + '_ {
if let OutputFormat::Latex { packages } = &mut self.preprocessor.ctx.output {
static FONT_AWESOME_ICON: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<i\s+class\s*=\s*"fa fa-(?P<icon>.*?)"(>\s*</i>|/>)"#).unwrap()
Expand All @@ -1078,7 +1088,60 @@ impl<'book, 'preprocessor> PreprocessChapter<'book, 'preprocessor> {
};
}
}
html
let already_open_tags = self.open_html_tags.len();
let mut still_open_tags = self.open_html_tags.len();
for node in html5gum::Tokenizer::new(html.as_ref()).infallible() {
match node {
html5gum::Token::StartTag(start) => {
self.open_html_tags.push(start.name);
}
html5gum::Token::EndTag(end) => match self.open_html_tags.last() {
Some(tag) if *tag == end.name => {
self.open_html_tags.pop();
still_open_tags = still_open_tags.min(self.open_html_tags.len());
}
_ => {}
},
_ => {}
}
}
use pulldown_cmark::Event;
let mut fenced_divs_available = || {
self.preprocessor
.ctx
.pandoc
.enable_extension(pandoc::Extension::FencedDivs)
.is_available()
};
let close_divs = {
let closed_tags = already_open_tags - still_open_tags;
(closed_tags > 0 && fenced_divs_available())
.then(|| {
iter::once(Event::Text("\n\n".into()))
.chain((0..closed_tags).map(|_| Event::Text(":::\n\n".into())))
.chain(iter::once(Event::Text("\n\n".into())))
})
.into_iter()
.flatten()
};
let open_divs = {
let opened_tags = &self.open_html_tags[still_open_tags..];
(!opened_tags.is_empty() && fenced_divs_available())
.then(|| {
iter::once(Event::Text("\n\n".into()))
.chain(opened_tags.iter().map(|tag| {
Event::Text(
format!("::: {}\n\n", String::from_utf8_lossy(&tag.0)).into(),
)
}))
.chain(iter::once(Event::Text("\n\n".into())))
})
.into_iter()
.flatten()
};
close_divs
.chain(iter::once(wrap_html(html)))
.chain(open_divs)
}
}

Expand Down

0 comments on commit 604812a

Please sign in to comment.