diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 324401be..17592ccb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -606,7 +606,7 @@ impl<'a, 'o> Parser<'a, 'o> { fence_length: matched, fence_offset: first_nonspace - offset, info: Vec::with_capacity(10), - literal: Vec::with_capacity(80), + literal: Vec::new(), }; *container = self.add_child(*container, NodeValue::CodeBlock(ncb), first_nonspace + 1); @@ -625,7 +625,7 @@ impl<'a, 'o> Parser<'a, 'o> { let offset = self.first_nonspace + 1; let nhb = NodeHtmlBlock { block_type: matched as u8, - literal: Vec::with_capacity(10), + literal: Vec::new(), }; *container = self.add_child(*container, NodeValue::HtmlBlock(nhb), offset); @@ -731,7 +731,7 @@ impl<'a, 'o> Parser<'a, 'o> { fence_length: 0, fence_offset: 0, info: vec![], - literal: Vec::with_capacity(80), + literal: Vec::new(), }; let offset = self.offset + 1; *container = self.add_child(*container, NodeValue::CodeBlock(ncb), offset); @@ -1111,7 +1111,9 @@ impl<'a, 'o> Parser<'a, 'o> { seeked += pos; } } - *content = content[seeked..].to_vec(); + if seeked != 0 { + *content = content[seeked..].to_vec(); + } if strings::is_blank(content) { node.detach(); } @@ -1152,11 +1154,9 @@ impl<'a, 'o> Parser<'a, 'o> { *content = content[pos..].to_vec(); } mem::swap(&mut ncb.literal, content); - content.clear(); } NodeValue::HtmlBlock(ref mut nhb) => { mem::swap(&mut nhb.literal, content); - content.clear(); } NodeValue::List(ref mut nl) => { nl.tight = true; @@ -1399,7 +1399,9 @@ impl<'a, 'o> Parser<'a, 'o> { } fn parse_reference_inline(&mut self, content: &[u8]) -> Option { - let delimiter_arena = Arena::new(); + // In this case reference inlines rarely have delimiters + // so we often just need the minimal case + let delimiter_arena = Arena::with_capacity(0); let mut subj = inlines::Subject::new( self.arena, self.options, diff --git a/src/scanners.rs b/src/scanners.rs index ef37976e..3f84d431 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -1,3 +1,14 
@@ +/*! + In many of these cases the line will be scanned and then it + is found there is no match. In many of these cases the scan + turns up False. It can be seen that in the very simplest cases, + usually by doing a byte check at the very beginning of the + line, we can eliminate these checks without the same allocations + that are done otherwise and cause the program considerable + slowdown. An empty line is rejected by the same check. + +*/ + use pest::Parser; use std::str; use twoway::find_bytes; @@ -24,6 +35,9 @@ fn is_match(rule: Rule, line: &[u8]) -> bool { #[inline(always)] pub fn atx_heading_start(line: &[u8]) -> Option { + if line.first() != Some(&b'#') { + return None; + } search(Rule::atx_heading_start, line) } @@ -55,11 +69,17 @@ pub fn html_block_end_5(line: &[u8]) -> bool { #[inline(always)] pub fn open_code_fence(line: &[u8]) -> Option { + if line.first() != Some(&b'`') && line.first() != Some(&b'~') { + return None; + } search(Rule::open_code_fence, line) } #[inline(always)] pub fn close_code_fence(line: &[u8]) -> Option { + if line.first() != Some(&b'`') && line.first() != Some(&b'~') { + return None; + } search(Rule::close_code_fence, line) } @@ -108,7 +128,8 @@ pub enum SetextChar { #[inline(always)] pub fn setext_heading_line(line: &[u8]) -> Option { - if is_match(Rule::setext_heading_line, line) { + if (line.first() == Some(&b'=') || line.first() == Some(&b'-')) + && is_match(Rule::setext_heading_line, line) { if line[0] == b'=' { Some(SetextChar::Equals) } else { @@ -121,6 +142,9 @@ pub fn setext_heading_line(line: &[u8]) -> Option { #[inline(always)] pub fn thematic_break(line: &[u8]) -> Option { + if line.first() != Some(&b'*') && line.first() != Some(&b'-') && line.first() != Some(&b'_') { + return None; + } search(Rule::thematic_break, line) }