From 876e8b9146e036a2a965bb3b6a0deafe81cc436a Mon Sep 17 00:00:00 2001 From: SSJohns Date: Thu, 21 Jun 2018 15:19:59 -0700 Subject: [PATCH 1/4] Don't init vecs that are never used In many cases these vecs are never used so initializing them with a capacity will waste space and allocations. Also the content.clear() in these cases doesn't seem to be doing anything --- src/parser/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 324401be..3cdc50b5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -606,7 +606,7 @@ impl<'a, 'o> Parser<'a, 'o> { fence_length: matched, fence_offset: first_nonspace - offset, info: Vec::with_capacity(10), - literal: Vec::with_capacity(80), + literal: Vec::new(), }; *container = self.add_child(*container, NodeValue::CodeBlock(ncb), first_nonspace + 1); @@ -625,7 +625,7 @@ impl<'a, 'o> Parser<'a, 'o> { let offset = self.first_nonspace + 1; let nhb = NodeHtmlBlock { block_type: matched as u8, - literal: Vec::with_capacity(10), + literal: Vec::new(), }; *container = self.add_child(*container, NodeValue::HtmlBlock(nhb), offset); @@ -731,7 +731,7 @@ impl<'a, 'o> Parser<'a, 'o> { fence_length: 0, fence_offset: 0, info: vec![], - literal: Vec::with_capacity(80), + literal: Vec::new(), }; let offset = self.offset + 1; *container = self.add_child(*container, NodeValue::CodeBlock(ncb), offset); @@ -1152,11 +1152,9 @@ impl<'a, 'o> Parser<'a, 'o> { *content = content[pos..].to_vec(); } mem::swap(&mut ncb.literal, content); - content.clear(); } NodeValue::HtmlBlock(ref mut nhb) => { mem::swap(&mut nhb.literal, content); - content.clear(); } NodeValue::List(ref mut nl) => { nl.tight = true; From 67faebf11cd48c2024ea00180da14d56aa2c9208 Mon Sep 17 00:00:00 2001 From: SSJohns Date: Mon, 11 Jun 2018 14:42:52 -0700 Subject: [PATCH 2/4] Guard against unchanged seeked --- src/parser/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs 
b/src/parser/mod.rs index 3cdc50b5..7f86561f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1111,7 +1111,9 @@ impl<'a, 'o> Parser<'a, 'o> { seeked += pos; } } - *content = content[seeked..].to_vec(); + if seeked != 0 { + *content = content[seeked..].to_vec(); + } if strings::is_blank(content) { node.detach(); } From f9b0f9db60650cdba6ab01cd35160dad8e25368d Mon Sep 17 00:00:00 2001 From: SSJohns Date: Thu, 21 Jun 2018 15:30:39 -0700 Subject: [PATCH 3/4] Change ref inl to have zero capacity vec --- src/parser/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7f86561f..17592ccb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1399,7 +1399,9 @@ impl<'a, 'o> Parser<'a, 'o> { } fn parse_reference_inline(&mut self, content: &[u8]) -> Option { - let delimiter_arena = Arena::new(); + // In this case reference inlines rarely have delimiters + // so we often just need the minimal case + let delimiter_arena = Arena::with_capacity(0); let mut subj = inlines::Subject::new( self.arena, self.options, From 6abd7907f85440193d8f48b575bffd1cb65e1b30 Mon Sep 17 00:00:00 2001 From: SSJohns Date: Thu, 21 Jun 2018 16:28:12 -0700 Subject: [PATCH 4/4] Check start of line in scanners for minimal case --- src/scanners.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/scanners.rs b/src/scanners.rs index ef37976e..3f84d431 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -1,3 +1,14 @@ +/*! + In many of these cases the AST will be scanned and then it + is found there is no match. In many of these cases the scan + turns up False. It can be seen that in the very simplest cases, + usually by doing a char check at the very beginning of the + line, we can eliminate these checks without the same allocations + that are done otherwise and cause the program considerable + slowdown. 
+ +*/ + use pest::Parser; use std::str; use twoway::find_bytes; @@ -24,6 +35,9 @@ fn is_match(rule: Rule, line: &[u8]) -> bool { #[inline(always)] pub fn atx_heading_start(line: &[u8]) -> Option { + if line[0] != b'#'{ + return None + } search(Rule::atx_heading_start, line) } @@ -55,11 +69,17 @@ pub fn html_block_end_5(line: &[u8]) -> bool { #[inline(always)] pub fn open_code_fence(line: &[u8]) -> Option { + if line[0] != b'`' && line[0] != b'~' { + return None + } search(Rule::open_code_fence, line) } #[inline(always)] pub fn close_code_fence(line: &[u8]) -> Option { + if line[0] != b'`' && line[0] != b'~' { + return None + } search(Rule::close_code_fence, line) } @@ -108,7 +128,8 @@ pub enum SetextChar { #[inline(always)] pub fn setext_heading_line(line: &[u8]) -> Option { - if is_match(Rule::setext_heading_line, line) { + if (line[0] == b'=' || line[0] == b'-') + && is_match(Rule::setext_heading_line, line) { if line[0] == b'=' { Some(SetextChar::Equals) } else { @@ -121,6 +142,9 @@ pub fn setext_heading_line(line: &[u8]) -> Option { #[inline(always)] pub fn thematic_break(line: &[u8]) -> Option { + if line[0] != b'*' && line[0] != b'-' && line[0] != b'_' { + return None + } search(Rule::thematic_break, line) }