Skip to content

Commit

Permalink
Optimize header parsing
Browse files Browse the repository at this point in the history
Fixes #505

Benchmarks:

"setext":

```bash
ruby -rbenchmark -Ilib -rkramdown -e 'p Benchmark.measure{Kramdown::Document.new("1#{" "*20000}2\n==\n")}'
```

"atx":

```bash
ruby -rbenchmark -Ilib -rkramdown -e 'p Benchmark.measure{Kramdown::Document.new("## 1#{" "*20000}2")}'
```
  • Loading branch information
glebm committed May 18, 2018
1 parent c646673 commit 5fa05e5
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 16 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
htmldoc
pkg
webgen-tmp
.bundle/
CONTRIBUTERS
VERSION
kramdown.gemspec
man/man1/kramdown.1
19 changes: 14 additions & 5 deletions lib/kramdown/parser/gfm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,19 +112,28 @@ def generate_gfm_header_id(text)
@options[:auto_id_prefix] + result
end

ATX_HEADER_START = /^\#{1,6}\s/
ATX_HEADER_START = /^(?<level>\#{1,6})[\t ]+(?<contents>.*)\n/
define_parser(:atx_header_gfm, ATX_HEADER_START, nil, 'parse_atx_header')
define_parser(:atx_header_gfm_quirk, ATX_HEADER_START)

# Copied from parse_atx_header in kramdown/parser/kramdown/header.rb, minus its leading `after_block_boundary?` guard
def parse_atx_header_gfm_quirk
start_line_number = @src.current_line_number
@src.check(ATX_HEADER_MATCH)
level, text, id = @src[1], @src[2].to_s.strip, @src[3]
text = @src["contents"]
text.rstrip!
id_match = HEADER_ID.match(text)
if id_match
id = id_match["id"]
text = text[0...-id_match[0].length]
text.rstrip!
end
text.sub!(/[\t ]#+\z/, '')
text.rstrip!
return false if text.empty?
level = @src["level"].length

start_line_number = @src.current_line_number
@src.pos += @src.matched_size
el = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text, :location => start_line_number)
el = new_block_el(:header, nil, nil, :level => level, :raw_text => text, :location => start_line_number)
add_text(text, el)
el.attr['id'] = id if id
@tree.children << el
Expand Down
39 changes: 28 additions & 11 deletions lib/kramdown/parser/kramdown/header.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,27 @@ module Kramdown
module Parser
class Kramdown

HEADER_ID=/(?:[ \t]+\{#([A-Za-z][\w:-]*)\})?/
SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
HEADER_ID = /[\t ]{#(?<id>[A-Za-z][\w:-]*)}\z/
SETEXT_HEADER_START = /^#{OPT_SPACE}(?<contents>.*)\n(?<level>[-=])[-=]*[ \t\r\f\v]*\n/

# Parse the Setext header at the current location.
def parse_setext_header
return false if !after_block_boundary?

text = @src["contents"]
text.strip!
id_match = HEADER_ID.match(text)
if id_match
id = id_match["id"]
text = text[0...-id_match[0].length]
text.rstrip!
end
return false if text.empty?
level = @src["level"] == '-' ? 2 : 1

start_line_number = @src.current_line_number
@src.pos += @src.matched_size
text, id, level = @src[1], @src[2], @src[3]
text.strip!
el = new_block_el(:header, nil, nil, :level => (level == '-' ? 2 : 1), :raw_text => text, :location => start_line_number)
el = new_block_el(:header, nil, nil, :level => level, :raw_text => text, :location => start_line_number)
add_text(text, el)
el.attr['id'] = id if id
@tree.children << el
Expand All @@ -33,20 +42,28 @@ def parse_setext_header
define_parser(:setext_header, SETEXT_HEADER_START)


ATX_HEADER_START = /^\#{1,6}/
ATX_HEADER_MATCH = /^(\#{1,6})(.+?(?:\\#)?)\s*?#*#{HEADER_ID}\s*?\n/
ATX_HEADER_START = /^(?<level>\#{1,6})[\t ]*(?<contents>.*)\n/

# Parse the Atx header at the current location.
def parse_atx_header
return false if !after_block_boundary?

start_line_number = @src.current_line_number
@src.check(ATX_HEADER_MATCH)
level, text, id = @src[1], @src[2].to_s.strip, @src[3]
text = @src["contents"]
text.rstrip!
id_match = HEADER_ID.match(text)
if id_match
id = id_match["id"]
text = text[0...-id_match[0].length]
text.rstrip!
end
text.sub!(/[\t ]#+\z/, '')
text.rstrip!
return false if text.empty?
level = @src["level"].length

start_line_number = @src.current_line_number
@src.pos += @src.matched_size
el = new_block_el(:header, nil, nil, :level => level.length, :raw_text => text, :location => start_line_number)
el = new_block_el(:header, nil, nil, :level => level, :raw_text => text, :location => start_line_number)
add_text(text, el)
el.attr['id'] = id if id
@tree.children << el
Expand Down
Empty file.

0 comments on commit 5fa05e5

Please sign in to comment.