Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions benchmark/parse_cdata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Benchmark contexts: the released rexml 3.2.6 gem versus the local checkout
# loaded from lib/ (named "master"), each once as-is and once with
# RubyVM::YJIT.enable called in the prelude.
loop_count: 100
contexts:
- gems:
rexml: 3.2.6
require: false
prelude: require 'rexml'
- name: master
prelude: |
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml'
- name: 3.2.6(YJIT)
gems:
rexml: 3.2.6
require: false
prelude: |
require 'rexml'
RubyVM::YJIT.enable
- name: master(YJIT)
prelude: |
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml'
RubyVM::YJIT.enable

prelude: |
require 'rexml/document'
require 'rexml/parsers/sax2parser'
require 'rexml/parsers/pullparser'
require 'rexml/parsers/streamparser'
require 'rexml/streamlistener'

# Builds the benchmark input: an XML declaration, a small <root> element,
# and one CDATA section whose payload is "a" repeated `size` times.
#
# size - Integer count of "a" characters placed inside the CDATA section.
#
# Returns the assembled document as a String.
def build_xml(size)
  # The string is the method's value directly; no throwaway local is
  # assigned, since nothing inside the method would ever read it.
  "<?xml version=\"1.0\"?>\n" \
    "<root>Test</root>\n" \
    "<![CDATA[#{'a' * size}]]>\n"
end
xml = build_xml(100000)

# Listener handed to REXML::Parsers::StreamParser in the 'stream' benchmark.
# It only mixes in REXML::StreamListener and overrides none of its methods.
class Listener
include REXML::StreamListener
end

# Each entry parses the same `xml` string built in the prelude, using a
# different REXML front end: Document ('dom'), SAX2Parser ('sax'),
# PullParser ('pull', drained event by event) and StreamParser ('stream').
benchmark:
'dom' : REXML::Document.new(xml)
'sax' : REXML::Parsers::SAX2Parser.new(xml).parse
'pull' : |
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
parser.pull
end
'stream' : REXML::Parsers::StreamParser.new(xml, Listener.new).parse
10 changes: 7 additions & 3 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -471,9 +471,13 @@ def pull_event
end

return [ :comment, md[1] ]
else
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
return [ :cdata, md[1] ] if md
elsif @source.match?("[CDATA[", true)
text = @source.read_until("]]>")
if text.chomp!("]]>")
return [ :cdata, text ]
else
raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
end
end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
Expand Down
2 changes: 1 addition & 1 deletion lib/rexml/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class Source
module Private
SCANNER_RESET_SIZE = 100000
PRE_DEFINED_TERM_PATTERNS = {}
pre_defined_terms = ["'", '"', "<"]
pre_defined_terms = ["'", '"', "<", "]]>"]
if StringScanner::Version < "3.1.1"
pre_defined_terms.each do |term|
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
Expand Down
20 changes: 19 additions & 1 deletion test/parse/test_cdata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,28 @@ module REXMLTests
class TestParseCData < Test::Unit::TestCase
include Test::Unit::CoreAssertions

# Test helper: builds a REXML::Document from +xml+ and returns it.
# Any exception raised while constructing the document propagates to the caller.
def parse(xml)
REXML::Document.new(xml)
end

# Feeds CDATA sections filled with n ">" characters to the parser for each
# size in +seq+ and lets assert_linear_performance judge how the run time
# scales with n.
def test_linear_performance_gt
  seq = [10000, 50000, 100000, 150000, 200000]
  assert_linear_performance(seq, rehearsal: 10) do |n|
    # Parse exactly once per iteration, through the shared helper. Building
    # the document a second time directly would double the timed work.
    parse('<description><![CDATA[ ' + ">" * n + ' ]]></description>')
  end
end

class TestInvalid < self
# A CDATA section that is never terminated by "]]>" must raise
# REXML::ParseException, and the exception text must match the expected
# detail message exactly.
def test_unclosed_cdata
  malformed = "<root><![CDATA[a]></root>"
  expected_detail = <<~DETAIL
    Malformed CDATA: Missing end ']]>'
    Line: 1
    Position: 25
    Last 80 unconsumed characters:
  DETAIL

  error = assert_raise(REXML::ParseException) { parse(malformed) }
  assert_equal(expected_detail, error.to_s)
end
end
end
Expand Down