Skip to content

Commit

Permalink
more features in xml-simple added
Browse files Browse the repository at this point in the history
  • Loading branch information
Bin Dong committed Jun 19, 2008
1 parent 1dbebea commit 2566858
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 47 deletions.
107 changes: 68 additions & 39 deletions lib/faster_xml_simple.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#
#
# Copyright (c) 2006 Michael Koziarski
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in the
# Software without restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
Expand All @@ -24,63 +24,63 @@
class FasterXmlSimple
Version = '0.5.0'
class << self
# Takes a string containing XML and returns a hash representing that
# XML document. For example:
#
#
# FasterXmlSimple.xml_in("<root><something>1</something></root>")
# {"root"=>{"something"=>{"__content__"=>"1"}}}
#
# Faster XML Simple is designed to be a drop in replacement for the xml_in
# functionality of http://xml-simple.rubyforge.org
#
# The following options are supported:
#
#
# * <tt>contentkey</tt>: The key to use for the content of text elements,
# defaults to '\_\_content__'
# * <tt>forcearray</tt>: The list of elements which should always be returned
# as arrays. Under normal circumstances single element arrays are inlined.
# * <tt>suppressempty</tt>: The value to return for empty elements, pass +true+
# to remove empty elements entirely.
# * <tt>keeproot</tt>: By default the hash returned has a single key with the
# name of the root element. If the name of the root element isn't
# interesting to you, pass +false+.
# * <tt>forcecontent</tt>: By default a text element with no attributes will
# be collapsed to just a string instead of a hash with a single key.
# Pass +true+ to prevent this.
# * <tt>grouptags</tt>: A hash mapping a wrapper element name to the name of
# its only child element; the wrapper's value is replaced by that child's value.
# * <tt>compress_whitespace</tt>: The list of elements whose text has runs of
# a repeated space, newline or tab squeezed down to a single character.
#
#
def xml_in(string, options={})
# Build an instance for this document and return the hash produced by #out.
new(string, options).out
end
end

# Hands +string+ to #parse and stores the resulting document together with
# the effective options.
def initialize(string, options) #:nodoc:
@doc = parse(string)
# Caller-supplied keys take precedence over the values from default_options.
@options = default_options.merge options
end

# Returns the collapsed document. When the 'keeproot' option is truthy the
# result is wrapped in a hash keyed by the root element's name.
def out #:nodoc:
  root = @doc.root
  return collapse(root) unless @options['keeproot']

  { root.name => collapse(root) }
end

private
# Option values used when the caller does not supply them: the text key is
# '__content__', no element is forced into an array, and the root is kept.
def default_options
  {
    'contentkey' => '__content__',
    'forcearray' => [],
    'keeproot' => true
  }
end

def collapse(element)
result = hash_of_attributes(element)
result = hash_of_attributes(element)
if text_node? element
text = collapse_text(element)
result[content_key] = text if text =~ /\S/
elsif element.children?
element.inject(result) do |hash, child|
unless child.text?
child_result = collapse(child)
child_result = collapse(child)
(hash[child.name] ||= []) << child_result
end
hash
Expand All @@ -90,47 +90,73 @@ def collapse(element)
return empty_element
end
# Compact them to ensure it complies with the user's requests
inline_single_element_arrays(result)
inline_single_element_arrays(result)
remove_empty_elements(result) if suppress_empty?

make_groups(result)

if content_only?(result) && !force_content?
result[content_key]
else
result
end
end


# True when the 'compress_whitespace' option is present and lists +ele_name+.
def compress_whitespace?(ele_name)
  return false unless @options.has_key?('compress_whitespace')

  @options['compress_whitespace'].include?(ele_name)
end

# Squeezes runs of a repeated space, newline or tab in +text+ down to one
# character. Mutates +text+ in place; like String#squeeze! it returns nil
# when nothing was changed.
def compress_whitespace(text)
  squeezable = " \n\t"
  text.squeeze!(squeezable)
end

# Disintermediates grouped tags: for every entry whose value is a Hash with
# exactly one key, and whose 'grouptags' mapping names that key, the entry is
# replaced by the inner value. Does nothing when 'grouptags' is not set.
def make_groups(result)
  return unless @options.has_key?('grouptags')

  result.each do |tag, value|
    next unless value.instance_of?(Hash) && value.size == 1

    inner_tag, inner_value = value.to_a.first
    result[tag] = inner_value if @options['grouptags'][tag] == inner_tag
  end
end

# True when the only key in +result+ is the content key.
def content_only?(result)
  keys = result.keys
  keys.length == 1 && keys.first == content_key
end

# The hash key under which element text is stored ('contentkey' option).
def content_key
@options['contentkey']
end

# True when +key_name+ is listed in the 'forcearray' option. The option is
# coerced with Array(), so nil or a single name is accepted as well.
def force_array?(key_name)
  forced_names = Array(@options['forcearray'])
  forced_names.include?(key_name)
end

# Replaces every one-element Array value in +result+ with its sole element,
# except for keys the caller asked to keep as arrays (see force_array?).
# Returns +result+.
def inline_single_element_arrays(result)
  result.each do |key, value|
    # Check the type first so values without #size (e.g. nil) are skipped
    # instead of raising.
    next unless value.is_a?(Array) && value.size == 1
    next if force_array?(key)

    result[key] = value.first
  end
end

# Drops every entry of +result+ whose value equals the configured empty
# element. Returns +result+.
def remove_empty_elements(result)
  result.delete_if { |_key, value| value == empty_element }
end

# True only when the 'suppressempty' option is literally +true+.
def suppress_empty?
# Deliberate comparison with true: any other value does not count here.
@options['suppressempty'] == true
end

def empty_element
if !@options.has_key? 'suppressempty'
{}
Expand All @@ -140,38 +166,41 @@ def empty_element
end

# removes the content if it's nothing but blanks, prevents
# the hash being polluted with lots of content like "\n\t\t\t"
# the hash being polluted with lots of content like "\n\t\t\t"
def suppress_empty_content(result)
  # Content with no non-whitespace character (or no content at all) is dropped.
  result.delete(content_key) if result[content_key] !~ /\S/
end

# Value of the 'forcecontent' option; truthy means content-only hashes are
# not collapsed to a bare string.
def force_content?
@options['forcecontent']
end

# a text node is one with 1 or more child nodes which are
# text nodes, and no non-text children, there's no sensible
# way to support nodes which are text and markup like:
# <p>Something <b>Bold</b> </p>
def text_node?(element)
  # An element that is itself a text node never qualifies.
  return false if element.text?

  # CDATA sections count as text alongside ordinary text children.
  element.all? { |child| child.cdata? || child.text? }
end

# takes a text node, and collapses it into a string
def collapse_text(element)
  # Concatenate the content of every child in document order.
  text = element.map { |child| child.content }.join('')
  # Squeeze whitespace in place only for elements the caller listed.
  compress_whitespace(text) if compress_whitespace?(element.name)
  text
end

# Builds a hash of the element's attributes, name => value. A namespaced
# attribute is keyed as "ns:name".
def hash_of_attributes(element)
  attributes = {}
  element.each_attr do |attribute|
    name = attribute.name
    name = [attribute.ns, attribute.name].join(':') if attribute.ns?
    attributes[name] = attribute.value
  end
  attributes
end
end

def parse(string)
if string == ''
string = ' '
Expand All @@ -184,4 +213,4 @@ class XmlSimple # :nodoc:
# Forwards every argument unchanged to FasterXmlSimple.xml_in.
def self.xml_in(*args)
  FasterXmlSimple.xml_in(*args)
end
end
end
42 changes: 34 additions & 8 deletions test/regression_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ def test_content_nil_regressions
assert_equal expected, FasterXmlSimple.xml_in("<asdf><jklsemicolon /></asdf>")
assert_equal expected, FasterXmlSimple.xml_in("<asdf><jklsemicolon /></asdf>", 'forcearray'=>['asdf'])
end

# The AccessControlList element of the fixture must not carry a text entry.
def test_s3_regression
  xml = File.read("test/fixtures/test-7.xml")
  parsed = FasterXmlSimple.xml_in(xml)
  assert_nil parsed["AccessControlPolicy"]["AccessControlList"]["__content__"]
end

# XmlSimple.xml_in and FasterXmlSimple.xml_in must agree on the same input.
def test_xml_simple_transparency
  via_xml_simple = XmlSimple.xml_in("<asdf />")
  via_faster = FasterXmlSimple.xml_in("<asdf />")
  assert_equal via_xml_simple, via_faster
end

def test_suppress_empty_variations
str = "<asdf><fdsa /></asdf>"

assert_equal Hash.new, FasterXmlSimple.xml_in(str)["asdf"]["fdsa"]
assert_nil FasterXmlSimple.xml_in(str, 'suppressempty'=>nil)["asdf"]["fdsa"]
assert_equal '', FasterXmlSimple.xml_in(str, 'suppressempty'=>'')["asdf"]["fdsa"]
Expand All @@ -27,21 +27,47 @@ def test_suppress_empty_variations

# An empty document must surface as a parse error, not a crash; the parser's
# stderr output is silenced while it runs.
def test_empty_string_doesnt_crash
  assert_raise(XML::Parser::ParseError) do
    silence_stderr do
      FasterXmlSimple.xml_in('')
    end
  end
end

# With 'keeproot' => false the root wrapper is dropped from the result.
def test_keeproot_false
  xml = "<asdf><fdsa>1</fdsa></asdf>"
  parsed = FasterXmlSimple.xml_in(xml, 'keeproot'=>false)
  assert_equal({"fdsa"=>"1"}, parsed)
end

# 'forcecontent' keeps the text wrapped in a content hash even without a root.
def test_keeproot_false_with_force_content
  xml = "<asdf><fdsa>1</fdsa></asdf>"
  parsed = FasterXmlSimple.xml_in(xml, 'keeproot'=>false, 'forcecontent'=>true)
  assert_equal({"fdsa"=>{"__content__"=>"1"}}, parsed)
end
end

# Exercises the 'grouptags' option: the <c> wrapper around the <b> elements
# is expected to be replaced by the list of <b> values.
def test_group_tag
str = "<a>
<c>
<b>1</b>
<b>2</b>
</c>
</a>"
expected = { "a" => { 'c' => ["1", "2"] }}
# 'c' => 'b' names <c> as the wrapper tag and <b> as its grouped child.
assert_equal expected, FasterXmlSimple.xml_in(str, 'grouptags' => { 'c' => 'b'})

end

# Text of elements listed under 'compress_whitespace' comes back squeezed.
def test_compress_whitespace
  xml = "<a><b> a b </b></a>"
  parsed = FasterXmlSimple.xml_in(xml, 'compress_whitespace' => ['b'])
  assert_equal({ "a" => { 'b' => " a b "}}, parsed)
end

# A CDATA section is returned as the element's plain text.
def test_cdata
  xml = "<a> <b><![CDATA[ a ]]></b> </a>"
  parsed = FasterXmlSimple.xml_in(xml)
  assert_equal({ "a" => { "b" => " a "}}, parsed)
end
end

0 comments on commit 2566858

Please sign in to comment.