Skip to content

Commit

Permalink
Move text encoding tests to separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
gatkin committed Jun 4, 2018
1 parent 60fefef commit 0cbc21a
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 76 deletions.
14 changes: 7 additions & 7 deletions declxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,13 @@ def serialize_to_file(root_processor, value, xml_file_path, encoding='utf-8', in
:param xml_file_path: Path to the XML file to which the serialized value will be written.
:param indent: If specified, then the XML will be formatted with the specified indentation.
"""
serialized_value = serialize_to_string(root_processor, value, indent,encoding=encoding)

serialized_value = serialized_value.decode('utf-8')
serialized_value = serialize_to_string(root_processor, value, indent)

with open(xml_file_path, 'w', encoding=encoding) as xml_file:
xml_file.write(serialized_value)


def serialize_to_string(root_processor, value, indent=None, encoding='utf-8'):
def serialize_to_string(root_processor, value, indent=None):
"""
Serializes the value to an XML string using the root processor.
Expand All @@ -141,14 +139,16 @@ def serialize_to_string(root_processor, value, indent=None, encoding='utf-8'):

state.pop_location()

serialized_value = ET.tostring(root, encoding=encoding)
# Always encode to UTF-8 because element tree does not support other
# encodings in earlier Python versions. See: https://bugs.python.org/issue1767933
serialized_value = ET.tostring(root, encoding='utf-8')

# Since element tree does not support pretty printing XML, we use minidom to do the pretty
# printing
if indent:
serialized_value = minidom.parseString(serialized_value).toprettyxml(indent=indent)
serialized_value = minidom.parseString(serialized_value).toprettyxml(indent=indent, encoding='utf-8')

return serialized_value
return serialized_value.decode('utf-8')


def array(item_processor, alias=None, nested=None, omit_empty=False):
Expand Down
2 changes: 1 addition & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ def strip_xml(xml_string):
stripped = re.sub(r'>\s+<', '><', xml_string)

# Strip external whitespace
return stripped.strip().encode('utf8')
return stripped.strip()
4 changes: 0 additions & 4 deletions tests/test_files/test_unicode.xml

This file was deleted.

1 change: 0 additions & 1 deletion tests/test_files/test_unicode_write.xml

This file was deleted.

36 changes: 0 additions & 36 deletions tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1056,39 +1056,3 @@ def test_parse_string_strip_whitespace():
actual = xml.parse_from_string(processor, xml_string)

assert expected == actual

def test_parse_string_unicode():
"""Parses a string value stripping whitespace"""
xml_string = u"""
<root>
<value>Ḩ̵̛͇̞͖̹̯͓̙̮͙͙̇̽̈̈́͌̅̔͆e̵̢̫̪̬͖̹̤̘̖̥̮͙̮͖̫̒̈́̓͋̈́ĺ̷̗͖̘̞̦̹͉̩͋͗̑̈́̚͝l̷̢̩̮̳̞̺̳̣̹̜̒̒̈́̈́̓̑́̅̚o̷̫̾,̸̙̪̰̘̩̹͈̼̔̀̀͋̈́̅̎̕͘͝͝ ̷͉͔̿͋͑́͑̅̎͆͌́͝͝W̶̡̯̫̞̭̰̩̦̝̹̰̥̱͑͌̃͂̽̑͐̔͋͑̽͘̚͜͝͝o̴̖̮̪̰̦̝̅̈́̌̇͆͆̓̂̽̓̕̕̚͝r̸̛̭͈̞̤̟̮̿͛͑̍̌͛̓̆̊l̵̡͎̗͈͚̠̝͉̭̩̳̅̀̾̍̾́̍̚ḑ̷̯̀̾́́͘!̴̨͖̥͕̣̮̩͍̜̈́̌̎̿̀̽̒͆̓͐̄̓͛͘! </value>
</root>
"""

processor = xml.dictionary('root', [
xml.string('value')
])

expected = {
'value': u'Ḩ̵̛͇̞͖̹̯͓̙̮͙͙̇̽̈̈́͌̅̔͆e̵̢̫̪̬͖̹̤̘̖̥̮͙̮͖̫̒̈́̓͋̈́ĺ̷̗͖̘̞̦̹͉̩͋͗̑̈́̚͝l̷̢̩̮̳̞̺̳̣̹̜̒̒̈́̈́̓̑́̅̚o̷̫̾,̸̙̪̰̘̩̹͈̼̔̀̀͋̈́̅̎̕͘͝͝ ̷͉͔̿͋͑́͑̅̎͆͌́͝͝W̶̡̯̫̞̭̰̩̦̝̹̰̥̱͑͌̃͂̽̑͐̔͋͑̽͘̚͜͝͝o̴̖̮̪̰̦̝̅̈́̌̇͆͆̓̂̽̓̕̕̚͝r̸̛̭͈̞̤̟̮̿͛͑̍̌͛̓̆̊l̵̡͎̗͈͚̠̝͉̭̩̳̅̀̾̍̾́̍̚ḑ̷̯̀̾́́͘!̴̨͖̥͕̣̮̩͍̜̈́̌̎̿̀̽̒͆̓͐̄̓͛͘!',
}

actual = xml.parse_from_string(processor, xml_string)

assert expected == actual


def test_parse_from_file_unicode():
"""Tests parsing an XML file"""

processor = xml.dictionary('root', [
xml.string('value'),
])

expected = {
'value': u'Ḩ̵̛͇̞͖̹̯͓̙̮͙͙̇̽̈̈́͌̅̔͆e̵̢̫̪̬͖̹̤̘̖̥̮͙̮͖̫̒̈́̓͋̈́ĺ̷̗͖̘̞̦̹͉̩͋͗̑̈́̚͝l̷̢̩̮̳̞̺̳̣̹̜̒̒̈́̈́̓̑́̅̚o̷̫̾,̸̙̪̰̘̩̹͈̼̔̀̀͋̈́̅̎̕͘͝͝ ̷͉͔̿͋͑́͑̅̎͆͌́͝͝W̶̡̯̫̞̭̰̩̦̝̹̰̥̱͑͌̃͂̽̑͐̔͋͑̽͘̚͜͝͝o̴̖̮̪̰̦̝̅̈́̌̇͆͆̓̂̽̓̕̕̚͝r̸̛̭͈̞̤̟̮̿͛͑̍̌͛̓̆̊l̵̡͎̗͈͚̠̝͉̭̩̳̅̀̾̍̾́̍̚ḑ̷̯̀̾́́͘!̴̨͖̥͕̣̮̩͍̜̈́̌̎̿̀̽̒͆̓͐̄̓͛͘',
}

actual = xml.parse_from_file(processor, 'tests/test_files/test_unicode.xml')

assert expected == actual
29 changes: 2 additions & 27 deletions tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ def test_serialize_pretty():
xml.integer('age')
])

expected = """<?xml version="1.0" ?>
expected = """<?xml version="1.0" encoding="utf-8"?>
<root>
<name>Bob</name>
<age>27</age>
Expand Down Expand Up @@ -1143,32 +1143,7 @@ def test_serialize_to_file(tmpdir):

# Ensure the file contents match what is expected.
xml_file = tmpdir.join(xml_file_name)
actual = xml_file.read_binary()
actual = xml_file.read()

assert expected == actual

def test_serialize_to_file_unicode():
"""Serialize XML data to a file"""
value = {
'boolean': True,
'float': 3.14,
'int': 1,
'string': u'Ḩ̵̛͇̞͖̹̯͓̙̮͙͙̇̽̈̈́͌̅̔͆e̵̢̫̪̬͖̹̤̘̖̥̮͙̮͖̫̒̈́̓͋̈́ĺ̷̗͖̘̞̦̹͉̩͋͗̑̈́̚͝l̷̢̩̮̳̞̺̳̣̹̜̒̒̈́̈́̓̑́̅̚o̷̫̾,̸̙̪̰̘̩̹͈̼̔̀̀͋̈́̅̎̕͘͝͝ ̷͉͔̿͋͑́͑̅̎͆͌́͝͝W̶̡̯̫̞̭̰̩̦̝̹̰̥̱͑͌̃͂̽̑͐̔͋͑̽͘̚͜͝͝o̴̖̮̪̰̦̝̅̈́̌̇͆͆̓̂̽̓̕̕̚͝r̸̛̭͈̞̤̟̮̿͛͑̍̌͛̓̆̊l̵̡͎̗͈͚̠̝͉̭̩̳̅̀̾̍̾́̍̚ḑ̷̯̀̾́́͘!̴̨͖̥͕̣̮̩͍̜̈́̌̎̿̀̽̒͆̓͐̄̓͛͘'
}

processor = xml.dictionary('root', [
xml.boolean('boolean'),
xml.floating_point('float'),
xml.integer('int'),
xml.string('string'),
])

expected = u"""<root><boolean>True</boolean><float>3.14</float><int>1</int><string>Ḩ̵̛͇̞͖̹̯͓̙̮͙͙̇̽̈̈́͌̅̔͆e̵̢̫̪̬͖̹̤̘̖̥̮͙̮͖̫̒̈́̓͋̈́ĺ̷̗͖̘̞̦̹͉̩͋͗̑̈́̚͝l̷̢̩̮̳̞̺̳̣̹̜̒̒̈́̈́̓̑́̅̚o̷̫̾,̸̙̪̰̘̩̹͈̼̔̀̀͋̈́̅̎̕͘͝͝ ̷͉͔̿͋͑́͑̅̎͆͌́͝͝W̶̡̯̫̞̭̰̩̦̝̹̰̥̱͑͌̃͂̽̑͐̔͋͑̽͘̚͜͝͝o̴̖̮̪̰̦̝̅̈́̌̇͆͆̓̂̽̓̕̕̚͝r̸̛̭͈̞̤̟̮̿͛͑̍̌͛̓̆̊l̵̡͎̗͈͚̠̝͉̭̩̳̅̀̾̍̾́̍̚ḑ̷̯̀̾́́͘!̴̨͖̥͕̣̮̩͍̜̈́̌̎̿̀̽̒͆̓͐̄̓͛͘</string></root>"""

xml.serialize_to_file(processor, value, 'tests/test_files/test_unicode_write.xml')

# Ensure the file contents match what is expected.
with open('tests/test_files/test_unicode_write.xml','r',encoding='utf-8') as xml_file:
actual = xml_file.read()

assert expected == actual
91 changes: 91 additions & 0 deletions tests/test_text_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
"""Tests handling of text encoding"""
import os

import pytest

import declxml as xml


# Processor shared by the tests in this module: a 'root' dictionary that
# holds a single string element named 'value'.
_PROCESSOR = xml.dictionary('root', [
xml.string('value')
])


# Value the tests expect from parsing and pass to serialization. Its text
# deliberately contains non-ASCII characters.
_VALUE = {
'value': u'Hello, 世界!',
}

# Compact XML text corresponding to _VALUE.
_XML_STRING = u"""<root><value>Hello, 世界!</value></root>"""

# Expected text when serializing _VALUE with an indent (compared against in
# the indented serialization tests).
# NOTE(review): the nesting whitespace inside this literal has to agree with
# the indent argument those tests pass -- confirm.
_XML_STRING_INDENTED = u"""<?xml version="1.0" encoding="utf-8"?>
<root>
<value>Hello, 世界!</value>
</root>
"""

# Encodings over which the file-based tests are parametrized.
_ENCODINGS = [
'utf-8',
'utf-16',
'utf-32',
'cp950',
'gb18030',
]


def test_parse_from_string():
    """Parsing unicode XML text yields the expected value"""
    assert _VALUE == xml.parse_from_string(_PROCESSOR, _XML_STRING)


@pytest.mark.parametrize('encoding', _ENCODINGS)
def test_parse_from_file(tmpdir, encoding):
    """Parse an XML file that was written using the parametrized encoding"""
    # Write the fixture text to a temporary file in the encoding under test.
    source_file = tmpdir.join('data.xml')
    source_file.write_text(_XML_STRING, encoding)

    parsed = xml.parse_from_file(_PROCESSOR, source_file.strpath, encoding=encoding)

    assert _VALUE == parsed


@pytest.mark.parametrize('encoding', _ENCODINGS)
def test_serialize_to_file(tmpdir, encoding):
    """Serialize a value to a file using the parametrized encoding"""
    target_file = tmpdir.join('data.xml')

    xml.serialize_to_file(_PROCESSOR, _VALUE, target_file.strpath, encoding=encoding)

    # Read the file back with the same encoding it was written in.
    assert _XML_STRING == target_file.read_text(encoding)


@pytest.mark.parametrize('encoding', _ENCODINGS)
def test_serialize_to_file_indented(tmpdir, encoding):
    """Serialize an indented value to a file using the parametrized encoding"""
    target_file = tmpdir.join('data.xml')

    xml.serialize_to_file(
        _PROCESSOR,
        _VALUE,
        target_file.strpath,
        indent=' ',
        encoding=encoding,
    )

    # Read the file back with the same encoding it was written in.
    assert _XML_STRING_INDENTED == target_file.read_text(encoding)


def test_serialize_to_string():
    """Serializing a value produces the expected unicode string"""
    assert _XML_STRING == xml.serialize_to_string(_PROCESSOR, _VALUE)


def test_serialize_to_string_indent():
    """Serializing a value with an indent produces the indented unicode string"""
    assert _XML_STRING_INDENTED == xml.serialize_to_string(_PROCESSOR, _VALUE, indent=' ')

0 comments on commit 0cbc21a

Please sign in to comment.