Skip to content

Commit c645c2a

Browse files
committed
[importer/datacite] Remove datacite namespace
1 parent c8f97a7 commit c645c2a

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

odmltools/importers/import_datacite.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,9 @@
4545

4646

4747
VERSION = "0.1.0"
48+
# DataCite namespaces that need to be removed from the individual XML tags before the
49+
# XML file can be properly processed.
50+
COLLAPSE_NS = ['http://datacite.org/schema/kernel-4']
4851

4952

5053
class ParserException(Exception):
@@ -76,9 +79,14 @@ def dict_from_xml(xml_file):
7679
:return: dictionary containing the contents of the xml file.
7780
"""
7881

82+
ns_remove = {}
83+
for nspace in COLLAPSE_NS:
84+
ns_remove[nspace] = None
85+
7986
try:
8087
with open(xml_file) as file:
81-
doc = xmltodict.parse(file.read())
88+
doc = xmltodict.parse(file.read(),
89+
process_namespaces=True, namespaces=ns_remove)
8290
except ExpatError as exc:
8391
msg = "[Error] Could not load file '%s': %s" % (xml_file,
8492
exp_err.messages[exc.code])

0 commit comments

Comments
 (0)