forked from invoice-x/factur-x-ng
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfacturx.py
228 lines (191 loc) · 8.36 KB
/
facturx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import io
import json
import os
import copy
import os.path
from datetime import datetime
from io import BytesIO
import yaml
from PyPDF2 import PdfFileReader
from PyPDF2.generic import IndirectObject
from lxml import etree
from .flavors import xml_flavor
from .logger import logger
from .pdfwriter import FacturXPDFWriter
# Python 2 and 3 compat: ``file`` only exists as a builtin on Python 2.
try:
    file_types = (file, io.IOBase)  # noqa: F821 (Python 2 builtin)
except NameError:
    # Python 3: the ``file`` builtin is gone, so only io.IOBase streams are
    # accepted, and ``unicode`` is aliased to the text type ``str``.
    file_types = (io.IOBase,)
    unicode = str
__all__ = ['FacturX']
class FacturX(object):
    """Represents an electronic PDF invoice with embedded XML metadata following the
    Factur-X standard.

    The source of truth is always the underlying XML tree. No copy of field
    data is kept. Manipulation of the XML tree is either done via Python-style
    dict access (available for the most common fields) or by directly accessing
    the XML data on `FacturX.xml`.

    Attributes:
    - xml: xml tree of machine-readable representation.
    - pdf: underlying graphical PDF representation.
    - flavor: which flavor (Factur-x) to use.
    - already_added_field: registry used by item assignment; maps the tag of a
      parent node to the child elements recorded for it.
    """

    def __init__(self, pdf_invoice, flavor='factur-x', level='minimum'):
        """Load a PDF invoice and its embedded XML, or start from a template.

        Args:
            pdf_invoice: path of an existing ``.pdf`` file, or an open binary
                file object.
            flavor: flavor name handed to ``XMLFlavor.from_template`` when the
                PDF carries no XML.
            level: level handed to ``XMLFlavor.from_template`` when the PDF
                carries no XML.

        Raises:
            TypeError: if ``pdf_invoice`` is neither the path of an existing
                ``.pdf`` file nor a file object.
        """
        # Read PDF from a path or from an already opened file object
        if isinstance(pdf_invoice, str) and pdf_invoice.endswith('.pdf') and os.path.isfile(pdf_invoice):
            # Load the whole file in memory so the handle can be closed at once.
            with open(pdf_invoice, 'rb') as f:
                pdf_file = BytesIO(f.read())
        elif isinstance(pdf_invoice, file_types):
            pdf_file = pdf_invoice
        else:
            raise TypeError(
                "The first argument of FacturX must be either the path of an "
                "existing '.pdf' file or a file object (it is a %s)." % type(pdf_invoice))

        xml = self._xml_from_file(pdf_file)
        self.pdf = pdf_file

        if xml is not None:
            # PDF has metadata embedded: read existing XML from PDF
            self.xml = xml
            self.flavor = xml_flavor.XMLFlavor(xml)
        else:
            # PDF does not have XML embedded. Adding from template.
            self.flavor, self.xml = xml_flavor.XMLFlavor.from_template(flavor, level)

        self.flavor.check_xsd(self.xml)
        self._namespaces = self.xml.nsmap
        self.already_added_field = {}

    def read_xml(self):
        """Use XML data from external file. Replaces existing XML or template.

        Not implemented yet: calling it currently does nothing.
        """
        pass

    def _xml_from_file(self, pdf_file):
        """Extract the embedded invoice XML from a PDF stream.

        Args:
            pdf_file: binary file object holding the PDF.

        Returns:
            The parsed XML root element, or None when the PDF has no embedded
            file whose name is one of ``xml_flavor.valid_xmp_filenames()``.
            If several embedded files match, the last one wins.
        """
        pdf = PdfFileReader(pdf_file)
        pdf_root = pdf.trailer['/Root']
        if '/Names' not in pdf_root or '/EmbeddedFiles' not in pdf_root['/Names']:
            # No existing XML file found.
            return None

        # Start from None so a PDF whose attachments are all unrelated files
        # yields None instead of failing on an unbound local variable.
        xml_content = None
        for entry in pdf_root['/Names']['/EmbeddedFiles']['/Names']:
            # Only entries that are indirect references are inspected.
            if not isinstance(entry, IndirectObject):
                continue
            obj = entry.getObject()
            if obj['/F'] in xml_flavor.valid_xmp_filenames():
                xml_content = etree.fromstring(obj['/EF']['/F'].getData())
        return xml_content

    def __getitem__(self, field_name):
        """Return the value of a field, read from the XML tree.

        Args:
            field_name: field name known to the current flavor.

        Returns:
            The text of the first matching node. Fields whose name contains
            ``'date'`` are parsed from ``YYYYMMDD`` into a ``datetime``.
            None when no node matches or the node has no text.
        """
        path = self.flavor.get_xml_path(field_name)
        nodes = self.xml.xpath(path, namespaces=self._namespaces)
        if not nodes:
            return None
        value = nodes[0].text
        # Only parse when there is text: strptime() cannot handle None.
        if value and 'date' in field_name:
            value = datetime.strptime(value, '%Y%m%d')
        return value

    def __setitem__(self, field_name, value):
        """Write a field value into the XML tree.

        When the flavor's path matches no node, a warning is logged and
        nothing is written. Otherwise the last matching node is used.

        Args:
            field_name: field name known to the current flavor.
            value: value to store; fields whose name contains ``'date'``
                require a ``datetime``.
        """
        path = self.flavor.get_xml_path(field_name)
        res = self.xml.xpath(path, namespaces=self._namespaces)
        if not res:
            # The node is not defined at all in the parsed xml
            logger.warning("{} is not defined in {}".format(path, self.flavor.name))
            return

        current_el = res[-1]
        parent_tag = current_el.getparent().tag
        self._handle_duplicated_node(current_el, parent_tag)
        self._write_element(current_el, field_name, value)
        self._save_to_registry(current_el, parent_tag)

    def _handle_duplicated_node(self, current_el, parent_tag):
        """Duplicate the parent node when ``current_el`` is already registered.

        Method meant to handle cardinality 1.n (ApplicableTradeTax or
        IncludedSupplyChainTradeLineItem): a copy of the parent is inserted as
        its next sibling.
        """
        if parent_tag in self.already_added_field and current_el in self.already_added_field[parent_tag]:
            parent_el = current_el.getparent()
            parent_el.addnext(copy.copy(parent_el))

    def _write_element(self, current_el, field_name, value):
        """Store ``value`` as the text of ``current_el``.

        Values of fields whose name contains ``'date'`` are written as
        ``YYYYMMDD`` with the attribute ``format="102"``; any other value is
        converted with ``str()``.

        Raises:
            AssertionError: if a date field receives something other than a
                ``datetime``.
        """
        # if we have type cast worries, it must be handled here
        if 'date' in field_name:
            # Explicit raise rather than an ``assert`` statement so the check
            # survives ``python -O``; exception type and message are unchanged.
            if not isinstance(value, datetime):
                raise AssertionError('Please pass date values as DateTime() object.')
            current_el.attrib['format'] = '102'
            current_el.text = value.strftime('%Y%m%d')
        else:
            current_el.text = str(value)

    def _save_to_registry(self, current_el, parent_tag):
        """Update the registry of written elements for ``parent_tag``.

        - unknown parent tag: an empty list is created for it;
        - element already recorded: it is removed from the list;
        - otherwise: the element is appended.

        NOTE(review): the element that creates the entry for a new parent tag
        is not itself recorded -- confirm this is intended.
        """
        registry = self.already_added_field
        if parent_tag not in registry:
            registry[parent_tag] = []
        elif current_el in registry[parent_tag]:
            registry[parent_tag] = [el for el in registry[parent_tag] if el != current_el]
        else:
            registry[parent_tag].append(current_el)

    def is_valid(self):
        """Make every effort to validate the current XML.

        Checks:
        - XML is valid against the flavor's XSD
        - all required fields are present and have values; a missing required
          field that declares a ``_default`` is assigned that default
        - currency and country fields hold valid codes (ISO:4217, ISO:3166)

        Side effect: defaults may be written into the XML tree.

        Returns: true/false (validation passed/failed)
        """
        # validate against XSD
        try:
            self.flavor.check_xsd(self.xml)
        except Exception as exc:
            # Any failure here means "not valid"; log it instead of hiding it.
            logger.warning("XML does not validate against the XSD: %s", exc)
            return False

        # Check for required fields
        for field, spec in xml_flavor.FIELDS.items():
            if not spec['_required']:
                continue
            r = self.xml.xpath(spec['_path'][self.flavor.name], namespaces=self._namespaces)
            if len(r) and r[0].text is not None:
                continue
            if '_default' in spec:
                # NOTE(review): item assignment only logs a warning when the
                # node is absent, so the field may remain missing -- confirm.
                self[field] = spec['_default']
            else:
                logger.warning("Required field '%s' is not present", field)
                return False

        # Check for codes (ISO:3166, ISO:4217)
        codes_to_check = [
            ('currency', 'currency'),
            ('country', 'seller_country'),
            ('country', 'buyer_country'),
            ('country', 'shipping_country'),
        ]
        for code_type, field_name in codes_to_check:
            code = self[field_name]  # read once; empty values are not checked
            if code and not self.flavor.valid_code(code_type, code):
                logger.warning("Field %s is not a valid %s code.", field_name, code_type)
                return False

        return True

    def write_pdf(self, path):
        """Write the PDF produced by ``FacturXPDFWriter`` to ``path``.

        Returns: True once the file has been written.
        """
        pdfwriter = FacturXPDFWriter(self)
        with open(path, 'wb') as output_f:
            pdfwriter.write(output_f)
        return True

    @property
    def xml_str(self):
        """Pretty-printed serialization of the XML tree (``etree.tostring``)."""
        return etree.tostring(self.xml, pretty_print=True)

    def write_xml(self, path):
        """Write the serialized XML (``xml_str``) to ``path`` in binary mode."""
        with open(path, 'wb') as f:
            f.write(self.xml_str)

    def to_dict(self):
        """Get all available fields as dict.

        Returns:
            Dict mapping each field that has a path in the current flavor to
            the text of its first matching node, or None when nothing matches.
            Fields without a path in this flavor are left out.
        """
        flavor = self.flavor.name
        output_dict = {}
        for field, spec in xml_flavor.FIELDS.items():
            path = spec['_path'][flavor]
            if path is None:
                continue
            r = self.xml.xpath(path, namespaces=self._namespaces)
            try:
                output_dict[field] = r[0].text
            except IndexError:
                output_dict[field] = None
        return output_dict

    def write_json(self, json_file_path='output.json'):
        """Export all fields as JSON; nothing is written if the invoice is invalid.

        Args:
            json_file_path: destination file path.
        """
        # Validate before collecting the data: is_valid() may write default
        # values into the XML and the export has to contain them.
        if not self.is_valid():
            logger.warning("Invoice is not valid, JSON not exported to %s", json_file_path)
            return
        json_output = self.to_dict()
        with open(json_file_path, 'w') as json_file:
            logger.info("Exporting JSON to %s", json_file_path)
            json.dump(json_output, json_file, indent=4, sort_keys=True)

    def write_yaml(self, yml_file_path='output.yml'):
        """Export all fields as YAML; nothing is written if the invoice is invalid.

        Args:
            yml_file_path: destination file path.
        """
        # Validate before collecting the data: is_valid() may write default
        # values into the XML and the export has to contain them.
        if not self.is_valid():
            logger.warning("Invoice is not valid, YAML not exported to %s", yml_file_path)
            return
        yml_output = self.to_dict()
        with open(yml_file_path, 'w') as yml_file:
            logger.info("Exporting YAML to %s", yml_file_path)
            yaml.dump(yml_output, yml_file, default_flow_style=False)