Skip to content

Commit 645a84b

Browse files
committed
init: initialize the first version of the program
1 parent 62174fc commit 645a84b

File tree

10,062 files changed

+1420375
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,062 files changed

+1420375
-0
lines changed

InvoiceExtract.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/env python
2+
3+
import pdfplumber
4+
from openpyxl import Workbook
5+
import re
6+
import sys
7+
import os
8+
9+
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``pdfplumber.open``.

    Returns:
        A single string holding the extracted text of all pages, with no
        separator inserted between pages.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # A page without extractable text may yield None instead of ""
        # (NOTE(review): seen with older pdfplumber releases - confirm for
        # the pinned version). Coalescing keeps such pages from raising
        # TypeError during concatenation.
        return "".join(page.extract_text() or "" for page in pdf.pages)
15+
16+
def extract_info_from_text(pdf_text):
    """Pull the invoice fields out of the text extracted from one invoice PDF.

    Args:
        pdf_text: Full text of the invoice as a single string.

    Returns:
        A tuple ``(billing_date, invoice_code, invoice_number,
        invoice_issuer, total_amount)``; every element is a string.

    Raises:
        AttributeError: If any field cannot be located. This type is kept
            because the original implementation surfaced missing fields as
            AttributeError (``None.group``) and existing callers catch it.
            A missing issuer used to escape as IndexError instead; it is now
            reported the same way as every other field.
    """
    # Labels may be followed by a half-width ":" or a full-width colon
    # (U+FF1A); the class is spelled with an escape so it survives
    # editors and tools that normalise Unicode.
    colon = r"\s*[:\uff1a]\s*"

    def first_group(pattern, label):
        # Return group 1 of the first match, or fail with a readable message.
        found = re.search(pattern, pdf_text)
        if found is None:
            raise AttributeError("invoice field not found: " + label)
        return found.group(1)

    # The date is the rest of the label's line with inner spaces removed.
    billing_date = first_group("开票日期" + colon + "(.*)", "开票日期").replace(" ", "")
    invoice_code = first_group("发票代码" + colon + r"(\d*)", "发票代码")
    invoice_number = first_group("发票号码" + colon + r"(\d*)", "发票号码")

    # The "名 称" label occurs more than once; the second occurrence is the
    # one used as the issuer.
    names = re.findall("名 称" + colon + r"(\w*)", pdf_text)
    if len(names) < 2:
        raise AttributeError("invoice field not found: 名 称 (second occurrence)")
    invoice_issuer = names[1]

    # Take the remainder of the "小写" line, strip spaces, then drop the
    # first character (presumably the closing bracket right after the
    # label - verify against real invoices).
    total_amount = first_group("小写(.*)", "小写").replace(" ", "")[1:]

    return billing_date, invoice_code, invoice_number, invoice_issuer, total_amount
24+
25+
def process_pdf_path(pdf_path, out_path):
    """Extract invoice data from every PDF in a directory into a workbook.

    Args:
        pdf_path: Directory that is scanned (non-recursively) for PDF files.
        out_path: Path the resulting workbook is saved to.

    Each PDF contributes one row below the header row. A file whose text
    does not contain the expected fields is reported on stdout and skipped,
    so one bad invoice does not abort the whole batch.
    """
    # Match the extension case-insensitively (".PDF" is common on Windows)
    # and sort so the row order does not depend on os.listdir's arbitrary
    # ordering.
    pdf_files = sorted(
        os.path.join(pdf_path, name)
        for name in os.listdir(pdf_path)
        if name.lower().endswith('.pdf')
    )

    workbook = Workbook()
    sheet = workbook.active
    sheet.append(['开票日期', '发票代码', '发票号码', '开票方', '票面金额'])

    for pdf_file in pdf_files:
        pdf_text = extract_text_from_pdf(pdf_file)
        try:
            row = extract_info_from_text(pdf_text)
        except (AttributeError, IndexError):
            # AttributeError: a regex found nothing; IndexError: fewer
            # "名 称" entries than expected. Both mean "unparseable".
            print("Reading error file: " + pdf_file)
        else:
            sheet.append(row)

    workbook.save(out_path)
44+
45+
46+
if __name__ == '__main__':
    # Usage: InvoiceExtract.py <invoice directory> <output workbook path>
    args = sys.argv

    if len(args) < 3:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so a usage error is visible to calling scripts. The bare exit()
        # helper is injected by the site module and is not always available.
        sys.exit("Please enter the invoice path or output file path.")

    pdf_path, out_path = args[1], args[2]
    process_pdf_path(pdf_path, out_path)

Lib/site-packages/PIL/BdfFontFile.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#
2+
# The Python Imaging Library
3+
# $Id$
4+
#
5+
# bitmap distribution font (bdf) file parser
6+
#
7+
# history:
8+
# 1996-05-16 fl created (as bdf2pil)
9+
# 1997-08-25 fl converted to FontFile driver
10+
# 2001-05-25 fl removed bogus __init__ call
11+
# 2002-11-20 fl robustification (from Kevin Cazabon, Dmitry Vasiliev)
12+
# 2003-04-22 fl more robustification (from Graham Dumpleton)
13+
#
14+
# Copyright (c) 1997-2003 by Secret Labs AB.
15+
# Copyright (c) 1997-2003 by Fredrik Lundh.
16+
#
17+
# See the README file for information on usage and redistribution.
18+
#
19+
20+
"""
21+
Parse X Bitmap Distribution Format (BDF)
22+
"""
23+
24+
25+
from . import FontFile, Image
26+
27+
# Slant code -> human-readable slant name.
bdf_slant = dict(
    R="Roman",
    I="Italic",
    O="Oblique",
    RI="Reverse Italic",
    RO="Reverse Oblique",
    OT="Other",
)

# Spacing code -> human-readable spacing name.
bdf_spacing = {
    "P": "Proportional",
    "M": "Monospaced",
    "C": "Cell",
}
37+
38+
39+
def bdf_char(f):
    """Read the next glyph record from an open BDF stream.

    :param f: Binary file object positioned anywhere before a glyph record;
        it is consumed line by line with ``readline``.
    :returns: ``None`` when the stream ends before another ``STARTCHAR``
        line, otherwise a 4-tuple ``(name, encoding, bbox, image)`` where
        ``bbox`` is ``((dwx, dwy), destination_box, source_box)`` and
        ``image`` is a mode "1" image of the glyph bitmap.
    """
    # Skip forward to the STARTCHAR line; running out of data means there
    # are no more glyphs.
    line = f.readline()
    while line and line[:9] != b"STARTCHAR":
        line = f.readline()
    if not line:
        return None
    glyph_name = line[9:].strip().decode("ascii")

    # Collect "KEY value" property lines up to (not including) BITMAP.
    props = {}
    line = f.readline()
    while line and line[:6] != b"BITMAP":
        sep = line.find(b" ")
        # Value is everything after the first space, minus the trailing
        # line terminator character.
        value = line[sep + 1 : -1].decode("ascii")
        props[line[:sep].decode("ascii")] = value
        line = f.readline()

    # Gather the hex-encoded bitmap rows up to ENDCHAR.
    rows = []
    line = f.readline()
    while line and line[:7] != b"ENDCHAR":
        rows.append(line[:-1])
        line = f.readline()
    bitmap = b"".join(rows)

    # BBX: width in x, height in y, then the x and y displacement of the
    # lower left corner from the origin of the character.
    width, height, x_disp, y_disp = list(map(int, props["BBX"].split()))

    # DWIDTH: width in x and y of the character in device pixels.
    dwx, dwy = list(map(int, props["DWIDTH"].split()))

    advance = (dwx, dwy)
    destination = (x_disp, -y_disp - height, width + x_disp, -y_disp)
    source = (0, 0, width, height)
    bbox = (advance, destination, source)

    size = (width, height)
    try:
        im = Image.frombytes("1", size, bitmap, "hex", "1")
    except ValueError:
        # Zero-width characters cannot be decoded; use an empty image.
        im = Image.new("1", size)

    return glyph_name, int(props["ENCODING"]), bbox, im
90+
91+
92+
class BdfFontFile(FontFile.FontFile):
    """Font file plugin for the X11 BDF format."""

    def __init__(self, fp):
        """Parse a BDF font from *fp* and fill in the glyph table.

        :param fp: Binary file object positioned at the start of the font.
        :raises SyntaxError: If the first line is not ``STARTFONT 2.1``.
        """
        super().__init__()

        header = fp.readline()
        if header[:13] != b"STARTFONT 2.1":
            msg = "not a valid BDF file"
            raise SyntaxError(msg)

        # Global font properties and free-text comments. Both are collected
        # while reading the header but are not used further in this method.
        props = {}
        comments = []

        line = fp.readline()
        while line and line[:13] != b"ENDPROPERTIES":
            sep = line.find(b" ")
            keyword = line[:sep]
            value = line[sep + 1 : -1].decode("ascii")
            props[keyword.decode("ascii")] = value
            is_remark = keyword in (b"COMMENT", b"COPYRIGHT")
            # Remarks that mention LogicalFontDescription are left out.
            if is_remark and line.find(b"LogicalFontDescription") < 0:
                comments.append(value)
            line = fp.readline()

        # Read glyph records until bdf_char reports the end of the stream.
        record = bdf_char(fp)
        while record:
            _name, code, (xy, dst, src), im = record
            # Only encodings that index into the glyph table are stored
            # (self.glyph is set up by the base class, not shown here).
            if 0 <= code < len(self.glyph):
                self.glyph[code] = xy, dst, src, im
            record = bdf_char(fp)

0 commit comments

Comments
 (0)