repoog
diff --git a/‎InvoiceExtract.py
Lines changed: 55 additions & 0 deletions b/‎InvoiceExtract.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎Lib/site-packages/PIL/BdfFontFile.py
Lines changed: 122 additions & 0 deletions b/‎Lib/site-packages/PIL/BdfFontFile.py
Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,55 @@
+#!/bin/env python
+
+import pdfplumber
+from openpyxl import Workbook
+import re
+import sys
+import os
+
+def extract_text_from_pdf(pdf_path):
+    with pdfplumber.open(pdf_path) as pdf:
+        text = ""
+        for page in pdf.pages:
+            text += page.extract_text()
+    return text
+
+def extract_info_from_text(pdf_text):
+    billing_date = re.search(r"开票日期\s*[:：]\s*(.*)", pdf_text).group(1).replace(" ", "")
+    invoice_code = re.search(r"发票代码\s*[:：]\s*(\d*)", pdf_text).group(1)
+    invoice_number = re.search(r"发票号码\s*[:：]\s*(\d*)", pdf_text).group(1)
+    invoice_issuer = re.findall(r"名 称\s*[:：]\s*(\w*)", pdf_text)[1]
+    total_amount = re.search(r"小写(.*)", pdf_text).group(1).replace(" ", "")[1:]
+
+    return billing_date, invoice_code, invoice_number, invoice_issuer, total_amount
+
+def process_pdf_path(pdf_path, out_path):
+    pdf_files = []
+    
+    for file in os.listdir(pdf_path):
+        if file.endswith('.pdf'):
+            pdf_files.append(os.path.join(pdf_path, file))
+
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet.append(['开票日期', '发票代码', '发票号码', '开票方', '票面金额'])
+
+    for pdf_file in pdf_files:
+        pdf_text = extract_text_from_pdf(pdf_file)
+        try:
+            sheet.append(extract_info_from_text(pdf_text))
+        except AttributeError:
+            print("Reading error file: " + pdf_file)
+
+        workbook.save(out_path)
+
+
+if __name__ == '__main__':
+    args = sys.argv
+
+    try:
+        pdf_path, out_path = args[1], args[2]
+    except IndexError:
+        print("Please enter the invoice path or output file path.")    
+        exit()
+
+    process_pdf_path(pdf_path, out_path)
@@ -0,0 +1,122 @@
+#
+# The Python Imaging Library
+# $Id$
+#
+# bitmap distribution font (bdf) file parser
+#
+# history:
+# 1996-05-16 fl   created (as bdf2pil)
+# 1997-08-25 fl   converted to FontFile driver
+# 2001-05-25 fl   removed bogus __init__ call
+# 2002-11-20 fl   robustification (from Kevin Cazabon, Dmitry Vasiliev)
+# 2003-04-22 fl   more robustification (from Graham Dumpleton)
+#
+# Copyright (c) 1997-2003 by Secret Labs AB.
+# Copyright (c) 1997-2003 by Fredrik Lundh.
+#
+# See the README file for information on usage and redistribution.
+#
+
+"""
+Parse X Bitmap Distribution Format (BDF)
+"""
+
+
+from . import FontFile, Image
+
+# Descriptive names for slant codes, keyed by the code string.
+# NOTE(review): presumably these are the values a font's slant property can
+# take; neither this table nor bdf_spacing is referenced by the code
+# visible in this file - confirm whether other modules import them.
+bdf_slant = {
+    "R": "Roman",
+    "I": "Italic",
+    "O": "Oblique",
+    "RI": "Reverse Italic",
+    "RO": "Reverse Oblique",
+    "OT": "Other",
+}
+
+# Descriptive names for spacing codes, keyed by the code string.
+bdf_spacing = {"P": "Proportional", "M": "Monospaced", "C": "Cell"}
+
+
+def bdf_char(f):
+    # skip to STARTCHAR
+    while True:
+        s = f.readline()
+        if not s:
+            return None
+        if s[:9] == b"STARTCHAR":
+            break
+    id = s[9:].strip().decode("ascii")
+
+    # load symbol properties
+    props = {}
+    while True:
+        s = f.readline()
+        if not s or s[:6] == b"BITMAP":
+            break
+        i = s.find(b" ")
+        props[s[:i].decode("ascii")] = s[i + 1 : -1].decode("ascii")
+
+    # load bitmap
+    bitmap = []
+    while True:
+        s = f.readline()
+        if not s or s[:7] == b"ENDCHAR":
+            break
+        bitmap.append(s[:-1])
+    bitmap = b"".join(bitmap)
+
+    # The word BBX
+    # followed by the width in x (BBw), height in y (BBh),
+    # and x and y displacement (BBxoff0, BByoff0)
+    # of the lower left corner from the origin of the character.
+    width, height, x_disp, y_disp = [int(p) for p in props["BBX"].split()]
+
+    # The word DWIDTH
+    # followed by the width in x and y of the character in device pixels.
+    dwx, dwy = [int(p) for p in props["DWIDTH"].split()]
+
+    bbox = (
+        (dwx, dwy),
+        (x_disp, -y_disp - height, width + x_disp, -y_disp),
+        (0, 0, width, height),
+    )
+
+    try:
+        im = Image.frombytes("1", (width, height), bitmap, "hex", "1")
+    except ValueError:
+        # deal with zero-width characters
+        im = Image.new("1", (width, height))
+
+    return id, int(props["ENCODING"]), bbox, im
+
+
+class BdfFontFile(FontFile.FontFile):
+    """Font file plugin for the X11 BDF format."""
+
+    def __init__(self, fp):
+        super().__init__()
+
+        s = fp.readline()
+        if s[:13] != b"STARTFONT 2.1":
+            msg = "not a valid BDF file"
+            raise SyntaxError(msg)
+
+        props = {}
+        comments = []
+
+        while True:
+            s = fp.readline()
+            if not s or s[:13] == b"ENDPROPERTIES":
+                break
+            i = s.find(b" ")
+            props[s[:i].decode("ascii")] = s[i + 1 : -1].decode("ascii")
+            if s[:i] in [b"COMMENT", b"COPYRIGHT"]:
+                if s.find(b"LogicalFontDescription") < 0:
+                    comments.append(s[i + 1 : -1].decode("ascii"))
+
+        while True:
+            c = bdf_char(fp)
+            if not c:
+                break
+            id, ch, (xy, dst, src), im = c
+            if 0 <= ch < len(self.glyph):
+                self.glyph[ch] = xy, dst, src, im