Skip to content

Commit d5a5eea

Browse files
authored
MAINT: Use grouped constants instead of string literals (#745)
This allows us to leverage the IDE:
* Documentation: We can now document what the constants are good for and give background information about them.
* Homographs: We can distinguish literals which have the same name but different contexts.
* Typos: We can hopefully avoid typos such as writing decodeParams instead of decodeParms.

For users of PyPDF2, this doesn't change anything. We still use string literals. In the documentation we should also keep using string literals.
1 parent 87aafd6 commit d5a5eea

File tree

15 files changed

+393
-170
lines changed

15 files changed

+393
-170
lines changed

.github/workflows/github-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
pip install .
4141
- name: Test with flake8
4242
run: |
43-
flake8 . --ignore=E203,W503,W504,E,F403,F405
43+
flake8 . --ignore=E203,W503,W504,E,F403,F405 --exclude build
4444
if: matrix.python-version != '2.7'
4545
- name: Test with pytest
4646
run: |

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ clean:
1414
rm -rf Tests/__pycache__ PyPDF2/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf PyPDF2.egg-info PyPDF2_pdfLocation.txt
1515

1616
test:
17-
pytest Tests --cov --cov-report term-missing -vv --cov-report html
17+
pytest Tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30
1818

1919
mutation-test:
2020
mutmut run

PyPDF2/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from .pdf import PdfFileReader, PdfFileWriter
1+
from ._version import __version__
22
from .merger import PdfFileMerger
33
from .pagerange import PageRange, parse_filename_page_ranges
4-
from ._version import __version__
4+
from .pdf import PdfFileReader, PdfFileWriter
55

66
__all__ = [
77
"__version__",

PyPDF2/constants.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""
2+
See Portable Document Format Reference Manual, 1993. ISBN 0-201-62628-4.
3+
4+
See https://ia802202.us.archive.org/8/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf
5+
6+
PDF Reference, third edition, Version 1.4, 2001. ISBN 0-201-75839-3.
7+
8+
PDF Reference, sixth edition, Version 1.7, 2006.
9+
"""
10+
11+
12+
class PagesAttributes:
13+
"""Page Attributes, Table 6.2, Page 52"""
14+
15+
TYPE = "/Type" # name, required; must be /Pages
16+
KIDS = "/Kids" # array, required; List of indirect references
17+
COUNT = "/Count" # integer, required; the number of all nodes under this node
18+
PARENT = "/Parent" # dictionary, required; indirect reference to pages object
19+
20+
21+
class PageAttributes:
22+
"""Page attributes, Table 6.3, Page 53"""
23+
24+
TYPE = "/Type" # name, required; must be /Page
25+
MEDIABOX = "/MediaBox" # array, required; rectangle specifying page size
26+
PARENT = "/Parent" # dictionary, required; a pages object
27+
RESOURCES = "/Resources" # dictionary, required if there are any
28+
CONTENTS = "/Contents" # stream or array, optional
29+
CROPBOX = "/CropBox" # array, optional; rectangle
30+
ROTATE = "/Rotate" # integer, optional; page rotation in degrees
31+
THUMB = "/Thumb" # stream, optional; indirect reference to image of the page
32+
ANNOTS = "/Annots" # array, optional; an array of annotations
33+
34+
35+
class Ressources:
36+
PROCSET = "/ProcSet" # Chapter 6.8.1
37+
FONT = "/Font" # Chapter 6.8.2
38+
# encoding
39+
# font descriptors : 6.8.4
40+
COLOR_SPACE = "/ColorSpace" # Chapter 6.8.5
41+
XOBJECT = "/XObject" # Chapter 6.8.6
42+
43+
44+
class StreamAttributes:
45+
"""Table 4.2"""
46+
47+
LENGTH = "/Length" # integer, required
48+
FILTER = "/Filter" # name or array of names, optional
49+
DECODE_PARMS = "/DecodeParms" # variable, optional -- 'decodeParams' is wrong
50+
51+
52+
class FilterTypes:
53+
"""
54+
Table 4.3 of the 1.4 Manual
55+
56+
Page 354 of the 1.7 Manual
57+
"""
58+
59+
ASCII_HEX_DECODE = "/ASCIIHexDecode" # abbreviation: AHx
60+
ASCII_85_DECODE = "/ASCII85Decode" # abbreviation: A85
61+
LZW_DECODE = "/LZWDecode" # abbreviation: LZW
62+
FLATE_DECODE = "/FlateDecode" # abbreviation: Fl, PDF 1.2
63+
RUN_LENGTH_DECODE = "/RunLengthDecode" # abbreviation: RL
64+
CCITT_FAX_DECODE = "/CCITTFaxDecode" # abbreviation: CCF
65+
DCT_DECODE = "/DCTDecode" # abbreviation: DCT
66+
67+
68+
class FilterTypeAbbreviations:
69+
"""
70+
Table 4.44 of the 1.7 Manual (page 353ff)
71+
"""
72+
73+
AHx = "/AHx"
74+
A85 = "/A85"
75+
LZW = "/LZW"
76+
FL = "/Fl" # FlateDecode
77+
RL = "/RL"
78+
CCF = "/CCF"
79+
DCT = "/DCT"
80+
81+
82+
class LzwFilterParameters:
83+
"""Table 4.4"""
84+
85+
PREDICTOR = "/Predictor" # integer
86+
COLUMNS = "/Columns" # integer
87+
COLORS = "/Colors" # integer
88+
BITS_PER_COMPONENT = "/BitsPerComponent" # integer
89+
EARLY_CHANGE = "/EarlyChange" # integer
90+
91+
92+
class CcittFaxDecodeParameters:
93+
"""Table 4.5"""
94+
95+
K = "/K" # integer
96+
END_OF_LINE = "/EndOfLine" # boolean
97+
ENCODED_BYTE_ALIGN = "/EncodedByteAlign" # boolean
98+
COLUMNS = "/Columns" # integer
99+
ROWS = "/Rows" # integer
100+
END_OF_BLOCK = "/EndOfBlock" # boolean
101+
BLACK_IS_1 = "/BlackIs1" # boolean
102+
DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError" # integer
103+
104+
105+
class ImageAttributes:
106+
"""Table 6.20."""
107+
108+
TYPE = "/Type" # name, required; must be /XObject
109+
SUBTYPE = "/Subtype" # name, required; must be /Image
110+
NAME = "/Name" # name, required
111+
WIDTH = "/Width" # integer, required
112+
HEIGHT = "/Height" # integer, required
113+
BITS_PER_COMPONENT = "/BitsPerComponent" # integer, required
114+
COLOR_SPACE = "/ColorSpace" # name, required
115+
DECODE = "/Decode" # array, optional
116+
INTERPOLATE = "/Interpolate" # boolean, optional
117+
IMAGE_MASK = "/ImageMask" # boolean, optional
118+
119+
120+
class ColorSpaces:
121+
DEVICE_RGB = "/DeviceRGB"
122+
DEVICE_CMYK = "/DeviceCMYK"
123+
DEVICE_GRAY = "/DeviceGray"
124+
125+
126+
class TypArguments:
127+
"""Table 8.2 of the PDF 1.7 reference"""
128+
129+
LEFT = "/Left"
130+
RIGHT = "/Right"
131+
BOTTOM = "/Bottom"
132+
TOP = "/Top"
133+
134+
135+
class TypFitArguments:
136+
"""Table 8.2 of the PDF 1.7 reference"""
137+
138+
FIT = "/Fit"
139+
FIT_V = "/FitV"
140+
FIT_BV = "/FitBV"
141+
FIT_B = "/FitB"
142+
FIT_H = "/FitH"
143+
FIT_BH = "/FitBH"
144+
FIT_R = "/FitR"
145+
146+
147+
class PageLayouts:
148+
"""Page 84, PDF 1.4 reference"""
149+
150+
SINGLE_PAGE = "/SinglePage"
151+
ONE_COLUMN = "/OneColumn"
152+
TWO_COLUMN_LEFT = "/TwoColumnLeft"
153+
TWO_COLUMN_RIGHT = "/TwoColumnRight"
154+
155+
156+
class GraphicsStateParameters:
157+
"""Table 4.8 of the 1.7 reference"""
158+
159+
TYPE = "/Type" # name, optional
160+
LW = "/LW" # number, optional
161+
# TODO: Many more!
162+
FONT = "/Font" # array, optional
163+
S_MASK = "/SMask" # dictionary or name, optional
164+
165+
166+
class CatalogDictionary:
167+
"""Table 3.25 in the 1.7 reference"""
168+
169+
TYPE = "/Type" # name, required; must be /Catalog
170+
# TODO: Many more!
171+
172+
173+
PDF_KEYS = [
174+
PagesAttributes,
175+
PageAttributes,
176+
Ressources,
177+
ImageAttributes,
178+
StreamAttributes,
179+
FilterTypes,
180+
LzwFilterParameters,
181+
TypArguments,
182+
TypFitArguments,
183+
PageLayouts,
184+
GraphicsStateParameters,
185+
CatalogDictionary,
186+
]

PyPDF2/filters.py

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,23 @@
3131
__author_email__ = "biziqe@mathieu.fenniak.net"
3232

3333
import math
34+
from sys import version_info
35+
36+
from PyPDF2.constants import CcittFaxDecodeParameters as CCITT
37+
from PyPDF2.constants import ColorSpaces
38+
from PyPDF2.constants import FilterTypeAbbreviations as FTA
39+
from PyPDF2.constants import FilterTypes as FT
40+
from PyPDF2.constants import ImageAttributes as IA
41+
from PyPDF2.constants import LzwFilterParameters as LZW
42+
from PyPDF2.constants import StreamAttributes as SA
3443

3544
from .utils import PdfReadError, ord_, paethPredictor
36-
from sys import version_info
45+
3746
if version_info < ( 3, 0 ):
3847
from cStringIO import StringIO
3948
else:
4049
from io import StringIO
50+
4151
import struct
4252

4353
try:
@@ -110,13 +120,13 @@ def decode(data, decodeParms):
110120
predictor = 1
111121
if decodeParms:
112122
try:
113-
predictor = decodeParms.get("/Predictor", 1)
123+
predictor = decodeParms.get(LZW.PREDICTOR, 1)
114124
except AttributeError:
115125
pass # usually an array with a null object was read
116126

117127
# predictor 1 == no predictor
118128
if predictor != 1:
119-
columns = decodeParms["/Columns"]
129+
columns = decodeParms[LZW.COLUMNS]
120130
# PNG prediction:
121131
if predictor >= 10 and predictor <= 15:
122132
output = StringIO()
@@ -261,7 +271,7 @@ def decode(self):
261271
return baos
262272

263273
@staticmethod
264-
def decode(data,decodeParams=None):
274+
def decode(data, decodeParms=None):
265275
return LZWDecode.decoder(data).decode()
266276

267277

@@ -363,7 +373,7 @@ def decode(data, decodeParms=None, height=0):
363373
else:
364374
CCITTgroup = 3
365375

366-
width = decodeParms["/Columns"]
376+
width = decodeParms[CCITT.COLUMNS]
367377
imgSize = len(data)
368378
tiff_header_struct = '<2shlh' + 'hhll' * 8 + 'h'
369379
tiffHeader = struct.pack(tiff_header_struct,
@@ -388,7 +398,7 @@ def decode(data, decodeParms=None, height=0):
388398

389399
def decodeStreamData(stream):
390400
from .generic import NameObject
391-
filters = stream.get("/Filter", ())
401+
filters = stream.get(SA.FILTER, ())
392402

393403
if len(filters) and not isinstance(filters[0], NameObject):
394404
# we have a single filter instance
@@ -397,24 +407,24 @@ def decodeStreamData(stream):
397407
# If there is no data to decode, we should not try to decode it.
398408
if data:
399409
for filterType in filters:
400-
if filterType == "/FlateDecode" or filterType == "/Fl":
401-
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
402-
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
410+
if filterType == FT.FLATE_DECODE or filterType == FTA.FL:
411+
data = FlateDecode.decode(data, stream.get(SA.DECODE_PARMS))
412+
elif filterType == FT.ASCII_HEX_DECODE or filterType == FTA.AHx:
403413
data = ASCIIHexDecode.decode(data)
404-
elif filterType == "/LZWDecode" or filterType == "/LZW":
405-
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
406-
elif filterType == "/ASCII85Decode" or filterType == "/A85":
414+
elif filterType == FT.LZW_DECODE or filterType == FTA.LZW:
415+
data = LZWDecode.decode(data, stream.get(SA.DECODE_PARMS))
416+
elif filterType == FT.ASCII_85_DECODE or filterType == FTA.A85:
407417
data = ASCII85Decode.decode(data)
408-
elif filterType == "/DCTDecode":
418+
elif filterType == FT.DCT_DECODE:
409419
data = DCTDecode.decode(data)
410420
elif filterType == "/JPXDecode":
411421
data = JPXDecode.decode(data)
412-
elif filterType == "/CCITTFaxDecode":
413-
height = stream.get("/Height", ())
414-
data = CCITTFaxDecode.decode(data, stream.get("/DecodeParms"), height)
422+
elif filterType == FT.CCITT_FAX_DECODE:
423+
height = stream.get(IA.HEIGHT, ())
424+
data = CCITTFaxDecode.decode(data, stream.get(SA.DECODE_PARMS), height)
415425
elif filterType == "/Crypt":
416-
decodeParams = stream.get("/DecodeParams", {})
417-
if "/Name" not in decodeParams and "/Type" not in decodeParams:
426+
decodeParms = stream.get(SA.DECODE_PARMS, {})
427+
if "/Name" not in decodeParms and "/Type" not in decodeParms:
418428
pass
419429
else:
420430
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
@@ -434,34 +444,37 @@ def _xobj_to_image(x_object_obj):
434444
:return: Tuple[file extension, bytes]
435445
"""
436446
import io
447+
437448
from PIL import Image
438449

439-
size = (x_object_obj["/Width"], x_object_obj["/Height"])
450+
from PyPDF2.constants import GraphicsStateParameters as G
451+
452+
size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
440453
data = x_object_obj.getData()
441-
if x_object_obj["/ColorSpace"] == "/DeviceRGB":
454+
if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB:
442455
mode = "RGB"
443456
else:
444457
mode = "P"
445458
extension = None
446-
if "/Filter" in x_object_obj:
447-
if x_object_obj["/Filter"] == "/FlateDecode":
459+
if SA.FILTER in x_object_obj:
460+
if x_object_obj[SA.FILTER] == FT.FLATE_DECODE:
448461
extension = ".png"
449462
img = Image.frombytes(mode, size, data)
450-
if "/SMask" in x_object_obj: # add alpha channel
451-
alpha = Image.frombytes("L", size, x_object_obj["/SMask"].getData())
463+
if G.S_MASK in x_object_obj: # add alpha channel
464+
alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].getData())
452465
img.putalpha(alpha)
453466
img_byte_arr = io.BytesIO()
454467
img.save(img_byte_arr, format="PNG")
455468
data = img_byte_arr.getvalue()
456-
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
469+
elif x_object_obj[SA.FILTER] in ([FT.LZW_DECODE], [FT.ASCII_85_DECODE], [FT.CCITT_FAX_DECODE]):
457470
from PyPDF2.utils import b_
458471
extension = ".png"
459472
data = b_(data)
460-
elif x_object_obj["/Filter"] == "/DCTDecode":
473+
elif x_object_obj[SA.FILTER] == FT.DCT_DECODE:
461474
extension = ".jpg"
462-
elif x_object_obj["/Filter"] == "/JPXDecode":
475+
elif x_object_obj[SA.FILTER] == "/JPXDecode":
463476
extension = ".jp2"
464-
elif x_object_obj["/Filter"] == "/CCITTFaxDecode":
477+
elif x_object_obj[SA.FILTER] == FT.CCITT_FAX_DECODE:
465478
extension = ".tiff"
466479
else:
467480
extension = ".png"

0 commit comments

Comments
 (0)