Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: More Unit tests #831

Merged
merged 12 commits into from
Apr 28, 2022
163 changes: 95 additions & 68 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import re
import warnings
import logging
import sys

from PyPDF2.constants import FilterTypes as FT
from PyPDF2.constants import StreamAttributes as SA
Expand Down Expand Up @@ -406,7 +407,7 @@ def readStringFromStream(stream):
# line break was escaped:
tok = b_('')
else:
raise PdfReadError(r"Unexpected escaped string: %s" % tok)
raise PdfReadError(r"Unexpected escaped string: {}".format(tok.decode('utf8')))
txt += tok
return createStringObject(txt)

Expand All @@ -419,9 +420,10 @@ class ByteStringObject(utils.bytes_type, PdfObject): # type: ignore
/O) is clearly not text, but is still stored in a "String" object.
"""

# For compatibility with TextStringObject.original_bytes. This method
# returns self.
original_bytes = property(lambda self: self)
@property
def original_bytes(self):
"""For compatibility with TextStringObject.original_bytes."""
return self

def writeToStream(self, stream, encryption_key):
bytearr = self
Expand All @@ -443,11 +445,15 @@ class TextStringObject(utils.string_type, PdfObject): # type: ignore
autodetect_pdfdocencoding = False
autodetect_utf16 = False

# It is occasionally possible that a text string object gets created where
# a byte string object was expected due to the autodetection mechanism --
# if that occurs, this "original_bytes" property can be used to
# back-calculate what the original encoded bytes were.
original_bytes = property(lambda self: self.get_original_bytes())
@property
def original_bytes(self):
"""
It is occasionally possible that a text string object gets created where
a byte string object was expected due to the autodetection mechanism --
if that occurs, this "original_bytes" property can be used to
back-calculate what the original encoded bytes were.
"""
return self.get_original_bytes()

def get_original_bytes(self):
# We're a text string object, but the library is trying to get our raw
Expand Down Expand Up @@ -555,11 +561,15 @@ def getXmpMetadata(self):
self[NameObject("/Metadata")] = metadata
return metadata

# Read-only property that accesses the {@link
# #DictionaryObject.getXmpData getXmpData} function.
# <p>
# Stability: Added in v1.12, will exist for all future v1.x releases.
xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
@property
def xmpMetadata(self):
    """
    Read-only property that returns the result of calling
    :meth:`getXmpMetadata` on this object.

    Stability: Added in v1.12, will exist for all future v1.x releases.
    """
    return self.getXmpMetadata()

def writeToStream(self, stream, encryption_key):
stream.write(b_("<<\n"))
Expand Down Expand Up @@ -615,13 +625,15 @@ def readFromStream(stream, pdf):
# patch provided by Danial Sandler
while eol == b_(' '):
eol = stream.read(1)
assert eol in (b_("\n"), b_("\r"))
if eol not in (b_("\n"), b_("\r")):
raise PdfStreamError("Stream data must be followed by a newline")
if eol == b_("\r"):
# read \n after
if stream.read(1) != b_('\n'):
stream.seek(-1, 1)
# this is a stream object, not a dictionary
assert SA.LENGTH in data
if SA.LENGTH not in data:
raise PdfStreamError("Stream length not defined")
length = data[SA.LENGTH]
if isinstance(length, IndirectObject):
t = stream.tell()
Expand Down Expand Up @@ -668,13 +680,19 @@ def __iter__(self):

def children(self):
    """
    Generate the child nodes of this tree object in order.

    Iteration starts at ``self['/First']`` and follows each child's
    ``'/Next'`` entry until a child equal to ``self['/Last']`` has been
    yielded. Nothing is yielded when ``self.hasChildren()`` is false.
    """
    if not self.hasChildren():
        # A bare ``return`` ends a generator on every Python version.
        # ``raise StopIteration`` inside a generator is turned into a
        # RuntimeError by PEP 479, so it must not be used here.
        return

    child = self['/First']
    while True:
        yield child
        if child == self['/Last']:
            return
        child = child['/Next']

def addChild(self, child, pdf):
Expand Down Expand Up @@ -979,61 +997,70 @@ def __init__(self, data):
except KeyError:
pass

fieldType = property(lambda self: self.get("/FT"))
"""
Read-only property accessing the type of this field.
"""

parent = property(lambda self: self.get("/Parent"))
"""
Read-only property accessing the parent of this field.
"""

kids = property(lambda self: self.get("/Kids"))
"""
Read-only property accessing the kids of this field.
"""

name = property(lambda self: self.get("/T"))
"""
Read-only property accessing the name of this field.
"""

altName = property(lambda self: self.get("/TU"))
"""
Read-only property accessing the alternate name of this field.
"""
@property
def fieldType(self):
"""Read-only property accessing the type of this field."""
return self.get("/FT")

@property
def parent(self):
"""Read-only property accessing the parent of this field."""
return self.get("/Parent")

@property
def kids(self):
"""Read-only property accessing the kids of this field."""
return self.get("/Kids")

@property
def name(self):
"""Read-only property accessing the name of this field."""
return self.get("/T")

@property
def altName(self):
"""Read-only property accessing the alternate name of this field."""
return self.get("/TU")

@property
def mappingName(self):
"""
Read-only property accessing the mapping name of this field. This
name is used by PyPDF2 as a key in the dictionary returned by
:meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
"""
return self.get("/TM")

mappingName = property(lambda self: self.get("/TM"))
"""
Read-only property accessing the mapping name of this field. This
name is used by PyPDF2 as a key in the dictionary returned by
:meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
"""
@property
def flags(self):
"""
Read-only property accessing the field flags, specifying various
characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
"""
return self.get("/Ff")

flags = property(lambda self: self.get("/Ff"))
"""
Read-only property accessing the field flags, specifying various
characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
"""
@property
def value(self):
"""
Read-only property accessing the value of this field. Format
varies based on field type.
"""
return self.get("/V")

value = property(lambda self: self.get("/V"))
"""
Read-only property accessing the value of this field. Format
varies based on field type.
"""
@property
def defaultValue(self):
"""Read-only property accessing the default value of this field."""
return self.get("/DV")

defaultValue = property(lambda self: self.get("/DV"))
"""
Read-only property accessing the default value of this field.
"""

additionalActions = property(lambda self: self.get("/AA"))
"""
Read-only property accessing the additional actions dictionary.
This dictionary defines the field's behavior in response to trigger events.
See Section 8.5.2 of the PDF 1.7 reference.
"""
@property
def additionalActions(self):
    """
    Read-only property accessing the additional actions dictionary.
    This dictionary defines the field's behavior in response to trigger events.
    See Section 8.5.2 of the PDF 1.7 reference.

    Returns the value stored under ``"/AA"``, or ``None`` when the key is
    absent.
    """
    # The lookup result must be returned; without ``return`` the property
    # always evaluates to None even when "/AA" is present.
    return self.get("/AA")


class Destination(TreeObject):
Expand Down
39 changes: 20 additions & 19 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1858,12 +1858,12 @@ def read(self, stream):
self._read_standard_xref_table(stream)
readNonWhitespace(stream)
stream.seek(-1, 1)
newTrailer = readObject(stream, self)
for key, value in list(newTrailer.items()):
new_trailer = readObject(stream, self)
for key, value in list(new_trailer.items()):
if key not in self.trailer:
self.trailer[key] = value
if "/Prev" in newTrailer:
startxref = newTrailer["/Prev"]
if "/Prev" in new_trailer:
startxref = new_trailer["/Prev"]
else:
break
elif xref_issue_nr:
Expand Down Expand Up @@ -1929,7 +1929,8 @@ def read(self, stream):
if pid == id - self.xrefIndex:
self._zeroXref(gen)
break
# if not, then either it's just plain wrong, or the non-zero-index is actually correct
# if not, then either it's just plain wrong, or the
# non-zero-index is actually correct
stream.seek(loc, 0) # return to where it was

def _read_standard_xref_table(self, stream):
Expand Down Expand Up @@ -2010,21 +2011,21 @@ def _read_pdf15_xref_stream(self, stream):
xrefstream = readObject(stream, self)
assert xrefstream["/Type"] == "/XRef"
self.cacheIndirectObject(generation, idnum, xrefstream)
streamData = BytesIO(b_(xrefstream.getData()))
stream_data = BytesIO(b_(xrefstream.getData()))
# Index pairs specify the subsections in the dictionary. If
# none create one subsection that spans everything.
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
entrySizes = xrefstream.get("/W")
assert len(entrySizes) >= 3
if self.strict and len(entrySizes) > 3:
raise PdfReadError("Too many entry sizes: %s" % entrySizes)
entry_sizes = xrefstream.get("/W")
assert len(entry_sizes) >= 3
if self.strict and len(entry_sizes) > 3:
raise PdfReadError("Too many entry sizes: %s" % entry_sizes)

def getEntry(i):
def get_entry(i):
# Reads the correct number of bytes for each entry. See the
# discussion of the W parameter in PDF spec table 17.
if entrySizes[i] > 0:
d = streamData.read(entrySizes[i])
return convertToInt(d, entrySizes[i])
if entry_sizes[i] > 0:
d = stream_data.read(entry_sizes[i])
return convertToInt(d, entry_sizes[i])

# PDF Spec Table 17: A value of zero for an element in the
# W array indicates...the default value shall be used
Expand All @@ -2038,7 +2039,7 @@ def used_before(num, generation):
return num in self.xref.get(generation, []) or num in self.xref_objStm

# Iterate through each subsection
self._read_xref_subsections(idx_pairs, getEntry, used_before)
self._read_xref_subsections(idx_pairs, get_entry, used_before)
return xrefstream

@staticmethod
Expand Down Expand Up @@ -2078,13 +2079,13 @@ def _rebuild_xref_table(self, stream):
if generation not in self.xref:
self.xref[generation] = {}
self.xref[generation][idnum] = m.start(1)
trailerPos = f_.rfind(b"trailer") - len(f_) + 7
stream.seek(trailerPos, 2)
trailer_pos = f_.rfind(b"trailer") - len(f_) + 7
stream.seek(trailer_pos, 2)
# code below duplicated
readNonWhitespace(stream)
stream.seek(-1, 1)
newTrailer = readObject(stream, self)
for key, value in list(newTrailer.items()):
new_trailer = readObject(stream, self)
for key, value in list(new_trailer.items()):
if key not in self.trailer:
self.trailer[key] = value

Expand Down
Loading