py-pdf · MartinThoma · Apr 28, 2022 · Apr 27, 2022 · Apr 27, 2022 · Apr 28, 2022
diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -38,6 +38,7 @@
 import re
 import warnings
 import logging
+import sys
 
 from PyPDF2.constants import FilterTypes as FT
 from PyPDF2.constants import StreamAttributes as SA
@@ -406,7 +407,7 @@ def readStringFromStream(stream):
                     # line break was escaped:
                     tok = b_('')
                 else:
-                    raise PdfReadError(r"Unexpected escaped string: %s" % tok)
+                    raise PdfReadError(r"Unexpected escaped string: {}".format(tok.decode('utf8')))
         txt += tok
     return createStringObject(txt)
 
@@ -419,9 +420,10 @@ class ByteStringObject(utils.bytes_type, PdfObject):  # type: ignore
     /O) is clearly not text, but is still stored in a "String" object.
     """
 
-    # For compatibility with TextStringObject.original_bytes.  This method
-    #  self.
-    original_bytes = property(lambda self: self)
+    @property
+    def original_bytes(self):
+        """For compatibility with TextStringObject.original_bytes."""
+        return self
 
     def writeToStream(self, stream, encryption_key):
         bytearr = self
@@ -443,11 +445,15 @@ class TextStringObject(utils.string_type, PdfObject):  # type: ignore
     autodetect_pdfdocencoding = False
     autodetect_utf16 = False
 
-    # It is occasionally possible that a text string object gets created where
-    # a byte string object was expected due to the autodetection mechanism --
-    # if that occurs, this "original_bytes" property can be used to
-    # back-calculate what the original encoded bytes were.
-    original_bytes = property(lambda self: self.get_original_bytes())
+    @property
+    def original_bytes(self):
+        """
+        It is occasionally possible that a text string object gets created where
+        a byte string object was expected due to the autodetection mechanism --
+        if that occurs, this "original_bytes" property can be used to
+        back-calculate what the original encoded bytes were.
+        """
+        return self.get_original_bytes()
 
     def get_original_bytes(self):
         # We're a text string object, but the library is trying to get our raw
@@ -555,11 +561,15 @@ def getXmpMetadata(self):
             self[NameObject("/Metadata")] = metadata
         return metadata
 
-    # Read-only property that accesses the {@link
-    # #DictionaryObject.getXmpData getXmpData} function.
-    # <p>
-    # Stability: Added in v1.12, will exist for all future v1.x releases.
-    xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
+    @property
+    def xmpMetadata(self):
+        """
+        Read-only property that returns the result of
+        :meth:`getXmpMetadata`.
+
+        Stability: Added in v1.12, will exist for all future v1.x releases.
+        """
+        return self.getXmpMetadata()
 
     def writeToStream(self, stream, encryption_key):
         stream.write(b_("<<\n"))
@@ -615,13 +625,15 @@ def readFromStream(stream, pdf):
             # patch provided by Danial Sandler
             while eol == b_(' '):
                 eol = stream.read(1)
-            assert eol in (b_("\n"), b_("\r"))
+            if eol not in (b_("\n"), b_("\r")):
+                raise PdfStreamError("Stream data must be followed by a newline")
             if eol == b_("\r"):
                 # read \n after
                 if stream.read(1)  != b_('\n'):
                     stream.seek(-1, 1)
             # this is a stream object, not a dictionary
-            assert SA.LENGTH in data
+            if SA.LENGTH not in data:
+                raise PdfStreamError("Stream length not defined")
             length = data[SA.LENGTH]
             if isinstance(length, IndirectObject):
                 t = stream.tell()
@@ -668,13 +680,19 @@ def __iter__(self):
 
     def children(self):
         if not self.hasChildren():
-            raise StopIteration
+            # PEP 479: ``raise StopIteration`` inside a generator turns
+            # into RuntimeError on newer Pythons; a bare ``return`` ends
+            # the generator on every version, so no version check.
+            return
 
         child = self['/First']
         while True:
             yield child
             if child == self['/Last']:
-                raise StopIteration
+                # The '/Last' entry has just been yielded: stop here.
+                # A bare ``return`` is the portable way to finish a
+                # generator (see PEP 479).
+                return
             child = child['/Next']
 
     def addChild(self, child, pdf):
@@ -979,61 +997,70 @@ def __init__(self, data):
             except KeyError:
                 pass
 
-    fieldType = property(lambda self: self.get("/FT"))
-    """
-    Read-only property accessing the type of this field.
-    """
-
-    parent = property(lambda self: self.get("/Parent"))
-    """
-    Read-only property accessing the parent of this field.
-    """
-
-    kids = property(lambda self: self.get("/Kids"))
-    """
-    Read-only property accessing the kids of this field.
-    """
-
-    name = property(lambda self: self.get("/T"))
-    """
-    Read-only property accessing the name of this field.
-    """
-
-    altName = property(lambda self: self.get("/TU"))
-    """
-    Read-only property accessing the alternate name of this field.
-    """
+    @property
+    def fieldType(self):
+        """Read-only property accessing the type of this field."""
+        return self.get("/FT")
+
+    @property
+    def parent(self):
+        """Read-only property accessing the parent of this field."""
+        return self.get("/Parent")
+
+    @property
+    def kids(self):
+        """Read-only property accessing the kids of this field."""
+        return self.get("/Kids")
+
+    @property
+    def name(self):
+        """Read-only property accessing the name of this field."""
+        return self.get("/T")
+
+    @property
+    def altName(self):
+        """Read-only property accessing the alternate name of this field."""
+        return self.get("/TU")
+
+    @property
+    def mappingName(self):
+        """
+        Read-only property accessing the mapping name of this field. This
+        name is used by PyPDF2 as a key in the dictionary returned by
+        :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
+        """
+        return self.get("/TM")
 
-    mappingName = property(lambda self: self.get("/TM"))
-    """
-    Read-only property accessing the mapping name of this field. This
-    name is used by PyPDF2 as a key in the dictionary returned by
-    :meth:`getFields()<PyPDF2.PdfFileReader.getFields>`
-    """
+    @property
+    def flags(self):
+        """
+        Read-only property accessing the field flags, specifying various
+        characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
+        """
+        return self.get("/Ff")
 
-    flags = property(lambda self: self.get("/Ff"))
-    """
-    Read-only property accessing the field flags, specifying various
-    characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
-    """
+    @property
+    def value(self):
+        """
+        Read-only property accessing the value of this field. Format
+        varies based on field type.
+        """
+        return self.get("/V")
 
-    value = property(lambda self: self.get("/V"))
-    """
-    Read-only property accessing the value of this field. Format
-    varies based on field type.
-    """
+    @property
+    def defaultValue(self):
+        """Read-only property accessing the default value of this field."""
+        return self.get("/DV")
 
-    defaultValue = property(lambda self: self.get("/DV"))
-    """
-    Read-only property accessing the default value of this field.
-    """
 
-    additionalActions = property(lambda self: self.get("/AA"))
-    """
-    Read-only property accessing the additional actions dictionary.
-    This dictionary defines the field's behavior in response to trigger events.
-    See Section 8.5.2 of the PDF 1.7 reference.
-    """
+    @property
+    def additionalActions(self):
+        """
+        Read-only property accessing the additional actions dictionary.
+        This dictionary defines the field's behavior in response to trigger events.
+        See Section 8.5.2 of the PDF 1.7 reference.
+        """
+        return self.get("/AA")
 
 
 class Destination(TreeObject):

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -1858,12 +1858,12 @@ def read(self, stream):
                 self._read_standard_xref_table(stream)
                 readNonWhitespace(stream)
                 stream.seek(-1, 1)
-                newTrailer = readObject(stream, self)
-                for key, value in list(newTrailer.items()):
+                new_trailer = readObject(stream, self)
+                for key, value in list(new_trailer.items()):
                     if key not in self.trailer:
                         self.trailer[key] = value
-                if "/Prev" in newTrailer:
-                    startxref = newTrailer["/Prev"]
+                if "/Prev" in new_trailer:
+                    startxref = new_trailer["/Prev"]
                 else:
                     break
             elif xref_issue_nr:
@@ -1929,7 +1929,8 @@ def read(self, stream):
                     if pid == id - self.xrefIndex:
                         self._zeroXref(gen)
                         break
-                    # if not, then either it's just plain wrong, or the non-zero-index is actually correct
+                    # if not, then either it's just plain wrong, or the
+                    # non-zero-index is actually correct
             stream.seek(loc, 0) # return to where it was
 
     def _read_standard_xref_table(self, stream):
@@ -2010,21 +2011,21 @@ def _read_pdf15_xref_stream(self, stream):
         xrefstream = readObject(stream, self)
         assert xrefstream["/Type"] == "/XRef"
         self.cacheIndirectObject(generation, idnum, xrefstream)
-        streamData = BytesIO(b_(xrefstream.getData()))
+        stream_data = BytesIO(b_(xrefstream.getData()))
         # Index pairs specify the subsections in the dictionary. If
         # none create one subsection that spans everything.
         idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
-        entrySizes = xrefstream.get("/W")
-        assert len(entrySizes) >= 3
-        if self.strict and len(entrySizes) > 3:
-            raise PdfReadError("Too many entry sizes: %s" % entrySizes)
+        entry_sizes = xrefstream.get("/W")
+        assert len(entry_sizes) >= 3
+        if self.strict and len(entry_sizes) > 3:
+            raise PdfReadError("Too many entry sizes: %s" % entry_sizes)
 
-        def getEntry(i):
+        def get_entry(i):
             # Reads the correct number of bytes for each entry. See the
             # discussion of the W parameter in PDF spec table 17.
-            if entrySizes[i] > 0:
-                d = streamData.read(entrySizes[i])
-                return convertToInt(d, entrySizes[i])
+            if entry_sizes[i] > 0:
+                d = stream_data.read(entry_sizes[i])
+                return convertToInt(d, entry_sizes[i])
 
             # PDF Spec Table 17: A value of zero for an element in the
             # W array indicates...the default value shall be used
@@ -2038,7 +2039,7 @@ def used_before(num, generation):
             return num in self.xref.get(generation, []) or num in self.xref_objStm
 
         # Iterate through each subsection
-        self._read_xref_subsections(idx_pairs, getEntry, used_before)
+        self._read_xref_subsections(idx_pairs, get_entry, used_before)
         return xrefstream
 
     @staticmethod
@@ -2078,13 +2079,13 @@ def _rebuild_xref_table(self, stream):
             if generation not in self.xref:
                 self.xref[generation] = {}
             self.xref[generation][idnum] = m.start(1)
-        trailerPos = f_.rfind(b"trailer") - len(f_) + 7
-        stream.seek(trailerPos, 2)
+        trailer_pos = f_.rfind(b"trailer") - len(f_) + 7
+        stream.seek(trailer_pos, 2)
         # code below duplicated
         readNonWhitespace(stream)
         stream.seek(-1, 1)
-        newTrailer = readObject(stream, self)
-        for key, value in list(newTrailer.items()):
+        new_trailer = readObject(stream, self)
+        for key, value in list(new_trailer.items()):
             if key not in self.trailer:
                 self.trailer[key] = value