Description
I am trying to gather all PDF annotations in a PDF file with 479 pages.
# Open the PDF file
pdf_file = open('some-file.pdf', 'rb')
# Create a PDF viewer
viewer = SimplePDFViewer(pdf_file)
# Loop through all pages in the PDF file
for canvas in viewer:
    # etc
Iterating over the viewer raises the following error:
File "path\to\extract-comments.py", line 15, in <module>
for canvas in viewer:
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\pdfviewer.py", line 27, in __next__
self.viewer.render()
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\simple.py", line 204, in render
super(SimplePDFViewer, self).render()
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\pdfviewer.py", line 119, in render
self.notify(obj)
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\pdfviewer.py", line 110, in notify
handler(obj)
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\simple.py", line 140, in on_TJ
s = self.decode_string(arr[i])
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\viewer\simple.py", line 100, in decode_string
s = self.decoder.decode_string(s)
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\codecs\decoder.py", line 146, in decode_string
return self.decode_hexstring(s_hex)
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\codecs\decoder.py", line 125, in decode_hexstring
ch = self.cmap.bf_ranges[code]
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\types\cmap.py", line 323, in __getitem__
return r[item]
File "C:\Users\dccarvalho\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pdfreader\types\cmap.py", line 85, in __getitem__
return chr(self.map_to_start + (HexString(item).as_int - self.int_begin))
OverflowError: Python int too large to convert to C int