From 87aafd6d2b932f0597f3f03c916c52ce10bc6901 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 16 Apr 2022 09:08:02 +0200 Subject: [PATCH] DOC: Working with annotations (#764) See #107 --- docs/index.rst | 17 ++++--- docs/user/adding-pdf-annotations.md | 16 +++++++ docs/user/reading-pdf-annotations.md | 67 ++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 docs/user/adding-pdf-annotations.md create mode 100644 docs/user/reading-pdf-annotations.md diff --git a/docs/index.rst b/docs/index.rst index 1b339516c..e5cdcd238 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,13 +25,8 @@ You can contribute to `PyPDF2 on Github `_. user/merging-pdfs user/cropping-and-transforming user/add-watermark - - -.. toctree:: - :caption: Scripts - :maxdepth: 1 - - user/pdfcat + user/reading-pdf-annotations + user/adding-pdf-annotations .. toctree:: @@ -59,6 +54,14 @@ You can contribute to `PyPDF2 on Github `_. user/faq +.. toctree:: + :caption: Scripts + :maxdepth: 1 + + user/pdfcat + + + Indices and tables ================== diff --git a/docs/user/adding-pdf-annotations.md b/docs/user/adding-pdf-annotations.md new file mode 100644 index 000000000..215dcfdc7 --- /dev/null +++ b/docs/user/adding-pdf-annotations.md @@ -0,0 +1,16 @@ +# Adding PDF Annotations + +## Attachments + +```python +from PyPDF2 import PdfFileWriter + +writer = PdfFileWriter() +writer.addBlankPage(width=200, height=200) + +data = b"any bytes - typically read from a file" +writer.addAttachment("smile.png", data) + +with open("output.pdf", "wb") as output_stream: + writer.write(output_stream) +``` diff --git a/docs/user/reading-pdf-annotations.md b/docs/user/reading-pdf-annotations.md new file mode 100644 index 000000000..e84abd0a1 --- /dev/null +++ b/docs/user/reading-pdf-annotations.md @@ -0,0 +1,67 @@ +# Reading PDF Annotations + +PDF 1.7 defines 25 different annotation types: + +* Text +* Link +* FreeText +* Line, Square, Circle, Polygon, PolyLine, 
Highlight, Underline, Squiggly, StrikeOut +* Stamp, Caret, Ink +* Popup +* FileAttachment +* Sound, Movie +* Widget, Screen +* PrinterMark +* TrapNet +* Watermark +* 3D + +Reading the most common ones is described here. + +## Text + +```python +from PyPDF2 import PdfFileReader + +reader = PdfFileReader("example.pdf") + +for page in reader.pages: + if "/Annots" in page: + for annot in page["/Annots"]: + subtype = annot.getObject()["/Subtype"] + if subtype == "/Text": + print(annot.getObject()["/Contents"]) +``` + +## Highlights + +```python +from PyPDF2 import PdfFileReader + +reader = PdfFileReader("commented.pdf") + +for page in reader.pages: + if "/Annots" in page: + for annot in page["/Annots"]: + subtype = annot.getObject()["/Subtype"] + if subtype == "/Highlight": + coords = annot.getObject()["/QuadPoints"] + x1, y1, x2, y2, x3, y3, x4, y4 = coords +``` + +## Attachments + +```python +from PyPDF2 import PdfFileReader + +reader = PdfFileReader("example.pdf") + +attachments = {} +for page in reader.pages: + if "/Annots" in page: + for annotation in page["/Annots"]: + subtype = annotation.getObject()["/Subtype"] + if subtype == "/FileAttachment": + fileobj = annotation.getObject()["/FS"] + attachments[fileobj["/F"]] = fileobj["/EF"]["/F"].getData() +```