-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into practical-k-means
- Loading branch information
Showing
22 changed files
with
415 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Creating and Modifying PDFs With Python | ||
|
||
This folder contains resources and materials for Real Python's [Creating and Modifying PDFs With Python](https://realpython.com/creating-modifying-pdf/) tutorial. | ||
|
||
There are two subfolders in this folder: | ||
|
||
1. **`practice_files/`:** Contains the sample PDFs used in the tutorial | ||
2. **`source_code/`:** Contains source code from the tutorial | ||
|
||
The source code files are organized by section of the article, and the start of each subsection is indicated with comments. | ||
|
||
The content of the companion tutorial was adapted from the "Creating and Modifying PDF Files" chapter of the book [*Python Basics: A Practical Introduction to Python 3*](https://realpython.com/products/python-basics-book/). If you enjoy this tutorial, check out the full book! |
Binary file not shown.
Binary file added
BIN
+70.5 KB
creating-and-modifying-pdfs/practice_files/expense_reports/Expense report 1.pdf
Binary file not shown.
Binary file added
BIN
+70.3 KB
creating-and-modifying-pdfs/practice_files/expense_reports/Expense report 2.pdf
Binary file not shown.
Binary file added
BIN
+70.9 KB
creating-and-modifying-pdfs/practice_files/expense_reports/Expense report 3.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+1.81 MB
creating-and-modifying-pdfs/practice_files/quarterly_report/full_report.pdf
Binary file not shown.
Binary file added
BIN
+927 KB
creating-and-modifying-pdfs/practice_files/quarterly_report/report.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
65 changes: 65 additions & 0 deletions
65
creating-and-modifying-pdfs/source_code/01-extracting-text-from-a-pdf.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# ---------------
# Open a PDF File
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader

# Location of the sample book inside the practice files. Adjust it if your
# copy of the materials lives somewhere other than your home directory.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

# The reader is given the path as a plain string rather than a Path object.
pdf = PdfFileReader(str(pdf_path))

# Report the page count, then the whole metadata record, then just the title.
print(pdf.getNumPages())
print(pdf.documentInfo)
print(pdf.documentInfo.title)
|
||
|
||
# ---------------------------
# Extracting Text From a Page
# ---------------------------

# `pdf` is the reader opened in the "Open a PDF File" section of this script.
first_page = pdf.getPage(0)
print(type(first_page))
print(first_page.extractText())

# Walk the whole document and print each page's extracted text in turn.
for page in pdf.pages:
    page_text = page.extractText()
    print(page_text)
|
||
|
||
# -----------------------
# Putting It All Together
# -----------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader  # noqa

# Change the path below to the correct path for your computer.
# The folder is named "practice_files" (underscore), matching every other
# section of these scripts.
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
output_file_path = Path.home() / "Pride_and_Prejudice.txt"

# An explicit encoding keeps the output independent of the platform default,
# which may not be able to represent every character extracted from the PDF.
with output_file_path.open(mode="w", encoding="utf-8") as output_file:
    # Header: the document title and page count, followed by a blank line.
    title = pdf_reader.documentInfo.title
    num_pages = pdf_reader.getNumPages()
    output_file.write(f"{title}\nNumber of pages: {num_pages}\n\n")

    # Body: the extracted text of every page, in document order.
    for page in pdf_reader.pages:
        text = page.extractText()
        output_file.write(text)
72 changes: 72 additions & 0 deletions
72
creating-and-modifying-pdfs/source_code/02-extracting-pages-from-a-pdf.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# -----------------------------
# Using the PdfFileWriter Class
# -----------------------------

from pathlib import Path  # noqa

from PyPDF2 import PdfFileWriter

# Start from an empty writer and give it a single 72 x 72 blank page.
pdf_writer = PdfFileWriter()
page = pdf_writer.addBlankPage(width=72, height=72)

# Show what kind of object addBlankPage() handed back.
print(type(page))

# Save the writer's contents. The relative path means the file is created in
# the current working directory, and it is opened for binary writing.
blank_pdf_path = Path("blank.pdf")
with blank_pdf_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
|
||
|
||
# -----------------------------------
# Extracting a Single Page From a PDF
# -----------------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

# Adjust this location if the practice files live elsewhere on your machine.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# Copy only the page at index 0 into a brand-new writer.
pdf_writer = PdfFileWriter()
first_page = input_pdf.getPage(0)
pdf_writer.addPage(first_page)

# Write the one-page document into the current working directory.
single_page_path = Path("first_page.pdf")
with single_page_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
|
||
|
||
# ------------------------------------
# Extracting Multiple Pages From a PDF
# ------------------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# First approach: fetch pages one index at a time. range(1, 4) yields the
# indices 1, 2 and 3, so three pages end up in the writer.
pdf_writer = PdfFileWriter()
for n in range(1, 4):
    pdf_writer.addPage(input_pdf.getPage(n))

print(pdf_writer.getNumPages())

# Second approach: start over with a fresh writer and take the same pages
# with a slice of the reader's page sequence.
pdf_writer = PdfFileWriter()
for page in input_pdf.pages[1:4]:
    pdf_writer.addPage(page)

# Only the writer built by the second approach is saved.
slice_path = Path("chapter1_slice.pdf")
with slice_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
61 changes: 61 additions & 0 deletions
61
creating-and-modifying-pdfs/source_code/03-concatenating-and-merging-pdfs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# -----------------------------
# Using the PdfFileMerger Class
# -----------------------------

from pathlib import Path  # noqa

from PyPDF2 import PdfFileMerger

pdf_merger = PdfFileMerger()

# ---------------------------------
# Concatenating PDFs With .append()
# ---------------------------------

reports_dir = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "expense_reports"
)

# First listing: the names exactly as glob() yields them. That order is not
# guaranteed to be alphabetical, which is why a sorted list is built next.
for path in reports_dir.glob("*.pdf"):
    print(path.name)

# sorted() builds the ordered list in one step.
expense_reports = sorted(reports_dir.glob("*.pdf"))

# Second listing: the same names, now in sorted order.
for path in expense_reports:
    print(path.name)

# Concatenate the reports in sorted order.
for path in expense_reports:
    pdf_merger.append(str(path))

with Path("expense_reports.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)

# Release the input files the merger opened for the appended paths.
pdf_merger.close()
|
||
|
||
# --------------------------
# Merging PDFs With .merge()
# --------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileMerger  # noqa

report_dir = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "quarterly_report"
)

report_path = report_dir / "report.pdf"
toc_path = report_dir / "toc.pdf"

# Start with the report itself ...
pdf_merger = PdfFileMerger()
pdf_merger.append(str(report_path))

# ... then insert the table of contents at page index 1, i.e. directly after
# the report's first page.
pdf_merger.merge(1, str(toc_path))

with Path("full_report.pdf").open(mode="wb") as output_file:
    pdf_merger.write(output_file)

# Release the input files the merger opened for report.pdf and toc.pdf.
pdf_merger.close()
104 changes: 104 additions & 0 deletions
104
creating-and-modifying-pdfs/source_code/04-rotating-and-cropping-PDF-pages.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# --------------
# Rotating Pages
# --------------

from pathlib import Path
from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs", "practice_files", "ugly.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

# Approach 1: turn every even-indexed page 90 degrees clockwise. Every page,
# rotated or not, is copied to the writer.
for n, page in enumerate(pdf_reader.pages):
    if n % 2 == 0:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Open the file again with a fresh reader before inspecting page dictionaries
# (presumably so the rotations applied above do not show up below).
pdf_reader = PdfFileReader(str(pdf_path))

print(pdf_reader.getPage(0))

# Print the "/Rotate" entry of page 0, then page 1, then page 0 once more.
# `page` is left pointing at page 0 for the rotation that follows.
for index in (0, 1, 0):
    page = pdf_reader.getPage(index)
    print(page["/Rotate"])

# Rotate page 0 and show its "/Rotate" entry again after the call.
page.rotateClockwise(90)
print(page["/Rotate"])

# Approach 2: start over and rotate only the pages whose "/Rotate" entry is
# -90. As before, every page is copied to the writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

for page in pdf_reader.pages:
    needs_fix = page["/Rotate"] == -90
    if needs_fix:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with Path("ugly_rotated2.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
|
||
|
||
# --------------
# Cropping Pages
# --------------

import copy  # noqa
from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "half_and_half.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))
first_page = pdf_reader.getPage(0)

# Inspect the page's media box: the box itself, each of its four corners,
# and finally the two components of the upper-right corner.
print(first_page.mediaBox)
for corner in ("lowerLeft", "lowerRight", "upperLeft", "upperRight"):
    print(getattr(first_page.mediaBox, corner))
print(first_page.mediaBox.upperRight[0])
print(first_page.mediaBox.upperRight[1])

# Move the upper-left corner to (0, 480), then print both upper corners to
# see the effect of the assignment.
first_page.mediaBox.upperLeft = (0, 480)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

# Save the cropped page on its own.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)
with Path("cropped_page.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Start again from a fresh reader and writer to split the page in two.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

first_page = pdf_reader.getPage(0)

# Each half is a deep copy, so changing one media box leaves the other copy
# and the page it was copied from untouched.
left_side = copy.deepcopy(first_page)
current_coords = left_side.mediaBox.upperRight
right_x, top_y = current_coords
new_coords = (right_x / 2, top_y)
left_side.mediaBox.upperRight = new_coords

# The right half uses the same midpoint as its upper-left corner.
right_side = copy.deepcopy(first_page)
right_side.mediaBox.upperLeft = new_coords

# Left half first, then right half.
for half in (left_side, right_side):
    pdf_writer.addPage(half)
with Path("cropped_pages.pdf").open(mode="wb") as output_file:
    pdf_writer.write(output_file)
46 changes: 46 additions & 0 deletions
46
creating-and-modifying-pdfs/source_code/05-encrypting-and-decrypting-pdfs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# ---------------
# Encrypting PDFs
# ---------------

from pathlib import Path
from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "newsletter.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))

# Copy every page of the newsletter into a new writer.
pdf_writer = PdfFileWriter()
pdf_writer.appendPagesFromReader(pdf_reader)

# Protect the document with a user password only.
pdf_writer.encrypt(user_pwd="SuperSecret")

# The protected copy is saved in the home directory.
output_path = Path.home().joinpath("newsletter_protected.pdf")
with output_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Variant with separate user and owner passwords. This call comes after the
# file has been written and nothing is written afterwards, so the saved file
# is the one produced with the user password alone.
user_pwd, owner_pwd = "SuperSecret", "ReallySuperSecret"
pdf_writer.encrypt(user_pwd=user_pwd, owner_pwd=owner_pwd)
|
||
|
||
# ---------------
# Decrypting PDFs
# ---------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

# The password-protected file saved to the home directory by the
# "Encrypting PDFs" section.
pdf_path = Path.home().joinpath("newsletter_protected.pdf")

pdf_reader = PdfFileReader(str(pdf_path))

# Deliberate demonstration: reading a page before decrypting fails.
print(pdf_reader.getPage(0))  # Raises PdfReadError

# Supply the user password and show what decrypt() returns.
decrypt_result = pdf_reader.decrypt(password="SuperSecret")
print(decrypt_result)

# The same page request, repeated now that decrypt() has been called.
print(pdf_reader.getPage(0))
Oops, something went wrong.