Skip to content

Commit

Permalink
Merge branch 'master' into practical-k-means
Browse files Browse the repository at this point in the history
  • Loading branch information
somacdivad authored May 21, 2020
2 parents 99c19c6 + 4110820 commit 81e47f1
Show file tree
Hide file tree
Showing 22 changed files with 415 additions and 0 deletions.
12 changes: 12 additions & 0 deletions creating-and-modifying-pdfs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Creating and Modifying PDFs With Python

This folder contains resources and materials for Real Python's [Creating and Modifying PDFs With Python](https://realpython.com/creating-modifying-pdf/) tutorial.

There are two subfolders in this folder:

1. **`practice_files/`:** Contains the sample PDFs used in the tutorial
2. **`source_code/`:** Contains source code from the tutorial

The source code files are organized by section of the article, and the start of each subsection is indicated with comments.

The content of the companion tutorial was adapted from the "Creating and Modifying PDF Files" chapter of the book [*Python Basics: A Practical Introduction to Python 3*](https://realpython.com/products/python-basics-book/). If you enjoy this tutorial, check out the full book!
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ---------------
# Open a PDF File
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader

# Location of the sample book below the home directory. You might need to
# change this to match the path on your computer.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

# The reader is given the path as a plain string.
pdf = PdfFileReader(str(pdf_path))

# Number of pages in the document.
print(pdf.getNumPages())

# The document information object, followed by just its title.
doc_info = pdf.documentInfo
print(doc_info)
print(doc_info.title)


# ---------------------------
# Extracting Text From a Page
# ---------------------------

# Fetch the page at index 0 and show what kind of object it is.
first_page = pdf.getPage(0)
print(type(first_page))

# Text of that first page only.
first_page_text = first_page.extractText()
print(first_page_text)

# Text of every page, printed one page at a time.
for page in pdf.pages:
    page_text = page.extractText()
    print(page_text)


# -----------------------
# Putting It All Together
# -----------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader  # noqa

# Change the path below to the correct path for your computer.
# The folder name is "practice_files" (with an underscore), as in the
# README and the other sections of this file.
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
output_file_path = Path.home() / "Pride_and_Prejudice.txt"

# Write a short header (title and page count) followed by the text of every
# page. The encoding is set explicitly so that non-ASCII characters in the
# extracted text do not depend on the platform's default encoding.
with output_file_path.open(mode="w", encoding="utf-8") as output_file:
    title = pdf_reader.documentInfo.title
    num_pages = pdf_reader.getNumPages()
    # Single backslashes give real line breaks; a doubled backslash would
    # write the two characters "\" and "n" into the file instead.
    output_file.write(f"{title}\nNumber of pages: {num_pages}\n\n")

    for page in pdf_reader.pages:
        text = page.extractText()
        output_file.write(text)
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# -----------------------------
# Using the PdfFileWriter Class
# -----------------------------

from pathlib import Path  # noqa

from PyPDF2 import PdfFileWriter

# Start with an empty writer and add a single blank 72 x 72 page to it.
pdf_writer = PdfFileWriter()
page = pdf_writer.addBlankPage(width=72, height=72)
print(type(page))

# Save the one-page document as "blank.pdf", relative to the current
# working directory. The file is opened in binary write mode.
blank_pdf_path = Path("blank.pdf")
with blank_pdf_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)


# -----------------------------------
# Extracting a Single Page From a PDF
# -----------------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

# Change the path to work on your computer if necessary
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# Put only the page at index 0 of the source document into a new writer.
pdf_writer = PdfFileWriter()
first_page = input_pdf.getPage(0)
pdf_writer.addPage(first_page)

# Write the single-page result to "first_page.pdf" in binary mode.
with open("first_page.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)


# ------------------------------------
# Extracting Multiple Pages From a PDF
# ------------------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# First approach: fetch the pages at indices 1, 2 and 3 one at a time.
pdf_writer = PdfFileWriter()
for page_index in (1, 2, 3):
    pdf_writer.addPage(input_pdf.getPage(page_index))

print(pdf_writer.getNumPages())

# Second approach: start over with a fresh writer and take the same
# index range as a slice of .pages.
pdf_writer = PdfFileWriter()
selected_pages = input_pdf.pages[1:4]
for page in selected_pages:
    pdf_writer.addPage(page)

with open("chapter1_slice.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -----------------------------
# Using the PdfFileMerger Class
# -----------------------------

from PyPDF2 import PdfFileMerger

pdf_merger = PdfFileMerger()

# ---------------------------------
# Concatenating PDFs With .append()
# ---------------------------------

from pathlib import Path  # noqa

reports_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "expense_reports",
)

# File names in whatever order glob() happens to yield them.
for path in reports_dir.glob("*.pdf"):
    print(path.name)

# The same files again, this time collected into a sorted list.
expense_reports = sorted(reports_dir.glob("*.pdf"))
for path in expense_reports:
    print(path.name)

# Append each report to the merger, following the sorted order.
for path in expense_reports:
    pdf_merger.append(str(path))

# Write the combined document to "expense_reports.pdf" in binary mode.
with open("expense_reports.pdf", mode="wb") as output_file:
    pdf_merger.write(output_file)


# --------------------------
# Merging PDFs With .merge()
# --------------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileMerger  # noqa

report_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "quarterly_report",
)
report_path = report_dir.joinpath("report.pdf")
toc_path = report_dir.joinpath("toc.pdf")

# Load the report into a fresh merger first, then merge the table of
# contents in with 1 as the position argument.
pdf_merger = PdfFileMerger()
pdf_merger.append(str(report_path))
pdf_merger.merge(1, str(toc_path))

# Write the result to "full_report.pdf" in binary mode.
with open("full_report.pdf", mode="wb") as output_file:
    pdf_merger.write(output_file)
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# --------------
# Rotating Pages
# --------------

from pathlib import Path
from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs", "practice_files", "ugly.pdf"
)

# First pass: rotate every page with an even index by 90 degrees clockwise
# and copy all pages, rotated or not, into the writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

for n, page in enumerate(pdf_reader.pages):
    has_even_index = n % 2 == 0
    if has_even_index:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with open("ugly_rotated.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)

# Read the file again into a fresh reader before inspecting its pages.
pdf_reader = PdfFileReader(str(pdf_path))

print(pdf_reader.getPage(0))

# The "/Rotate" entries of the pages at index 0 and index 1.
print(pdf_reader.getPage(0)["/Rotate"])
print(pdf_reader.getPage(1)["/Rotate"])

# The "/Rotate" entry of the page at index 0, before and after rotating it.
page = pdf_reader.getPage(0)
print(page["/Rotate"])
page.rotateClockwise(90)
print(page["/Rotate"])

# Second pass: with another fresh reader, rotate only the pages whose
# "/Rotate" entry equals -90 and copy all pages into a new writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

for page in pdf_reader.pages:
    needs_rotation = page["/Rotate"] == -90
    if needs_rotation:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

with open("ugly_rotated2.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)


# --------------
# Cropping Pages
# --------------

import copy  # noqa
from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "half_and_half.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))
first_page = pdf_reader.getPage(0)

# Show the media box itself, then each of its four corners.
print(first_page.mediaBox)
for corner_name in ("lowerLeft", "lowerRight", "upperLeft", "upperRight"):
    print(getattr(first_page.mediaBox, corner_name))

# The two components of the upper-right corner, one per line.
upper_right = first_page.mediaBox.upperRight
print(upper_right[0])
print(upper_right[1])

# Set the upper-left corner to (0, 480) and show both upper corners again.
first_page.mediaBox.upperLeft = (0, 480)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

# Save the page with the modified media box on its own.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)
with open("cropped_page.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)

# Start again from the file on disk, this time to split the page in two.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
first_page = pdf_reader.getPage(0)

# Work on deep copies so that each half gets its own media box.
# Left half: the first component of the upper-right corner is halved.
left_side = copy.deepcopy(first_page)
right_edge, top_edge = left_side.mediaBox.upperRight
new_coords = (right_edge / 2, top_edge)
left_side.mediaBox.upperRight = new_coords

# Right half: its upper-left corner is set to that same midpoint.
right_side = copy.deepcopy(first_page)
right_side.mediaBox.upperLeft = new_coords

for half in (left_side, right_side):
    pdf_writer.addPage(half)
with open("cropped_pages.pdf", mode="wb") as output_file:
    pdf_writer.write(output_file)
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# ---------------
# Encrypting PDFs
# ---------------

from pathlib import Path
from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "newsletter.pdf",
)

# Copy every page of the newsletter into a new writer.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()
pdf_writer.appendPagesFromReader(pdf_reader)

# Encrypt with a user password only, then save into the home directory.
pdf_writer.encrypt(user_pwd="SuperSecret")

output_path = Path.home().joinpath("newsletter_protected.pdf")
with output_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# encrypt() can also be given a separate owner password. This call comes
# after the file was written, and nothing below writes the writer out again.
user_pwd, owner_pwd = "SuperSecret", "ReallySuperSecret"
pdf_writer.encrypt(user_pwd=user_pwd, owner_pwd=owner_pwd)


# ---------------
# Decrypting PDFs
# ---------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader, PdfFileWriter  # noqa
from PyPDF2.utils import PdfReadError  # noqa

pdf_path = Path.home() / "newsletter_protected.pdf"

pdf_reader = PdfFileReader(str(pdf_path))

# Reading a page before decrypting raises PdfReadError. The error is caught
# and reported here so that the rest of this script still runs; left
# uncaught, it would stop the script before decrypt() is ever called.
try:
    print(pdf_reader.getPage(0))
except PdfReadError as error:
    print(f"PdfReadError: {error}")

# Decrypt with the user password and show the value decrypt() returns.
print(pdf_reader.decrypt(password="SuperSecret"))

# The same page access, repeated now that decrypt() has been called.
print(pdf_reader.getPage(0))
Loading

0 comments on commit 81e47f1

Please sign in to comment.