Skip to content

Commit

Permalink
DEV: Add Benchmark for Performance Testing (#781)
Browse files Browse the repository at this point in the history
We want to track performance over time only for what actually
is in main.

Closes #761
  • Loading branch information
MartinThoma authored Apr 21, 2022
1 parent 2f01f77 commit f0f1fa3
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 1 deletion.
48 changes: 48 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: Benchmarking PyPDF2
on:
push:
branches:
- main

permissions:
contents: write
deployments: write

jobs:
benchmark:
name: Run pytest-benchmark
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install requirements (Python 3)
if: matrix.python-version != '2.7'
run: |
pip install -r requirements/ci.txt
- name: Install PyPDF2
run: |
pip install .
- name: Run benchmark
run: |
pytest Tests/bench.py --benchmark-json output.json
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
name: Python Benchmark with pytest-benchmark
tool: 'pytest'
output-file-path: output.json
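# Currently uses the built-in GITHUB_TOKEN. NOTE(review): a personal access token may be needed instead if pushes made with GITHUB_TOKEN do not trigger the gh-pages build, see https://github.community/t/github-action-not-triggering-gh-pages-upon-push/16096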
github-token: ${{ secrets.GITHUB_TOKEN }}
auto-push: true
# Show alert with commit comment on detecting possible performance regression
alert-threshold: '200%'
comment-on-alert: true
fail-on-alert: true
alert-comment-cc-users: '@MartinThoma'
104 changes: 104 additions & 0 deletions Tests/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os

import PyPDF2
from PyPDF2 import PdfFileReader
from PyPDF2.generic import Destination

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


def page_ops(pdf_path, password):
    """
    Run a fixed sequence of page operations on the first page of a PDF.

    :param pdf_path: File name of the PDF, relative to ``RESOURCE_ROOT``.
    :param password: Password used to decrypt the document; when falsy,
        no decryption is attempted.
    """
    reader = PdfFileReader(os.path.join(RESOURCE_ROOT, pdf_path))
    if password:
        reader.decrypt(password)

    # Every merge below uses the first page as both target and source.
    first_page = reader.pages[0]

    # Merge variants
    first_page.mergeRotatedScaledPage(first_page, 90, 1, 1)
    first_page.mergeScaledTranslatedPage(first_page, 1, 1, 1)
    first_page.mergeRotatedScaledTranslatedPage(first_page, 90, 1, 1, 1, 1)

    # Transformation and scaling
    first_page.addTransformation([1, 0, 0, 0, 0, 0])
    first_page.scale(2, 2)
    first_page.scaleBy(0.5)
    first_page.scaleTo(100, 100)

    # Content stream compression and text extraction
    first_page.compressContentStreams()
    first_page.extractText()


def test_page_operations(benchmark):
    """
    Benchmark ``page_ops`` on a password-protected sample document.

    The measured work covers rotation, scaling, translation, content stream
    compression and text extraction.
    """
    encrypted_pdf = "libreoffice-writer-password.pdf"
    benchmark(page_ops, encrypted_pdf, "openpassword")


def merge():
    """
    Merge several resource PDFs into one file and verify its outline.

    Inputs are appended as string paths, as ``PdfFileReader`` objects (one of
    them decrypted first) and as an open file handle. Bookmarks, metadata, a
    named destination, the page layout and the page mode are then set. The
    result is written to ``dont_commit_merged.pdf`` in the current working
    directory, read back, and its outline titles are compared against the
    expected list.

    The merger is closed and the temporary file is removed even if writing
    or the outline check fails, so a failed run leaves nothing behind.

    :raises AssertionError: If the outline titles of the merged file differ
        from the expected ones.
    """
    pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
    outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
    pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
    pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
    tmp_path = "dont_commit_merged.pdf"

    try:
        file_merger = PyPDF2.PdfFileMerger()
        try:
            # string path:
            file_merger.append(pdf_path)
            file_merger.append(outline)
            file_merger.append(
                pdf_path, pages=PyPDF2.pagerange.PageRange(slice(0, 0))
            )
            file_merger.append(pdf_forms)

            # Merging an encrypted file
            pdfr = PyPDF2.PdfFileReader(pdf_pw)
            pdfr.decrypt("openpassword")
            file_merger.append(pdfr)

            # PdfFileReader object:
            # NOTE(review): "rb" is passed as the second positional argument
            # of PdfFileReader; it looks like a file mode was intended --
            # confirm which parameter this actually binds to.
            file_merger.append(PyPDF2.PdfFileReader(pdf_path, "rb"), bookmark=True)

            # File handle
            with open(pdf_path, "rb") as fh:
                file_merger.append(fh)

            bookmark = file_merger.addBookmark("A bookmark", 0)
            file_merger.addBookmark("deeper", 0, parent=bookmark)
            file_merger.addMetadata({"author": "Martin Thoma"})
            file_merger.addNamedDestination("title", 0)
            file_merger.setPageLayout("/SinglePage")
            file_merger.setPageMode("/UseThumbs")

            file_merger.write(tmp_path)
        finally:
            # Release the merger's inputs whether or not writing succeeded.
            file_merger.close()

        # Check if bookmarks are correct
        pdfr = PyPDF2.PdfFileReader(tmp_path)
        titles = [
            el.title for el in pdfr.getOutlines() if isinstance(el, Destination)
        ]
        assert titles == [
            "Foo",
            "Bar",
            "Baz",
            "Foo",
            "Bar",
            "Baz",
            "Foo",
            "Bar",
            "Baz",
            "True",
            "A bookmark",
        ]
    finally:
        # Clean up; the file may not exist if an error occurred before write().
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_merge(benchmark):
    """
    Benchmark merging several PDF documents into one file.

    The measured ``merge`` function appends PDFs from paths, reader objects
    and a file handle, adds bookmarks and metadata, writes the result and
    checks the outline of the written file.
    """
    benchmark(merge)
16 changes: 16 additions & 0 deletions docs/dev/intro.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Developer Intro

PyPDF2 is a library and hence its users are developers. This document is not for
the users, but for people who want to work on PyPDF2 itself.

## Installing Requirements

```
pip install -r requirements/dev.txt
```

## Benchmarks

We need to keep an eye on performance and thus we have a few benchmarks.

See [py-pdf.github.io/PyPDF2/dev/bench](https://py-pdf.github.io/PyPDF2/dev/bench/)
5 changes: 5 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ You can contribute to `PyPDF2 on Github <https://github.com/py-pdf/PyPDF2>`_.
modules/Field
modules/PageRange

.. toctree::
:caption: PyPDF2 Developers
:maxdepth: 1

dev/intro

.. toctree::
:caption: About PyPDF2
Expand Down
3 changes: 2 additions & 1 deletion requirements/ci.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ flake8
flake8_implicit_str_concat
flake8-bugbear
pillow
pytest
pytest
pytest-benchmark
6 changes: 6 additions & 0 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,19 @@ pluggy==1.0.0
# via pytest
py==1.11.0
# via pytest
py-cpuinfo==8.0.0
# via pytest-benchmark
pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
pyparsing==3.0.7
# via packaging
pytest==7.0.1
# via
# -r requirements/ci.in
# pytest-benchmark
pytest-benchmark==3.4.1
# via -r requirements/ci.in
tomli==1.2.3
# via pytest
Expand Down

0 comments on commit f0f1fa3

Please sign in to comment.