From 963b25159a69eb3264cb7179082fe6ffce16126b Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 26 Apr 2022 17:02:57 +0200 Subject: [PATCH] TST: Use external repository for larger/more PDFs for testing (#820) * Use submodule so that the connection is clear. Ensure that Flake8 issues of the submodule don't show up here * As a first step, just try to get the number of pages from the non-encrypted PDFs * Create an "external" pytest marker which allows people to deactivate tests that need the submodule --- .github/workflows/github-ci.yaml | 4 +++- .gitmodules | 3 +++ Tests/test_page.py | 25 +++++++++++++++++++++++++ docs/dev/intro.md | 17 +++++++++++++++++ pytest.ini | 3 ++- sample-files | 1 + 6 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 .gitmodules create mode 160000 sample-files diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 6ed659f4d..7c7260c39 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -26,6 +26,8 @@ jobs: steps: - name: Checkout Code uses: actions/checkout@v3 + with: + submodules: 'recursive' - name: Setup Python uses: actions/setup-python@v3 with: @@ -46,7 +48,7 @@ jobs: pip install . - name: Test with flake8 run: | - flake8 . --ignore=E203,W503,W504,E,F403,F405 --exclude build + flake8 . 
--ignore=E203,W503,W504,E,F403,F405 --exclude build,sample-files if: matrix.python-version != '2.7' - name: Test with pytest run: | diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..10cd5293e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "sample-files"] + path = sample-files + url = https://github.com/py-pdf/sample-files diff --git a/Tests/test_page.py b/Tests/test_page.py index c5ea98662..69efea831 100644 --- a/Tests/test_page.py +++ b/Tests/test_page.py @@ -1,4 +1,5 @@ import os +import json import pytest @@ -7,6 +8,30 @@ TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") +EXTERNAL_ROOT = os.path.join(PROJECT_ROOT, "sample-files") + + +def get_all_sample_files(): + with open(os.path.join(EXTERNAL_ROOT, "files.json")) as fp: + data = fp.read() + meta = json.loads(data) + return meta + + +all_files_meta = get_all_sample_files() + + +@pytest.mark.external +@pytest.mark.parametrize( + "meta", + [m for m in all_files_meta["data"] if not m["encrypted"]], + ids=[m["path"] for m in all_files_meta["data"] if not m["encrypted"]], +) +def test_read(meta): + pdf_path = os.path.join(EXTERNAL_ROOT, meta["path"]) + reader = PdfFileReader(pdf_path) + reader.pages[0] + assert len(reader.pages) == meta["pages"] @pytest.mark.parametrize( diff --git a/docs/dev/intro.md b/docs/dev/intro.md index 627b0a57c..c19a06067 100644 --- a/docs/dev/intro.md +++ b/docs/dev/intro.md @@ -15,6 +15,23 @@ pip install -r requirements/dev.txt pytest . ``` +We have the following pytest markers defined: + +* `no_py27`: Flag for tests that fail under Python 2.7 only +* `external`: Tests which use files from [the `sample-files` git submodule](https://github.com/py-pdf/sample-files) + +You can locally choose not to run those via `pytest -m "not external"`. 
+ +## The sample-files git submodule +The reason for having the submodule `sample-files` is that we want to keep +the size of the PyPDF2 repository small while we also want to have an extensive +test suite. Those two goals contradict each other. + +The `Resources` folder should contain a select set of core examples that cover +most cases we typically want to test for. The `sample-files` might cover a lot +more edge cases, the behavior we get when file sizes get bigger, and different +PDF producers. + ## Tools: git and pre-commit Git is a command line application for version control. If you don't know it, diff --git a/pytest.ini b/pytest.ini index c7afa9968..2b994a831 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,4 @@ [pytest] markers = - no_py27: Flag for tests that fail under Python 2.7 only \ No newline at end of file + no_py27: Flag for tests that fail under Python 2.7 only + external: Tests which use files from https://github.com/py-pdf/sample-files diff --git a/sample-files b/sample-files new file mode 160000 index 000000000..6e3a1bb2c --- /dev/null +++ b/sample-files @@ -0,0 +1 @@ +Subproject commit 6e3a1bb2c72eaa3406f5e596479953950f91152c