diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index a47241ed6..6ed659f4d 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -51,8 +51,12 @@ jobs: - name: Test with pytest run: | python -m coverage run --parallel-mode -m pytest Tests -vv - if: matrix.python-version != '3.10.1' - - name: Test with pytest + if: matrix.python-version != '3.10.1' && matrix.python-version != 2.7 + - name: Test with pytest (2.7) + run: | + python -m coverage run --parallel-mode -m pytest Tests -vv -m "not no_py27" + if: matrix.python-version == 2.7 + - name: Test with pytest (OO flag) run: | python -OO -m coverage run --parallel-mode -m pytest Tests -vv if: matrix.python-version == '3.10.1' diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 165dd68ce..44f8b92d9 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -40,7 +40,7 @@ from PyPDF2.constants import ImageAttributes as IA from PyPDF2.constants import LzwFilterParameters as LZW from PyPDF2.constants import StreamAttributes as SA -from PyPDF2.errors import PdfReadError +from PyPDF2.errors import PdfReadError, PdfStreamError from PyPDF2.utils import ord_, paethPredictor if version_info < ( 3, 0 ): @@ -194,12 +194,26 @@ def encode(data): class ASCIIHexDecode(object): + """ + The ASCIIHexDecode filter decodes data that has been encoded in ASCII + hexadecimal form into a 7-bit ASCII format. + """ + @staticmethod def decode(data, decodeParms=None): + """ + :param data: a str sequence of hexadecimal-encoded values to be + converted into a 7-bit ASCII string + :param decodeParms: + :return: a string conversion in 7-bit ASCII, where each of its values + v is such that 0 <= ord(v) <= 127. 
+ """ retval = "" char = "" x = 0 while True: + if x >= len(data): + raise PdfStreamError("Unexpected EOD in ASCIIHexDecode") c = data[x] if c == ">": break diff --git a/Tests/test_filters.py b/Tests/test_filters.py new file mode 100644 index 000000000..ada1db4a2 --- /dev/null +++ b/Tests/test_filters.py @@ -0,0 +1,59 @@ +from PyPDF2.filters import ASCIIHexDecode +import string +from PyPDF2.errors import PdfStreamError +import pytest + + +@pytest.mark.parametrize( + "input,expected", + [ + (">", ""), + ( + "6162636465666768696a6b6c6d6e6f707172737475767778797a>", + string.ascii_lowercase, + ), + ( + "4142434445464748494a4b4c4d4e4f505152535455565758595a>", + string.ascii_uppercase, + ), + ( + "6162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a>", + string.ascii_letters, + ), + ("30313233343536373839>", string.digits), + ( + "3 031323334353637 3839>", + string.digits, + ), # Same as previous, but whitespaced + ("30313233343536373839616263646566414243444546>", string.hexdigits), + ("20090a0d0b0c>", string.whitespace), + ], + ids=[ + "empty", + "ascii_lowercase", + "ascii_uppercase", + "ascii_letters", + "digits", + "digits_whitespace", + "hexdigits", + "whitespace", + ], +) +@pytest.mark.no_py27 +def test_expected_results(input, expected): + """ + Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the + correct output is returned. + TO-DO What is decode() supposed to do for such inputs as ">>", ">>>" or + any other not terminated by ">"? (For the latter case, an exception + is currently raised.) 
+ """ + + assert ASCIIHexDecode.decode(input) == expected + + +def test_no_eod(): + """Ensuring an exception is raised when no EOD character is present""" + with pytest.raises(PdfStreamError) as exc: + ASCIIHexDecode.decode("") + assert exc.value.args[0] == "Unexpected EOD in ASCIIHexDecode" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..c7afa9968 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + no_py27: Flag for tests that fail under Python 2.7 only \ No newline at end of file