Skip to content

Commit

Permalink
src/ tests/: removed code for mupdf prior to 1.24.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
julian-smith-artifex-com authored and JorjMcKie committed Dec 10, 2024
1 parent f6f74ab commit df94728
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 194 deletions.
113 changes: 5 additions & 108 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4943,8 +4943,6 @@ def language(self):
lang = mupdf.pdf_document_language(pdf)
if lang == mupdf.FZ_LANG_UNSET:
return
if mupdf_version_tuple < (1, 23, 7):
assert 0, 'not implemented yet'
return mupdf.fz_string_from_text_language2(lang)

@property
Expand Down Expand Up @@ -9927,9 +9925,6 @@ def __init__(self, *args):
elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
# create pixmap as scaled copy of another one
if mupdf_version_tuple < (1, 23, 8):
assert 0, f'Cannot handle {args=} because fz_scale_pixmap() and fz_scale_pixmap_cached() are not declared in MuPDF headers'

if len(args) == 3:
spix, w, h = args
bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
Expand Down Expand Up @@ -10647,12 +10642,7 @@ def shrink(self, factor):
@property
def size(self):
"""Pixmap size."""
if mupdf_version_tuple >= (1, 23, 8):
return mupdf.fz_pixmap_size( self.this)
# fz_pixmap_size() is not publicly visible, so we implement it
# ourselves. fixme: we don't add on sizeof(fz_pixmap).
pm = self.this
return pm.n() * pm.w() * pm.h()
return mupdf.fz_pixmap_size( self.this)

@property
def stride(self):
Expand Down Expand Up @@ -16212,12 +16202,8 @@ class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
def __init__(self):
super().__init__()
self.use_virtual_image_filter()
if mupdf_version_tuple >= (1, 23, 11):
def image_filter(self, ctx, ctm, name, image, scissor):
JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
else:
def image_filter(self, ctx, ctm, name, image):
JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
def image_filter(self, ctx, ctm, name, image, scissor):
JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

sanitize_filter_options = SanitizeFilterOptions()

Expand Down Expand Up @@ -20877,97 +20863,8 @@ def paper_sizes():
"tabloid-extra": (864, 1296),
}

if mupdf_version_tuple >= (1, 23, 8):
def pdf_lookup_page_loc(doc, needle):
    '''
    Delegate to MuPDF's own pdf_lookup_page_loc().

    `doc` and `needle` are forwarded unchanged and the result of the MuPDF
    call is returned as-is.
    '''
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location

else:
def pdf_lookup_page_loc_imp(doc, node, skip, parentp, indexp):
    '''
    Copy of MuPDF's internal pdf_lookup_page_loc_imp().

    Descends the page tree from `node`, consuming the counter in `skip[0]`
    until it reaches zero on a leaf entry.

    Args:
        doc: the document; not used by this function itself.
        node: mupdf.PdfObj page-tree node to start from.
        skip: one-element list holding the number of pages still to skip;
            decremented in place.
        parentp: one-element list; parentp[0] is set to the node whose Kids
            array contains the hit.
        indexp: one-element list; indexp[0] is set to the hit's index within
            that Kids array.

    Returns:
        The Kids entry that was found.

    Raises:
        Exception: on an empty Kids array, on a cycle in the page tree, or
            when a Kids array is exhausted without finding the page.
    '''
    assert isinstance(node, mupdf.PdfObj)
    assert isinstance(skip, list) and len(skip) == 1
    assert isinstance(indexp, list) and len(indexp) == 1
    assert isinstance(parentp, list) and len(parentp) == 1 and isinstance(parentp[0], mupdf.PdfObj)
    hit = None
    stack = []
    try:
        while 1:
            kids = mupdf.pdf_dict_get(node, PDF_NAME('Kids'))
            len_ = mupdf.pdf_array_len( kids)

            if len_ == 0:
                raise Exception("malformed page tree")

            # Every node we need to unmark goes into the stack
            stack.append(node)

            if mupdf.pdf_mark_obj( node):
                raise Exception( "cycle in page tree")

            for i in range(len_):
                kid = mupdf.pdf_array_get( kids, i)
                type_ = mupdf.pdf_dict_get( kid, PDF_NAME('Type'))
                # Decide whether `kid` is an intermediate /Pages node; without
                # a /Type entry, fall back to "has Kids but no MediaBox".
                if type_.m_internal:
                    a = mupdf.pdf_name_eq( type_, PDF_NAME('Pages'))
                else:
                    a = (
                            mupdf.pdf_dict_get( kid, PDF_NAME('Kids')).m_internal
                            and not mupdf.pdf_dict_get( kid, PDF_NAME('MediaBox')).m_internal
                            )
                if a:
                    count = mupdf.pdf_dict_get_int( kid, PDF_NAME('Count'))
                    if (skip[0] < count):
                        # The wanted page lives below this kid: descend.
                        node = kid
                        break
                    else:
                        skip[0] -= count
                else:
                    if type_.m_internal:
                        a = not mupdf.pdf_name_eq( type_, PDF_NAME('Page'))
                    else:
                        a = not mupdf.pdf_dict_get( kid, PDF_NAME('MediaBox')).m_internal
                    if a:
                        mupdf.fz_warn( f"non-page object in page tree ({mupdf.pdf_to_name( type_)})")
                    if skip[0] == 0:
                        parentp[0] = node
                        indexp[0] = i
                        hit = kid
                        break
                    else:
                        skip[0] -= 1
            else:
                # The Kids array was exhausted without finding the page. In C
                # this is the `i == len` case; a Python `for` leaves `i` at
                # `len_ - 1`, so comparing `i` with `len_` cannot detect it.
                # Without this branch the outer loop would revisit the same
                # (already marked) node and wrongly report a cycle.
                raise Exception("cannot find page in page tree")

            # We only get here after a `break`: either the page was found
            # (done), or `node` now refers to the subtree containing it (loop
            # back to the top to search that subtree).
            if not (hit is None or hit.m_internal is None):
                break
    finally:
        # Unmark everything we marked, most recent first.
        for marked in reversed(stack):
            mupdf.pdf_unmark_obj( marked)

    return hit

def pdf_lookup_page_loc(doc, needle):
    '''
    Copy of MuPDF's internal pdf_lookup_page_loc().

    Finds the entry for zero-based page index `needle` by searching the page
    tree referenced from the trailer's /Root /Pages entry.

    Returns:
        (hit, parent, index): the object found, the node whose Kids array
        contains it, and its index within that array. The remaining skip
        count is not returned.

    Raises:
        Exception: if the document has no page tree or the page is not found.
    '''
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( doc), PDF_NAME('Root'))
    node = mupdf.pdf_dict_get( root, PDF_NAME('Pages'))
    if not node.m_internal:
        raise Exception("cannot find page tree")

    # One-element lists emulate the C out-parameters.
    skip = [needle]
    parentp = [mupdf.PdfObj()]
    indexp = [0]
    hit = pdf_lookup_page_loc_imp(doc, node, skip, parentp, indexp)
    if hit is None or not hit.m_internal:
        # Parenthesise the addition: `%` binds tighter than `+`, so the
        # unparenthesised form evaluated `(msg % needle) + 1` -> TypeError.
        raise Exception("cannot find page %d in page tree" % (needle + 1))
    return hit, parentp[0], indexp[0]
def pdf_lookup_page_loc(doc, needle):
    '''
    Delegate to MuPDF's own pdf_lookup_page_loc().

    `doc` and `needle` are forwarded unchanged and the result of the MuPDF
    call is returned as-is.
    '''
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location


def pdfobj_string(o, prefix=''):
Expand Down
24 changes: 8 additions & 16 deletions tests/test_2548.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ def test_2548():
Old MuPDF versions did not detect the loop.
"""
print(f'test_2548(): {pymupdf.mupdf_version_tuple=}')
if pymupdf.mupdf_version_tuple < (1, 23, 4):
print(f'test_2548(): Not testing #2548 because infinite hang before mupdf-1.23.4.')
return
pymupdf.TOOLS.mupdf_warnings(reset=True)
doc = pymupdf.open(f'{root}/tests/resources/test_2548.pdf')
e = False
Expand All @@ -35,17 +32,12 @@ def test_2548():
# This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier
# versions with updated MuPDF also fix the bug.
rebased = hasattr(pymupdf, 'mupdf')
if pymupdf.mupdf_version_tuple >= (1, 23, 7):
if pymupdf.mupdf_version_tuple >= (1, 25):
expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
elif pymupdf.mupdf_version_tuple >= (1, 24):
expected = 'Loop found in structure tree. Ignoring structure.'
else:
expected = 'structure tree broken, assume tree is missing: cycle in structure tree'
if rebased:
assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n'
assert not e
if pymupdf.mupdf_version_tuple >= (1, 25):
expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
elif pymupdf.mupdf_version_tuple >= (1, 24):
expected = 'Loop found in structure tree. Ignoring structure.'
else:
assert e
if rebased:
assert not wt
expected = 'structure tree broken, assume tree is missing: cycle in structure tree'
if rebased:
assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n'
assert not e
3 changes: 0 additions & 3 deletions tests/test_font.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ def test_fontname():
assert detected

def test_2608():
if pymupdf.mupdf_version_tuple <= (1, 23, 4):
print( f'Not running test_2608 because mupdf too old: {pymupdf.mupdf_version_tuple=}')
return
flags = (pymupdf.TEXT_DEHYPHENATE | pymupdf.TEXT_MEDIABOX_CLIP)
with pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/2201.00069.pdf')) as doc:
page = doc[0]
Expand Down
25 changes: 5 additions & 20 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,24 +646,16 @@ def show(l):
# With mupdf later than 1.23.4, this special page contains no invalid
# Unicodes.
#
if pymupdf.mupdf_version_tuple > (1, 23, 4):
print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
assert chr(0xFFFD) not in set1
else:
print(f'Checking occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
assert chr(0xFFFD) in set1
print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
assert chr(0xFFFD) not in set1

def test_2553_2():
doc = pymupdf.open(f"{scriptdir}/resources/test_2553-2.pdf")
page = doc[0]

# extract plain text, ensure that there are no 0xFFFD characters
text = page.get_text()
if pymupdf.mupdf_version_tuple >= (1, 23, 7):
assert chr(0xfffd) not in text
else:
# Bug not fixed in MuPDF.
assert chr(0xfffd) in text
assert chr(0xfffd) not in text

def test_2635():
"""Rendering a page before and after cleaning it should yield the same pixmap."""
Expand Down Expand Up @@ -723,15 +715,8 @@ def assert_rects_approx_eq(a, b):
assert_rects_approx_eq(page.cropbox, pymupdf.Rect(30.0, 30.0, 565.3200073242188, 811.9199829101562))
assert_rects_approx_eq(page.mediabox, pymupdf.Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562))
print(f'test_2710(): {pymupdf.mupdf_version_tuple=}')
if pymupdf.mupdf_version_tuple < (1, 23, 5):
print(f'test_2710(): Not Checking page.rect and rect.')
elif pymupdf.mupdf_version_tuple < (1, 24.0):
print(f'test_2710(): Checking page.rect and rect.')
assert_rects_approx_eq(page.rect, pymupdf.Rect(0.0, 0.0, 535.3200073242188, 781.9199829101562))
assert_rects_approx_eq(rect, pymupdf.Rect(0.7872352600097656, 64.7560043334961, 124.85531616210938, 78.1622543334961))
else:
# 2023-11-05: Currently broken in mupdf master.
print(f'test_2710(): Not Checking page.rect and rect.')
# 2023-11-05: Currently broken in mupdf master.
print(f'test_2710(): Not Checking page.rect and rect.')
rebased = hasattr(pymupdf, 'mupdf')
if rebased:
wt = pymupdf.TOOLS.mupdf_warnings()
Expand Down
4 changes: 0 additions & 4 deletions tests/test_pixmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,6 @@ def test_3058():
assert 1800000 < s < 2600000, f'Unexpected size of {path}: {s}'

def test_3072():
if pymupdf.mupdf_version_tuple < (1, 23, 10):
print(f'test_3072(): Not running because known to hang on MuPDF < 1.23.10.')
return

path = os.path.abspath(f'{__file__}/../../tests/resources/test_3072.pdf')
out = os.path.abspath(f'{__file__}/../../tests')

Expand Down
78 changes: 35 additions & 43 deletions tests/test_textextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,33 +102,32 @@ def test_extract4():
pymupdf.mupdf.fz_close_document_writer(writer)
print(f'Have written to: {out}')

if pymupdf.mupdf_version_tuple >= (1, 23, 4):
def get_text(page, space_guess):
    '''
    Run `page` through a MuPDF document writer that writes into an in-memory
    buffer, and return `(text, n)`: the buffer contents decoded as UTF-8 and
    the number of ' ' characters in that text.
    '''
    buf = pymupdf.mupdf.FzBuffer( 10)
    sink = pymupdf.mupdf.FzOutput( buf)
    # NOTE(review): the options string has no `f` prefix, so the literal text
    # '{space_guess}' is handed to the writer and the `space_guess` argument
    # only appears in the diagnostic print below - confirm this is intended.
    doc_writer = pymupdf.mupdf.FzDocumentWriter(
            sink,
            'text,space-guess={space_guess}',
            pymupdf.mupdf.FzDocumentWriter.OutputType_DOCX,
            )
    page_bounds = pymupdf.mupdf.fz_bound_page(page)
    dev = pymupdf.mupdf.fz_begin_page(doc_writer, page_bounds)
    pymupdf.mupdf.fz_run_page(page, dev, pymupdf.mupdf.FzMatrix(), pymupdf.mupdf.FzCookie())
    pymupdf.mupdf.fz_end_page(doc_writer)
    pymupdf.mupdf.fz_close_document_writer(doc_writer)
    text = buf.fz_buffer_extract().decode('utf8')
    # The name `n` must stay: the `{n=}` debug specifier prints it.
    n = text.count(' ')
    print(f'{space_guess=}: {n=}')
    return text, n
page = document[4]
text0, n0 = get_text(page, 0)
text1, n1 = get_text(page, 0.5)
text2, n2 = get_text(page, 0.001)
text2, n2 = get_text(page, 0.1)
text2, n2 = get_text(page, 0.3)
text2, n2 = get_text(page, 0.9)
text2, n2 = get_text(page, 5.9)
assert text1 == text0
def get_text(page, space_guess):
    '''
    Run `page` through a MuPDF document writer that writes into an in-memory
    buffer, and return `(text, n)`: the buffer contents decoded as UTF-8 and
    the number of ' ' characters in that text.
    '''
    buf = pymupdf.mupdf.FzBuffer( 10)
    sink = pymupdf.mupdf.FzOutput( buf)
    # NOTE(review): the options string has no `f` prefix, so the literal text
    # '{space_guess}' is handed to the writer and the `space_guess` argument
    # only appears in the diagnostic print below - confirm this is intended.
    doc_writer = pymupdf.mupdf.FzDocumentWriter(
            sink,
            'text,space-guess={space_guess}',
            pymupdf.mupdf.FzDocumentWriter.OutputType_DOCX,
            )
    page_bounds = pymupdf.mupdf.fz_bound_page(page)
    dev = pymupdf.mupdf.fz_begin_page(doc_writer, page_bounds)
    pymupdf.mupdf.fz_run_page(page, dev, pymupdf.mupdf.FzMatrix(), pymupdf.mupdf.FzCookie())
    pymupdf.mupdf.fz_end_page(doc_writer)
    pymupdf.mupdf.fz_close_document_writer(doc_writer)
    text = buf.fz_buffer_extract().decode('utf8')
    # The name `n` must stay: the `{n=}` debug specifier prints it.
    n = text.count(' ')
    print(f'{space_guess=}: {n=}')
    return text, n
page = document[4]
text0, n0 = get_text(page, 0)
text1, n1 = get_text(page, 0.5)
text2, n2 = get_text(page, 0.001)
text2, n2 = get_text(page, 0.1)
text2, n2 = get_text(page, 0.3)
text2, n2 = get_text(page, 0.9)
text2, n2 = get_text(page, 5.9)
assert text1 == text0

def test_2954():
'''
Expand Down Expand Up @@ -194,22 +193,15 @@ def get(flags=None):
text_none, n_fffd_none = get()
text_0, n_fffd_0 = get(flags0)

if pymupdf.mupdf_version_tuple >= (1, 23, 9):
text_1, n_fffd_1 = get(flags0 | pymupdf.TEXT_CID_FOR_UNKNOWN_UNICODE)

assert n_fffd_none == n_fffd_good
assert n_fffd_0 == n_fffd_bad
assert n_fffd_1 == n_fffd_good

assert check_good(text_none)
assert not check_good(text_0)
assert check_good(text_1)
else:
assert n_fffd_none == n_fffd_bad
assert n_fffd_0 == n_fffd_bad

assert not check_good(text_none)
assert not check_good(text_0)
text_1, n_fffd_1 = get(flags0 | pymupdf.TEXT_CID_FOR_UNKNOWN_UNICODE)

assert n_fffd_none == n_fffd_good
assert n_fffd_0 == n_fffd_bad
assert n_fffd_1 == n_fffd_good

assert check_good(text_none)
assert not check_good(text_0)
assert check_good(text_1)


def test_3027():
Expand Down

0 comments on commit df94728

Please sign in to comment.