Address #3087, fix Page.insert_image() with mask specified.

src/__init__.py: Page._insert_image(): avoid problems caused by mupdf.fz_new_image_from_compressed_buffer() being unusable from Python, due to decode and colorkey args being incorrectly treated as out-params. Also avoid double free of fz_compressed_buffer due to not being ref-counted, but used by two images. The fix follows classic, and probably results in a small memory leak. src/extra.i: Added fz_new_image_from_compressed_buffer(), wrapper for mupdf::fz_new_image_from_compressed_buffer(), without problematic decode and colorkey args. tests/test_insertimage.py: Added test_3087(). tests/resources/test_3087.pdf: New, for use by test_3087().
pymupdf · Jan 25, 2024 · 892fe31 · 892fe31
1 parent 803dd0b
commit 892fe31
Show file tree

Hide file tree

Showing 4 changed files with 121 additions and 12 deletions.
diff --git a/src/__init__.py b/src/__init__.py
@@ -7804,24 +7804,68 @@ def _insert_image(self,
             #log( 'do_have_imask')
             # mupdf.FzCompressedBuffer is not copyable, so
             # mupdf.fz_compressed_image_buffer() does not work - it cannot
-            # return by value. So we need to construct locally from a raw
-            # fz_compressed_buffer.
-            #cbuf1 = mupdf.fz_compressed_image_buffer(image)
-            cbuf1 = mupdf.FzCompressedBuffer( mupdf.ll_fz_compressed_image_buffer( image.m_internal))
-            if not cbuf1.m_internal:
+            # return by value. And sharing a fz_compressed_buffer between two
+            # `fz_image`'s doesn't work, so we use a raw fz_compressed_buffer
+            # here, not a mupdf.FzCompressedBuffer.
+            #
+            cbuf1 = mupdf.ll_fz_compressed_image_buffer( image.m_internal)
+            if not cbuf1:
                 raise ValueError( "uncompressed image cannot have mask")
             bpc = image.bpc()
             colorspace = image.colorspace()
             xres, yres = mupdf.fz_image_resolution(image)
             mask = mupdf.fz_new_image_from_buffer(maskbuf)
-            zimg = mupdf.fz_new_image_from_compressed_buffer(
-                    w, h,
-                    bpc, colorspace, xres, yres, 1, 0, None,
-                    None, cbuf1, mask
-                    )
-            image = zimg
-            #goto have_image()
+            if mupdf_version_tuple >= (1, 24):
+                zimg = mupdf.ll_fz_new_image_from_compressed_buffer2(
+                        w,
+                        h,
+                        bpc,
+                        colorspace.m_internal,
+                        xres,
+                        yres,
+                        1,  # interpolate
+                        0,  # imagemask,
+                        None,   # decode
+                        None,   # colorkey
+                        cbuf1,
+                        mask.m_internal,
+                        )
+            else:
+                # mupdf.ll_fz_new_image_from_compressed_buffer() is not usable.
+                zimg = extra.fz_new_image_from_compressed_buffer(
+                        w,
+                        h,
+                        bpc,
+                        colorspace.m_internal,
+                        xres,
+                        yres,
+                        1,  # interpolate
+                        0,  # imagemask,
+                        cbuf1,
+                        mask.m_internal,
+                        )
+
+            zimg = mupdf.FzImage(zimg)
 
+            # `image` and `zimg` both have pointers to the same
+            # `fz_compressed_buffer`, which is not reference counted, and they
+            # both think that they own it.
+            #
+            # So we do what the classic implementation does, and simply ensure
+            # that `fz_drop_image(image)` is never called. This will leak
+            # some of `image`'s allocations (for example the main `fz_image`
+            # allocation), but it's not trivial to avoid this.
+            #
+            # Perhaps we could manually set `fz_image`'s
+            # `fz_compressed_buffer*` to null? Trouble is we'd have to
+            # cast the `fz_image*` to a `fz_compressed_image*` to see the
+            # `fz_compressed_buffer*`, which is probably not possible from
+            # Python?
+            #
+            image.m_internal = None
+
+            image = zimg
+
         if do_have_image:
             #log( 'do_have_image')
             ref = mupdf.pdf_add_image(pdf, image)

diff --git a/src/extra.i b/src/extra.i
@@ -4294,6 +4294,39 @@ no_more_matches:;
     return quads;
 }
 
+/* MuPDF-1.23.x has an incorrect and unusable
+fz_new_image_from_compressed_buffer() wrapper that thinks the `decode` and
+`colorkey` args are out-params. So we provide an alternative wrapper where
+we always set these args to null, which is sufficient for PyMuPDF caller
+`Page._insert_image()`. */
+fz_image* fz_new_image_from_compressed_buffer(
+        int w,
+        int h,
+        int bpc,
+        fz_colorspace *colorspace,
+        int xres,
+        int yres,
+        int interpolate,
+        int imagemask,
+        fz_compressed_buffer *buffer,
+        fz_image *mask
+        )
+{
+    return mupdf::ll_fz_new_image_from_compressed_buffer(
+            w,
+            h,
+            bpc,
+            colorspace,
+            xres,
+            yres,
+            interpolate,
+            imagemask,
+            nullptr,
+            nullptr,
+            buffer,
+            mask
+            );
+}
 
 %}
 
@@ -4468,3 +4501,16 @@ int pixmap_n(mupdf::FzPixmap& pixmap);
 PyObject* JM_search_stext_page(fz_stext_page *page, const char *needle);
 
 PyObject *set_pixel(fz_pixmap* pm, int x, int y, PyObject *color);
+
+fz_image* fz_new_image_from_compressed_buffer(
+        int w,
+        int h,
+        int bpc,
+        fz_colorspace *colorspace,
+        int xres,
+        int yres,
+        int interpolate,
+        int imagemask,
+        fz_compressed_buffer *buffer,
+        fz_image *mask
+        );
diff --git a/tests/resources/test_3087.pdf b/tests/resources/test_3087.pdf
diff --git a/tests/test_insertimage.py b/tests/test_insertimage.py
@@ -45,3 +45,22 @@ def test_compress():
             deflate_fonts=True,
             pretty=True,
             )
+
+def test_3087():
+    path = os.path.abspath(f'{__file__}/../../tests/resources/test_3087.pdf')
+
+    doc = fitz.open(path)
+    page = doc[0]
+    print(page.get_images())
+    base = doc.extract_image(5)["image"]
+    mask = doc.extract_image(5)["image"]
+    page = doc.new_page()
+    page.insert_image(page.rect, stream=base, mask=mask)
+
+    doc = fitz.open(path)
+    page = doc[0]
+    print(page.get_images())
+    base = doc.extract_image(5)["image"]
+    mask = doc.extract_image(6)["image"]
+    page = doc.new_page()
+    page.insert_image(page.rect, stream=base, mask=mask)