From 3337390f29747766fa1e7114855b961dcf3a7f38 Mon Sep 17 00:00:00 2001 From: "Jorj X. McKie" Date: Wed, 30 Nov 2022 13:55:41 -0400 Subject: [PATCH] Fixes #2094 & #2087 - 2nd go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue 2087: `fitz.i (extract_image)`: the type of JPX images with more than one `/Filter` is not correctly recognized when inspecting the raw stream. Fixed by extracting the decoded stream: we already know the type from the PDF dict. Issue 2094: Rectangle recognition (`helper-devices.i`, `jm_checkrect()`) was wrong in not also confirming that the x-coordinates are the same in the respective corners. Also simplified rectangle orientation detection. --- fitz/fitz.i | 12 +++++++++--- fitz/helper-devices.i | 38 ++++++++++++-------------------------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/fitz/fitz.i b/fitz/fitz.i index 52fa03031..c2a21f2fd 100644 --- a/fitz/fitz.i +++ b/fitz/fitz.i @@ -2775,14 +2775,16 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not if (pdf_is_jpx_image(gctx, obj)) { img_type = FZ_IMAGE_JPX; + res = pdf_load_stream(gctx, obj); ext = "jpx"; } if (JM_is_jbig2_image(gctx, obj)) { img_type = FZ_IMAGE_JBIG2; + res = pdf_load_stream(gctx, obj); ext = "jb2"; } - res = pdf_load_raw_stream(gctx, obj); if (img_type == FZ_IMAGE_UNKNOWN) { + res = pdf_load_raw_stream(gctx, obj); unsigned char *c = NULL; fz_buffer_storage(gctx, res, &c); img_type = fz_recognize_image_format(gctx, c); @@ -2795,9 +2797,10 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not res = fz_new_buffer_from_image_as_png(gctx, img, fz_default_color_params); ext = "png"; - } else /*if (smask == 0)*/ { + } else { img = fz_new_image_from_buffer(gctx, res); } + fz_image_resolution(img, &xres, &yres); width = img->w; height = img->h; @@ -2835,7 +2838,8 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not fz_catch(gctx) 
{ Py_CLEAR(rc); - Py_RETURN_NONE; + fz_warn(gctx, fz_caught_message(gctx)); + Py_RETURN_FALSE; } if (!rc) Py_RETURN_NONE; @@ -12332,6 +12336,7 @@ struct Archive } return (struct Archive *) arch; } + Archive(PyObject *a0=NULL, const char *path=NULL) { fz_archive *arch=NULL; @@ -13566,6 +13571,7 @@ struct Story return ret; } + void draw( struct DeviceWrapper* device, PyObject* matrix=NULL) { fz_matrix ctm2 = JM_matrix_from_py( matrix); diff --git a/fitz/helper-devices.i b/fitz/helper-devices.i index e4253bfe9..6668a6989 100644 --- a/fitz/helper-devices.i +++ b/fitz/helper-devices.i @@ -101,6 +101,7 @@ jm_checkrect() dev_linecount = 0; // reset line count long orientation = 0; fz_point ll, lr, ur, ul; + fz_rect r; PyObject *rect; PyObject *line0, *line2; PyObject *items = PyDict_GetItem(dev_pathdict, dictkey_items); @@ -109,15 +110,13 @@ jm_checkrect() line0 = PyList_GET_ITEM(items, len - 3); ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1)); lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2)); - + // no need to extract "line1"! line2 = PyList_GET_ITEM(items, len - 1); ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1)); ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2)); /* --------------------------------------------------------------------- - Three connected lines: at least a quad! Check whether even a rect. - For this, the lines must be parallel to the axes. Assumption: For decomposing rects, MuPDF always starts with a horizontal line, followed by a vertical line, followed by a horizontal line. @@ -125,34 +124,21 @@ jm_checkrect() as '+1' for anti-clockwise, '-1' for clockwise orientation. 
--------------------------------------------------------------------- */ - if (ll.y != lr.y) { // not horizontal - goto drop_out; - } - if (lr.x != ur.x) { // not vertical - goto drop_out; - } - if (ur.y != ul.y) { // not horizontal - goto drop_out; + if (ll.y != lr.y || + ll.x != ul.x || + ur.y != ul.y || + ur.x != lr.x) { + goto drop_out; // not a rectangle } - // we have a rect, determine orientation - if (ll.x < lr.x) { // move left to right - if (lr.y > ur.y) { // move upwards - orientation = 1; - } else { - orientation = -1; - } - } else { // move right to left - if (lr.y < ur.y) { // move downwards + + // we have a rect, replace last 3 "l" items by one "re" item. + if (ul.y < lr.y) { + r = fz_make_rect(ul.x, ul.y, lr.x, lr.y); orientation = 1; } else { + r = fz_make_rect(ll.x, ll.y, ur.x, ur.y); orientation = -1; } - } - // Replace last 3 "l" items by one "re" item. - fz_rect r = fz_make_rect(ul.x, ul.y, ul.x, ul.y); - r = fz_include_point_in_rect(r, ur); - r = fz_include_point_in_rect(r, ll); - r = fz_include_point_in_rect(r, lr); rect = PyTuple_New(3); PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re")); PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r));