From c9a17d771150577c726f41f46de722218d64f5bd Mon Sep 17 00:00:00 2001 From: Jorj McKie Date: Tue, 1 Jun 2021 08:46:15 -0400 Subject: [PATCH] Upgrade to version 1.18.14. --- PKG-INFO | 6 +- README.md | 8 +- changes.rst | 35 +- docs/_static/jquery.js | 10 +- docs/_static/underscore.js | 2 +- docs/annot.rst | 2 +- docs/changes.rst | 35 +- docs/conf.py | 2 +- docs/deprecated.rst | 919 ++++++++------------------------- docs/document.rst | 102 ++-- docs/faq.rst | 16 +- docs/font.rst | 62 ++- docs/functions.rst | 139 +++-- docs/installation.rst | 8 +- docs/irect.rst | 29 +- docs/matrix.rst | 22 +- docs/new-annots.py | 2 +- docs/pixmap.rst | 21 +- docs/quad.rst | 18 +- docs/rect.rst | 28 +- docs/textpage.rst | 2 +- docs/tutorial.rst | 16 +- docs/version.rst | 2 +- docs/wheelnames.txt | 12 +- docs/znames.rst | 37 +- fitz/__init__.py | 502 +++++++++++------- fitz/__main__.py | 16 +- fitz/fitz.i | 329 +++++++++--- fitz/helper-fields.i | 4 +- fitz/helper-geo-py.i | 116 ++--- fitz/helper-other.i | 4 +- fitz/helper-python.i | 186 +++---- fitz/helper-select.i | 26 +- fitz/utils.py | 204 ++++---- fitz/version.i | 6 +- setup.py | 19 +- tests/README.md | 55 +- tests/resources/symbols.txt | 16 +- tests/test_annots.py | 166 ++++++ tests/test_drawings.py | 8 +- tests/test_embeddedfiles.py | 24 + tests/test_font.py | 26 + tests/test_general.py | 75 +++ tests/test_geometry.py | 280 +++++++++- tests/test_linequad.py | 9 +- tests/test_nonpdf.py | 2 +- tests/test_optional_content.py | 62 +++ tests/test_pagedelete.py | 9 +- tests/test_pixmap.py | 40 +- tests/test_textbox.py | 48 +- tests/test_textextract.py | 28 + tests/test_textsearch.py | 1 + tests/test_toc.py | 13 +- tests/test_widgets.py | 139 +++++ 54 files changed, 2333 insertions(+), 1615 deletions(-) create mode 100644 tests/test_annots.py create mode 100644 tests/test_embeddedfiles.py create mode 100644 tests/test_font.py create mode 100644 tests/test_general.py create mode 100644 tests/test_optional_content.py create mode 100644 
tests/test_textextract.py create mode 100644 tests/test_widgets.py diff --git a/PKG-INFO b/PKG-INFO index 56b8d01de..94e57531a 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: PyMuPDF -Version: 1.18.13 +Version: 1.18.14 Author: Jorj McKie Author-email: jorj.x.mckie@outlook.de License: GNU AFFERO GPL 3.0 @@ -10,7 +10,7 @@ Home-page: https://github.com/pymupdf/PyMuPDF Download-url: https://github.com/pymupdf/PyMuPDF Summary: PyMuPDF is a Python binding for the document renderer and toolkit MuPDF Description: - Release date: May 5, 2021 + Release date: June 1, 2021 Authors ======= @@ -21,7 +21,7 @@ Description: Introduction ============ - PyMuPDF (current version 1.18.13) is a Python binding with support for `MuPDF `_ (current version 1.18.*), a lightweight PDF, XPS, and E-book viewer, renderer and toolkit, which is maintained and developed by Artifex Software, Inc. + PyMuPDF (current version 1.18.14) is a Python binding with support for `MuPDF `_ (current version 1.18.*), a lightweight PDF, XPS, and E-book viewer, renderer and toolkit, which is maintained and developed by Artifex Software, Inc. MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality. 
diff --git a/README.md b/README.md index a0226d70a..d6f4693d4 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# PyMuPDF 1.18.13 +# PyMuPDF 1.18.14 ![logo](https://github.com/pymupdf/PyMuPDF/blob/master/demo/pymupdf.jpg) -Release date: May 5, 2021 +Release date: June 1, 2021 **Travis-CI:** [![Build Status](https://travis-ci.org/JorjMcKie/py-mupdf.svg?branch=master)](https://travis-ci.org/JorjMcKie/py-mupdf) @@ -15,7 +15,7 @@ On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![Downloads] # Introduction -PyMuPDF (current version 1.18.13) is a Python binding with support for [MuPDF](https://mupdf.com/) (current version 1.18.*), a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which is maintained and developed by Artifex Software, Inc. +PyMuPDF (current version 1.18.14) is a Python binding with support for [MuPDF](https://mupdf.com/) (current version 1.18.*), a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which is maintained and developed by Artifex Software, Inc. MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality. @@ -65,7 +65,7 @@ Our **documentation**, written using Sphinx, is available in various formats fro # Installation -For Windows, Linux and Mac OSX platforms, there are wheels in the [download](https://pypi.org/project/PyMuPDF/#files) section of PyPI. This includes Python 64bit versions **3.6 through 3.9**. For Windows only, 32bit versions are available too. Since version 1.18.13 there also exist wheels for the Linux ARM architecture - look for platform tag ``manylinux2014_aarch64``. +For Windows, Linux and Mac OSX platforms, there are wheels in the [download](https://pypi.org/project/PyMuPDF/#files) section of PyPI. This includes Python 64bit versions **3.6 through 3.9**. For Windows only, 32bit versions are available too. 
Since version 1.18.14 there also exist wheels for the Linux ARM architecture - look for platform tag ``manylinux2014_aarch64``. If your platform is not supported with one of our wheels, you need to generate PyMuPDF yourself as follows. This requires the development version of Python. diff --git a/changes.rst b/changes.rst index eb1228dd2..65ca31952 100644 --- a/changes.rst +++ b/changes.rst @@ -1,6 +1,25 @@ Change Logs =============== +Changes in Version 1.18.14 +--------------------------- +* **Finished** implementing new, "snake_cased" names for methods and properties, that were "camelCased" and awkward in many aspects. At the end of this documentation, there is a section :ref:`Deprecated` with more background and a mapping of old to new names. + +* **Fixed** issue `#1053 `_. :meth:`Page.insert_image`: when given, include image mask in the hash computation. + +* **Fixed** issue `#1043 `_. Added ``Pixmap.getPNGdata`` to the aliases of :meth:`Pixmap.tobytes`. + +* **Fixed** an internal error when computing the enveloping rectangle of drawn paths as returned by :meth:`Page.get_drawings`. + +* **Fixed** an internal error occasionally causing loops when outputting text via :meth:`TextWriter.fill_textbox`. + +* **Added** :meth:`Font.char_lengths`, which returns a tuple of character widths of a string. + +* **Added** more ways to specify pages in :meth:`Document.delete_pages`. Now a sequence (list, tuple or range) can be specified, and the Python ``del`` statement can be used. In the latter case, Python ``slices`` are also accepted. + +* **Changed** :meth:`Document.del_toc_item`, which disables a single item of the TOC: previously, the title text was removed. Instead, now the complete item will be shown grayed-out by supporting viewers. + + Changes in Version 1.18.13 --------------------------- * **Fixed** issue `#1014 `_. @@ -426,13 +445,13 @@ Minor changes compared to version 1.16.2. 
The code of the "dict" and "rawdict" v Changes in Version 1.16.2 --------------------------- * **Changed** text extraction methods of :ref:`Page` to allow detail control of the amount of extracted data. -* **Added** :meth:`planishLine` which maps a given line (defined as a pair of points) to the x-axis. +* **Added** :meth:`planish_line` which maps a given line (defined as a pair of points) to the x-axis. * **Fixed** an issue (w/o Github number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with te "dict" option. * **Fixed** issue #362 ("Memory Leak with getText('rawDICT')"). Changes in Version 1.16.1 --------------------------- -* **Added** property :attr:`Quad.isConvex` which checks whether a line is contained in the quad if it connects two points of it. +* **Added** property :attr:`Quad.is_convex` which checks whether a line is contained in the quad if it connects two points of it. * **Changed** :meth:`Document.insert_pdf` to now allow dropping or including links and annotations independently during the copy. Fixes issue #352 ("Corrupt PDF data and ..."), which seemed to intermittently occur when using the method for some problematic PDF files. * **Fixed** a bug which, in matrix division using the syntax *"m1/m2"*, caused matrix *"m1"* to be **replaced** by the result instead of delivering a new matrix. * **Fixed** issue #354 ("SyntaxWarning with Python 3.8"). We now always use *"=="* for literals (instead of the *"is"* Python keyword). @@ -540,7 +559,7 @@ Changes in Version 1.14.9 * **Added** new low-level method :meth:`Document._getTrailerString`, which returns the trailer object of a PDF. This is much like :meth:`Document._getXrefString` except that the PDF trailer has no / needs no :data:`xref` to identify it. * **Added** new parameters for text insertion methods. 
You can now set stroke and fill colors of glyphs (text characters) independently, as well as the thickness of the glyph border. A new parameter *render_mode* controls the use of these colors, and whether the text should be visible at all. * **Fixed** issue #258 ("Copying image streams to new PDF without size increase"): For JPX images embedded in a PDF, :meth:`Document.extractImage` will now return them in their original format. Previously, the MuPDF base library was used, which returns them in PNG format (entailing a massive size increase). -* **Fixed** issue #259 ("Morphing text to fit inside rect"). Clarified use of :meth:`getTextlength` and removed extra line breaks for long words. +* **Fixed** issue #259 ("Morphing text to fit inside rect"). Clarified use of :meth:`get_text_length` and removed extra line breaks for long words. Changes in Version 1.14.8 --------------------------- @@ -553,7 +572,7 @@ Changes in Version 1.14.7 --------------------------- * **Added** :meth:`Pixmap.set_pixel` to change one pixel value. * **Added** documentation for image conversion in the :ref:`FAQ`. -* **Added** new function :meth:`getTextlength` to determine the string length for a given font. +* **Added** new function :meth:`get_text_length` to determine the string length for a given font. * **Added** Postscript image output (changed :meth:`Pixmap.save` and :meth:`Pixmap.tobytes`). * **Changed** :meth:`Pixmap.save` and :meth:`Pixmap.tobytes` to ensure valid combinations of colorspace, alpha and output format. * **Changed** :meth:`Pixmap.save`: the desired format is now inferred from the filename. @@ -634,7 +653,7 @@ Changes in Version 1.13.17 * **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted imgage. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method. 
* **Changed** method :meth:`Document._getXrefStream` to now return *None* if the object is no stream and no longer raise an exception if otherwise. * **Added** method :meth:`Document._deleteObject` which deletes a PDF object identified by its :data:`xref`. Only to be used by the experienced PDF expert. -* **Added** a method :meth:`PaperRect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.PaperRect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*. +* **Added** a method :meth:`paper_rect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.paper_rect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*. * **Added** a :ref:`FAQ` chapter to this document. Changes in Version 1.13.16 @@ -950,7 +969,7 @@ Please have a look at MuPDF's website to see which changes and enhancements are Changes in version 1.9.1 compared to version 1.8.0 are the following: -* New methods *getRectArea()* for both *fitz.Rect* and *fitz.IRect* +* New methods *get_area()* for both *fitz.Rect* and *fitz.IRect* * Pixmaps can now be created directly from files using the new constructor *fitz.Pixmap(filename)*. * The Pixmap constructor *fitz.Pixmap(image)* has been extended accordingly. * *fitz.Rect* can now be created with all possible combinations of points and coordinates. @@ -964,9 +983,9 @@ Changes in version 1.9.1 compared to version 1.8.0 are the following: * New pixmap method *samplesRGB()* providing a *samples* version with alpha bytes stripped off (RGB colorspaces only). * New pixmap method *samplesAlpha()* providing the alpha bytes only of the *samples* area. * New iterator *fitz.Pages(doc)* over a document's set of pages. -* New matrix methods *invert()* (calculate inverted matrix), *concat()* (calculate matrix product), *preTranslate()* (perform a shift operation). +* New matrix methods *invert()* (calculate inverted matrix), *concat()* (calculate matrix product), *pretranslate()* (perform a shift operation). 
* New *IRect* methods *intersect()* (intersection with another rectangle), *translate()* (perform a shift operation). -* New *Rect* methods *intersect()* (intersection with another rectangle), *transform()* (transformation with a matrix), *includePoint()* (enlarge rectangle to also contain a point), *includeRect()* (enlarge rectangle to also contain another one). +* New *Rect* methods *intersect()* (intersection with another rectangle), *transform()* (transformation with a matrix), *include_point()* (enlarge rectangle to also contain a point), *include_rect()* (enlarge rectangle to also contain another one). * Documented *Point.transform()* (transform a point with a matrix). * *Matrix*, *IRect*, *Rect* and *Point* classes now support compact, algebraic formulations for manipulating such objects. * Incremental saves for changes are possible now using the call pattern *doc.save(doc.name, incremental=True)*. diff --git a/docs/_static/jquery.js b/docs/_static/jquery.js index 28a8d0009..7d859515a 100644 --- a/docs/_static/jquery.js +++ b/docs/_static/jquery.js @@ -379,7 +379,7 @@ for (b in a); return b === w || aa.call(a, b) }, - isEmptyObject: function (a) { + is_emptyObject: function (a) { for (var b in a) return false; return true }, @@ -642,7 +642,7 @@ if (b) { if (e) { delete e[b]; - c.isEmptyObject(e) && c.removeData(a) + c.is_emptyObject(e) && c.removeData(a) } } else { if (c.support.deleteExpando) delete a[c.expando]; @@ -1006,11 +1006,11 @@ } } } - if (c.isEmptyObject(C)) { + if (c.is_emptyObject(C)) { if (b = z.handle) b.elem = null; delete z.events; delete z.handle; - c.isEmptyObject(z) && c.removeData(a) + c.is_emptyObject(z) && c.removeData(a) } } } @@ -3098,7 +3098,7 @@ }, animate: function (a, b, d, f) { var e = c.speed(b, d, f); - if (c.isEmptyObject(a)) return this.each(e.complete); + if (c.is_emptyObject(a)) return this.each(e.complete); return this[e.queue === false ? 
"each" : "queue"](function () { var j = c.extend({}, e), i, o = this.nodeType === 1 && c(this).is(":hidden"), diff --git a/docs/_static/underscore.js b/docs/_static/underscore.js index 3e115f54b..4aa6d0ec2 100644 --- a/docs/_static/underscore.js +++ b/docs/_static/underscore.js @@ -340,7 +340,7 @@ if (!b.isEqual(a[f], c[f])) return false; return true }; - b.isEmpty = function (a) { + b.is_empty = function (a) { return b.keys(a).length == 0 }; diff --git a/docs/annot.rst b/docs/annot.rst index ad24cbb42..47d5c766e 100644 --- a/docs/annot.rst +++ b/docs/annot.rst @@ -228,7 +228,7 @@ There is a parent-child relationship between an annotation and its page. If the :arg str name: the new name. - .. caution:: If you set the name of a 'Stamp' annotation, then this will **not change** the rectangle, nor will the text be layouted in any way. If you choose a standard text from :ref:`StampIcons` (the **exact** name piece after "STAMP_"), you should receive the original layout. An **arbitrary text** will not be changed to upper case, but be written in font "Times-Bold" as is, horizontally centered in **one line** and be shortened to fit. To get your text fully displayed, its length using fontsize 20 must not exceed 190 pixels. So please make sure that the following inequality is true: ``fitz.getTextlength(text, fontname="tibo", fontsize=20) <= 190``. + .. caution:: If you set the name of a 'Stamp' annotation, then this will **not change** the rectangle, nor will the text be layouted in any way. If you choose a standard text from :ref:`StampIcons` (the **exact** name piece after "STAMP_"), you should receive the original layout. An **arbitrary text** will not be changed to upper case, but be written in font "Times-Bold" as is, horizontally centered in **one line** and be shortened to fit. To get your text fully displayed, its length using fontsize 20 must not exceed 190 pixels. 
So please make sure that the following inequality is true: ``fitz.get_text_length(text, fontname="tibo", fontsize=20) <= 190``. .. method:: set_rect(rect) diff --git a/docs/changes.rst b/docs/changes.rst index eb1228dd2..65ca31952 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,6 +1,25 @@ Change Logs =============== +Changes in Version 1.18.14 +--------------------------- +* **Finished** implementing new, "snake_cased" names for methods and properties, that were "camelCased" and awkward in many aspects. At the end of this documentation, there is a section :ref:`Deprecated` with more background and a mapping of old to new names. + +* **Fixed** issue `#1053 `_. :meth:`Page.insert_image`: when given, include image mask in the hash computation. + +* **Fixed** issue `#1043 `_. Added ``Pixmap.getPNGdata`` to the aliases of :meth:`Pixmap.tobytes`. + +* **Fixed** an internal error when computing the enveloping rectangle of drawn paths as returned by :meth:`Page.get_drawings`. + +* **Fixed** an internal error occasionally causing loops when outputting text via :meth:`TextWriter.fill_textbox`. + +* **Added** :meth:`Font.char_lengths`, which returns a tuple of character widths of a string. + +* **Added** more ways to specify pages in :meth:`Document.delete_pages`. Now a sequence (list, tuple or range) can be specified, and the Python ``del`` statement can be used. In the latter case, Python ``slices`` are also accepted. + +* **Changed** :meth:`Document.del_toc_item`, which disables a single item of the TOC: previously, the title text was removed. Instead, now the complete item will be shown grayed-out by supporting viewers. + + Changes in Version 1.18.13 --------------------------- * **Fixed** issue `#1014 `_. @@ -426,13 +445,13 @@ Minor changes compared to version 1.16.2. 
The code of the "dict" and "rawdict" v Changes in Version 1.16.2 --------------------------- * **Changed** text extraction methods of :ref:`Page` to allow detail control of the amount of extracted data. -* **Added** :meth:`planishLine` which maps a given line (defined as a pair of points) to the x-axis. +* **Added** :meth:`planish_line` which maps a given line (defined as a pair of points) to the x-axis. * **Fixed** an issue (w/o Github number) which brought down the interpreter when encountering certain non-UTF-8 encodable characters while using :meth:`Page.getText` with te "dict" option. * **Fixed** issue #362 ("Memory Leak with getText('rawDICT')"). Changes in Version 1.16.1 --------------------------- -* **Added** property :attr:`Quad.isConvex` which checks whether a line is contained in the quad if it connects two points of it. +* **Added** property :attr:`Quad.is_convex` which checks whether a line is contained in the quad if it connects two points of it. * **Changed** :meth:`Document.insert_pdf` to now allow dropping or including links and annotations independently during the copy. Fixes issue #352 ("Corrupt PDF data and ..."), which seemed to intermittently occur when using the method for some problematic PDF files. * **Fixed** a bug which, in matrix division using the syntax *"m1/m2"*, caused matrix *"m1"* to be **replaced** by the result instead of delivering a new matrix. * **Fixed** issue #354 ("SyntaxWarning with Python 3.8"). We now always use *"=="* for literals (instead of the *"is"* Python keyword). @@ -540,7 +559,7 @@ Changes in Version 1.14.9 * **Added** new low-level method :meth:`Document._getTrailerString`, which returns the trailer object of a PDF. This is much like :meth:`Document._getXrefString` except that the PDF trailer has no / needs no :data:`xref` to identify it. * **Added** new parameters for text insertion methods. 
You can now set stroke and fill colors of glyphs (text characters) independently, as well as the thickness of the glyph border. A new parameter *render_mode* controls the use of these colors, and whether the text should be visible at all. * **Fixed** issue #258 ("Copying image streams to new PDF without size increase"): For JPX images embedded in a PDF, :meth:`Document.extractImage` will now return them in their original format. Previously, the MuPDF base library was used, which returns them in PNG format (entailing a massive size increase). -* **Fixed** issue #259 ("Morphing text to fit inside rect"). Clarified use of :meth:`getTextlength` and removed extra line breaks for long words. +* **Fixed** issue #259 ("Morphing text to fit inside rect"). Clarified use of :meth:`get_text_length` and removed extra line breaks for long words. Changes in Version 1.14.8 --------------------------- @@ -553,7 +572,7 @@ Changes in Version 1.14.7 --------------------------- * **Added** :meth:`Pixmap.set_pixel` to change one pixel value. * **Added** documentation for image conversion in the :ref:`FAQ`. -* **Added** new function :meth:`getTextlength` to determine the string length for a given font. +* **Added** new function :meth:`get_text_length` to determine the string length for a given font. * **Added** Postscript image output (changed :meth:`Pixmap.save` and :meth:`Pixmap.tobytes`). * **Changed** :meth:`Pixmap.save` and :meth:`Pixmap.tobytes` to ensure valid combinations of colorspace, alpha and output format. * **Changed** :meth:`Pixmap.save`: the desired format is now inferred from the filename. @@ -634,7 +653,7 @@ Changes in Version 1.13.17 * **Changed** method :meth:`Document.extractImage` to now return more meta information about the extracted imgage. Also, its performance has been greatly improved. Several demo scripts have been changed to make use of this method. 
* **Changed** method :meth:`Document._getXrefStream` to now return *None* if the object is no stream and no longer raise an exception if otherwise. * **Added** method :meth:`Document._deleteObject` which deletes a PDF object identified by its :data:`xref`. Only to be used by the experienced PDF expert. -* **Added** a method :meth:`PaperRect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.PaperRect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*. +* **Added** a method :meth:`paper_rect` which returns a :ref:`Rect` for a supplied paper format string. Example: *fitz.paper_rect("letter") = fitz.Rect(0.0, 0.0, 612.0, 792.0)*. * **Added** a :ref:`FAQ` chapter to this document. Changes in Version 1.13.16 @@ -950,7 +969,7 @@ Please have a look at MuPDF's website to see which changes and enhancements are Changes in version 1.9.1 compared to version 1.8.0 are the following: -* New methods *getRectArea()* for both *fitz.Rect* and *fitz.IRect* +* New methods *get_area()* for both *fitz.Rect* and *fitz.IRect* * Pixmaps can now be created directly from files using the new constructor *fitz.Pixmap(filename)*. * The Pixmap constructor *fitz.Pixmap(image)* has been extended accordingly. * *fitz.Rect* can now be created with all possible combinations of points and coordinates. @@ -964,9 +983,9 @@ Changes in version 1.9.1 compared to version 1.8.0 are the following: * New pixmap method *samplesRGB()* providing a *samples* version with alpha bytes stripped off (RGB colorspaces only). * New pixmap method *samplesAlpha()* providing the alpha bytes only of the *samples* area. * New iterator *fitz.Pages(doc)* over a document's set of pages. -* New matrix methods *invert()* (calculate inverted matrix), *concat()* (calculate matrix product), *preTranslate()* (perform a shift operation). +* New matrix methods *invert()* (calculate inverted matrix), *concat()* (calculate matrix product), *pretranslate()* (perform a shift operation). 
* New *IRect* methods *intersect()* (intersection with another rectangle), *translate()* (perform a shift operation). -* New *Rect* methods *intersect()* (intersection with another rectangle), *transform()* (transformation with a matrix), *includePoint()* (enlarge rectangle to also contain a point), *includeRect()* (enlarge rectangle to also contain another one). +* New *Rect* methods *intersect()* (intersection with another rectangle), *transform()* (transformation with a matrix), *include_point()* (enlarge rectangle to also contain a point), *include_rect()* (enlarge rectangle to also contain another one). * Documented *Point.transform()* (transform a point with a matrix). * *Matrix*, *IRect*, *Rect* and *Point* classes now support compact, algebraic formulations for manipulating such objects. * Incremental saves for changes are possible now using the call pattern *doc.save(doc.name, incremental=True)*. diff --git a/docs/conf.py b/docs/conf.py index f31ee8ba7..dd67cfe21 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -release = "1.18.13" +release = "1.18.14" # The short X.Y version version = release diff --git a/docs/deprecated.rst b/docs/deprecated.rst index 675ac9343..adba701bf 100644 --- a/docs/deprecated.rst +++ b/docs/deprecated.rst @@ -1,702 +1,217 @@ -.. data:: CropBox - - :attr:`Page.cropbox` - -.. data:: CropBoxPosition - - :attr:`Page.cropbox_position` - -.. data:: MediaBox - - :attr:`Page.mediabox` - -.. data:: MediaBoxSize - - :attr:`Page.mediabox_size` - -.. data:: PDFCatalog - - :meth:`Document.pdf_catalog` - -.. data:: PDFTrailer - - :meth:`Document.pdf_trailer` - -.. data:: _isWrapped - - :attr:`Page.is_wrapped` - -.. data:: addCaretAnnot - - :meth:`Page.add_caret_annot` - -.. data:: addCircleAnnot - - :meth:`Page.add_circle_annot` - -.. data:: addFileAnnot - - :meth:`Page.add_file_annot` - -.. data:: addFreetextAnnot - - :meth:`Page.add_freetext_annot` - -.. 
data:: addHighlightAnnot - - :meth:`Page.add_highlight_annot` - -.. data:: addInkAnnot - - :meth:`Page.add_ink_annot` - -.. data:: addLineAnnot - - :meth:`Page.add_line_annot` - -.. data:: addPolygonAnnot - - :meth:`Page.add_polygon_annot` - -.. data:: addPolylineAnnot - - :meth:`Page.add_polyline_annot` - -.. data:: addRectAnnot - - :meth:`Page.add_rect_annot` - -.. data:: addRedactAnnot - - :meth:`Page.add_redact_annot` - -.. data:: addSquigglyAnnot - - :meth:`Page.add_squiggly_annot` - -.. data:: addStampAnnot - - :meth:`Page.add_stamp_annot` - -.. data:: addStrikeoutAnnot - - :meth:`Page.add_strikeout_annot` - -.. data:: addTextAnnot - - :meth:`Page.add_text_annot` - -.. data:: addUnderlineAnnot - - :meth:`Page.add_underline_annot` - -.. data:: addWidget - - :meth:`Page.add_widget` - -.. data:: chapterCount - - :attr:`Document.chapter_count` - -.. data:: chapterPageCount - - :meth:`Document.chapter_page_count` - -.. data:: cleanContents - - :meth:`Page.clean_contents` - -.. data:: clearWith - - :meth:`Pixmap.clear_with` - -.. data:: convertToPDF - - :meth:`Document.convert_to_pdf` - -.. data:: copyPage - - :meth:`Document.copy_page` - -.. data:: copyPixmap - - :meth:`Pixmap.copy` - -.. data:: deleteAnnot - - :meth:`Page.delete_annot` - -.. data:: deleteLink - - :meth:`Page.delete_link` - -.. data:: deletePage - - :meth:`Document.delete_page` - -.. data:: deletePageRange - - :meth:`Document.delete_pages` - -.. data:: deleteWidget - - :meth:`Page.delete_widget` - -.. data:: derotationMatrix - - :attr:`Page.derotation_matrix` - -.. data:: drawBezier - - :meth:`Page.draw_bezier` - - :meth:`Shape.draw_bezier` - -.. data:: drawCircle - - :meth:`Page.draw_circle` - - :meth:`Shape.draw_circle` - -.. data:: drawCurve - - :meth:`Page.draw_curve` - - :meth:`Shape.draw_curve` - -.. data:: drawLine - - :meth:`Page.draw_line` - - :meth:`Shape.draw_line` - -.. data:: drawOval - - :meth:`Page.draw_oval` - - :meth:`Shape.draw_oval` - -.. 
data:: drawPolyline - - :meth:`Page.draw_polyline` - - :meth:`Shape.draw_polyline` - -.. data:: drawQuad - - :meth:`Page.draw_quad` - - :meth:`Shape.draw_quad` - -.. data:: drawRect - - :meth:`Page.draw_rect` - - :meth:`Shape.draw_rect` - -.. data:: drawSector - - :meth:`Page.draw_sector` - - :meth:`Shape.draw_sector` - -.. data:: drawSquiggle - - :meth:`Page.draw_squiggle` - - :meth:`Shape.draw_squiggle` - -.. data:: drawZigzag - - :meth:`Page.draw_zigzag` - - :meth:`Shape.draw_zigzag` - -.. data:: embeddedFileAdd - - :meth:`Document.embfile_add` - -.. data:: embeddedFileCount - - :meth:`Document.embfile_count` - -.. data:: embeddedFileDel - - :meth:`Document.embfile_del` - -.. data:: embeddedFileGet - - :meth:`Document.embfile_get` - -.. data:: embeddedFileInfo - - :meth:`Document.embfile_info` - -.. data:: embeddedFileNames - - :meth:`Document.embfile_names` - -.. data:: embeddedFileUpd - - :meth:`Document.embfile_upd` - -.. data:: extractFont - - :meth:`Document.extract_font` - -.. data:: extractImage - - :meth:`Document.extract_image` - -.. data:: fileGet - - :meth:`Annot.get_file` - -.. data:: fileUpd - - :meth:`Annot.update_file` - -.. data:: fillTextbox - - :meth:`TextWriter.fill_textbox` - -.. data:: findBookmark - - :meth:`Document.find_bookmark` - -.. data:: firstAnnot - - :attr:`Page.first_annot` - -.. data:: firstLink - - :attr:`Page.first_link` - -.. data:: firstWidget - - :attr:`Page.first_widget` - -.. data:: fullcopyPage - - :meth:`Document.fullcopy_page` - -.. data:: gammaWith - - :meth:`Pixmap.gamma_with` - -.. data:: getCharWidths - - :meth:`Document.get_char_widths` - -.. data:: getContents - - :meth:`Page.get_contents` - -.. data:: getDisplayList - - :meth:`Page.get_displaylist` - -.. data:: getDrawings - - :meth:`Page.get_drawings` - -.. data:: getFontList - - :meth:`Page.get_fonts` - -.. data:: getImageBbox - - :meth:`Page.get_image_bbox` - -.. data:: getImageData - - :meth:`Pixmap.tobytes` - -.. 
data:: getImageList - - :meth:`Page.get_images` - -.. data:: getLinks - - :meth:`Page.get_links` - -.. data:: getOCGs - - :meth:`Document.get_ocgs` - -.. data:: getPNGData - - :meth:`Pixmap.tobytes` - -.. data:: getPageFontList - - :meth:`Document.get_page_fonts` - -.. data:: getPageImageList - - :meth:`Document.get_page_images` - -.. data:: getPagePixmap - - :meth:`Document.get_page_pixmap` - -.. data:: getPageText - - :meth:`Document.get_page_text` - -.. data:: getPageXObjectList - - :meth:`Document.get_page_xobjects` - -.. data:: getPixmap - - :meth:`Annot.get_pixmap` - - :meth:`DisplayList.get_pixmap` - - :meth:`Page.get_pixmap` - -.. data:: getSVGimage - - :meth:`Page.get_svg_image` - -.. data:: getSigFlags - - :meth:`Document.get_sigflags` - -.. data:: getText - - :meth:`Annot.get_text` - - :meth:`Page.get_text` - -.. data:: getTextBlocks - - :meth:`Page.get_text_blocks` - -.. data:: getTextPage - - :meth:`Annot.get_textpage` - - :meth:`DisplayList.get_textpage` - - :meth:`Page.get_textpage` - -.. data:: getTextWords - - :meth:`Page.get_text_words` - -.. data:: getTextbox - - :meth:`Annot.get_textbox` - - :meth:`Page.get_textbox` - -.. data:: getToC - - :meth:`Document.get_toc` - -.. data:: getXmlMetadata - - :meth:`Document.get_xml_metadata` - -.. data:: insertFont - - :meth:`Page.insert_font` - -.. data:: insertImage - - :meth:`Page.insert_image` - -.. data:: insertLink - - :meth:`Page.insert_link` - -.. data:: insertPDF - - :meth:`Document.insert_pdf` - -.. data:: insertPage - - :meth:`Document.insert_page` - -.. data:: insertText - - :meth:`Page.insert_text` - - :meth:`Shape.insert_text` - -.. data:: insertTextbox - - :meth:`Page.insert_textbox` - - :meth:`Shape.insert_textbox` - -.. data:: invertIRect - - :meth:`Pixmap.invert_irect` - -.. data:: isDirty - - :attr:`Document.is_dirty` - -.. data:: isFormPDF - - :attr:`Document.is_form_pdf` - -.. data:: isPDF - - :attr:`Document.is_pdf` - -.. data:: isReflowable - - :attr:`Document.is_reflowable` - -.. 
data:: isRepaired - - :attr:`Document.is_repaired` - -.. data:: isStream - - :meth:`Document.is_stream` - -.. data:: lastLocation - - :attr:`Document.last_location` - -.. data:: lineEnds - - :attr:`Annot.line_ends` - -.. data:: loadAnnot - - :meth:`Page.load_annot` - -.. data:: loadLinks - - :meth:`Page.load_links` - -.. data:: loadPage - - :meth:`Document.load_page` - -.. data:: makeBookmark - - :meth:`Document.make_bookmark` - -.. data:: metadataXML - - :meth:`Document.xref_xml_metadata` - -.. data:: movePage - - :meth:`Document.move_page` - -.. data:: needsPass - - :attr:`Document.needs_pass` - -.. data:: newPage - - :meth:`Document.new_page` - -.. data:: newShape - - :meth:`Page.new_shape` - -.. data:: nextLocation - - :meth:`Document.next_location` - -.. data:: pageCount - - :attr:`Document.page_count` - -.. data:: pageCropBox - - :meth:`Document.page_cropbox` - -.. data:: pageXref - - :meth:`Document.page_xref` - -.. data:: pillowData - - :meth:`Pixmap.pil_tobytes` - -.. data:: pillowWrite - - :meth:`Pixmap.pil_save` - -.. data:: previousLocation - - :meth:`Document.prev_location` - -.. data:: readContents - - :meth:`Page.read_contents` - -.. data:: resolveLink - - :meth:`Document.resolve_link` - -.. data:: rotationMatrix - - :attr:`Page.rotation_matrix` - -.. data:: searchFor - - :meth:`Page.search_for` - -.. data:: searchPageFor - - :meth:`Document.search_page_for` - -.. data:: setAlpha - - :meth:`Pixmap.set_alpha` - -.. data:: setBlendMode - - :meth:`Annot.set_blendmode` - -.. data:: setBorder - - :meth:`Annot.set_border` - -.. data:: setColors - - :meth:`Annot.set_colors` - -.. data:: setCropBox - - :meth:`Page.set_cropbox` - -.. data:: setFlags - - :meth:`Annot.set_flags` - -.. data:: setInfo - - :meth:`Annot.set_info` - -.. data:: setLanguage - - :meth:`Document.set_language` - -.. data:: setLineEnds - - :meth:`Annot.set_line_ends` - -.. data:: setMediaBox - - :meth:`Page.set_mediabox` - -.. data:: setMetadata - - :meth:`Document.set_metadata` - -.. 
data:: setName - - :meth:`Annot.set_name` - -.. data:: setOC - - :meth:`Annot.set_oc` - -.. data:: setOpacity - - :meth:`Annot.set_opacity` - -.. data:: setOrigin - - :meth:`Pixmap.set_origin` - -.. data:: setPixel - - :meth:`Pixmap.set_pixel` - -.. data:: setRect - - :meth:`Annot.set_rect` - - :meth:`Pixmap.set_rect` - -.. data:: setResolution - - :meth:`Pixmap.set_dpi` - -.. data:: setRotation - - :meth:`Page.set_rotation` - -.. data:: setToC - - :meth:`Document.set_toc` - -.. data:: setXmlMetadata - - :meth:`Document.set_xml_metadata` - -.. data:: showPDFpage - - :meth:`Page.show_pdf_page` - -.. data:: soundGet - - :meth:`Annot.get_sound` - -.. data:: tintWith - - :meth:`Pixmap.tint_with` - -.. data:: transformationMatrix - - :attr:`Page.transformation_matrix` - -.. data:: updateLink - - :meth:`Page.update_link` - -.. data:: updateObject - - :meth:`Document.update_object` - -.. data:: updateStream - - :meth:`Document.update_stream` - -.. data:: wrapContents - - :meth:`Page.wrap_contents` - -.. data:: writeImage - - :meth:`Pixmap.save` - -.. data:: writePNG - - :meth:`Pixmap.save` - -.. data:: writeText - - :meth:`Page.write_text` - - :meth:`TextWriter.write_text` - -.. data:: xrefLength - - :meth:`Document.xref_length` - -.. data:: xrefObject - - :meth:`Document.xref_object` - -.. data:: xrefStream - - :meth:`Document.xref_stream` - -.. data:: xrefStreamRaw - - :meth:`Document.xref_stream_raw` - +.. 
Deprecated Names: + +* :index:`_isWrapped` -- :attr:`Page.is_wrapped` +* :index:`addCaretAnnot` -- :meth:`Page.add_caret_annot` +* :index:`addCircleAnnot` -- :meth:`Page.add_circle_annot` +* :index:`addFileAnnot` -- :meth:`Page.add_file_annot` +* :index:`addFreetextAnnot` -- :meth:`Page.add_freetext_annot` +* :index:`addHighlightAnnot` -- :meth:`Page.add_highlight_annot` +* :index:`addInkAnnot` -- :meth:`Page.add_ink_annot` +* :index:`addLineAnnot` -- :meth:`Page.add_line_annot` +* :index:`addPolygonAnnot` -- :meth:`Page.add_polygon_annot` +* :index:`addPolylineAnnot` -- :meth:`Page.add_polyline_annot` +* :index:`addRectAnnot` -- :meth:`Page.add_rect_annot` +* :index:`addRedactAnnot` -- :meth:`Page.add_redact_annot` +* :index:`addSquigglyAnnot` -- :meth:`Page.add_squiggly_annot` +* :index:`addStampAnnot` -- :meth:`Page.add_stamp_annot` +* :index:`addStrikeoutAnnot` -- :meth:`Page.add_strikeout_annot` +* :index:`addTextAnnot` -- :meth:`Page.add_text_annot` +* :index:`addUnderlineAnnot` -- :meth:`Page.add_underline_annot` +* :index:`addWidget` -- :meth:`Page.add_widget` +* :index:`chapterCount` -- :attr:`Document.chapter_count` +* :index:`chapterPageCount` -- :meth:`Document.chapter_page_count` +* :index:`cleanContents` -- :meth:`Page.clean_contents` +* :index:`clearWith` -- :meth:`Pixmap.clear_with` +* :index:`convertToPDF` -- :meth:`Document.convert_to_pdf` +* :index:`copyPage` -- :meth:`Document.copy_page` +* :index:`copyPixmap` -- :meth:`Pixmap.copy` +* :index:`CropBox` -- :attr:`Page.cropbox` +* :index:`CropBoxPosition` -- :attr:`Page.cropbox_position` +* :index:`deleteAnnot` -- :meth:`Page.delete_annot` +* :index:`deleteLink` -- :meth:`Page.delete_link` +* :index:`deletePage` -- :meth:`Document.delete_page` +* :index:`deletePageRange` -- :meth:`Document.delete_pages` +* :index:`deleteWidget` -- :meth:`Page.delete_widget` +* :index:`derotationMatrix` -- :attr:`Page.derotation_matrix` +* :index:`drawBezier` -- :meth:`Page.draw_bezier` +* :index:`drawBezier` -- 
:meth:`Shape.draw_bezier` +* :index:`drawCircle` -- :meth:`Page.draw_circle` +* :index:`drawCircle` -- :meth:`Shape.draw_circle` +* :index:`drawCurve` -- :meth:`Page.draw_curve` +* :index:`drawCurve` -- :meth:`Shape.draw_curve` +* :index:`drawLine` -- :meth:`Page.draw_line` +* :index:`drawLine` -- :meth:`Shape.draw_line` +* :index:`drawOval` -- :meth:`Page.draw_oval` +* :index:`drawOval` -- :meth:`Shape.draw_oval` +* :index:`drawPolyline` -- :meth:`Page.draw_polyline` +* :index:`drawPolyline` -- :meth:`Shape.draw_polyline` +* :index:`drawQuad` -- :meth:`Page.draw_quad` +* :index:`drawQuad` -- :meth:`Shape.draw_quad` +* :index:`drawRect` -- :meth:`Page.draw_rect` +* :index:`drawRect` -- :meth:`Shape.draw_rect` +* :index:`drawSector` -- :meth:`Page.draw_sector` +* :index:`drawSector` -- :meth:`Shape.draw_sector` +* :index:`drawSquiggle` -- :meth:`Page.draw_squiggle` +* :index:`drawSquiggle` -- :meth:`Shape.draw_squiggle` +* :index:`drawZigzag` -- :meth:`Page.draw_zigzag` +* :index:`drawZigzag` -- :meth:`Shape.draw_zigzag` +* :index:`embeddedFileAdd` -- :meth:`Document.embfile_add` +* :index:`embeddedFileCount` -- :meth:`Document.embfile_count` +* :index:`embeddedFileDel` -- :meth:`Document.embfile_del` +* :index:`embeddedFileGet` -- :meth:`Document.embfile_get` +* :index:`embeddedFileInfo` -- :meth:`Document.embfile_info` +* :index:`embeddedFileNames` -- :meth:`Document.embfile_names` +* :index:`embeddedFileUpd` -- :meth:`Document.embfile_upd` +* :index:`extractFont` -- :meth:`Document.extract_font` +* :index:`extractImage` -- :meth:`Document.extract_image` +* :index:`fileGet` -- :meth:`Annot.get_file` +* :index:`fileUpd` -- :meth:`Annot.update_file` +* :index:`fillTextbox` -- :meth:`TextWriter.fill_textbox` +* :index:`findBookmark` -- :meth:`Document.find_bookmark` +* :index:`firstAnnot` -- :attr:`Page.first_annot` +* :index:`firstLink` -- :attr:`Page.first_link` +* :index:`firstWidget` -- :attr:`Page.first_widget` +* :index:`fullcopyPage` -- 
:meth:`Document.fullcopy_page` +* :index:`gammaWith` -- :meth:`Pixmap.gamma_with` +* :index:`getArea` -- :meth:`Rect.get_area` +* :index:`getArea` -- :meth:`IRect.get_area` +* :index:`getCharWidths` -- :meth:`Document.get_char_widths` +* :index:`getContents` -- :meth:`Page.get_contents` +* :index:`getDisplayList` -- :meth:`Page.get_displaylist` +* :index:`getDrawings` -- :meth:`Page.get_drawings` +* :index:`getFontList` -- :meth:`Page.get_fonts` +* :index:`getImageBbox` -- :meth:`Page.get_image_bbox` +* :index:`getImageData` -- :meth:`Pixmap.tobytes` +* :index:`getImageList` -- :meth:`Page.get_images` +* :index:`getLinks` -- :meth:`Page.get_links` +* :index:`getOCGs` -- :meth:`Document.get_ocgs` +* :index:`getPageFontList` -- :meth:`Document.get_page_fonts` +* :index:`getPageImageList` -- :meth:`Document.get_page_images` +* :index:`getPagePixmap` -- :meth:`Document.get_page_pixmap` +* :index:`getPageText` -- :meth:`Document.get_page_text` +* :index:`getPageXObjectList` -- :meth:`Document.get_page_xobjects` +* :index:`getPDFnow` -- :meth:`get_pdf_now` +* :index:`getPDFstr` -- :meth:`get_pdf_str` +* :index:`getPixmap` -- :meth:`Page.get_pixmap` +* :index:`getPixmap` -- :meth:`Annot.get_pixmap` +* :index:`getPixmap` -- :meth:`DisplayList.get_pixmap` +* :index:`getPNGData` -- :meth:`Pixmap.tobytes` +* :index:`getPNGdata` -- :meth:`Pixmap.tobytes` +* :index:`getRectArea` -- :meth:`Rect.get_area` +* :index:`getRectArea` -- :meth:`IRect.get_area` +* :index:`getSigFlags` -- :meth:`Document.get_sigflags` +* :index:`getSVGimage` -- :meth:`Page.get_svg_image` +* :index:`getText` -- :meth:`Page.get_text` +* :index:`getText` -- :meth:`Annot.get_text` +* :index:`getTextBlocks` -- :meth:`Page.get_text_blocks` +* :index:`getTextbox` -- :meth:`Page.get_textbox` +* :index:`getTextbox` -- :meth:`Annot.get_textbox` +* :index:`getTextLength` -- :meth:`get_text_length` +* :index:`getTextPage` -- :meth:`Page.get_textpage` +* :index:`getTextPage` -- :meth:`Annot.get_textpage` +* 
:index:`getTextPage` -- :meth:`DisplayList.get_textpage` +* :index:`getTextWords` -- :meth:`Page.get_text_words` +* :index:`getToC` -- :meth:`Document.get_toc` +* :index:`getXmlMetadata` -- :meth:`Document.get_xml_metadata` +* :index:`ImageProperties` -- :meth:`image_properties` +* :index:`includePoint` -- :meth:`Rect.include_point` +* :index:`includePoint` -- :meth:`IRect.include_point` +* :index:`includeRect` -- :meth:`Rect.include_rect` +* :index:`includeRect` -- :meth:`IRect.include_rect` +* :index:`insertFont` -- :meth:`Page.insert_font` +* :index:`insertImage` -- :meth:`Page.insert_image` +* :index:`insertLink` -- :meth:`Page.insert_link` +* :index:`insertPage` -- :meth:`Document.insert_page` +* :index:`insertPDF` -- :meth:`Document.insert_pdf` +* :index:`insertText` -- :meth:`Page.insert_text` +* :index:`insertText` -- :meth:`Shape.insert_text` +* :index:`insertTextbox` -- :meth:`Page.insert_textbox` +* :index:`insertTextbox` -- :meth:`Shape.insert_textbox` +* :index:`invertIRect` -- :meth:`Pixmap.invert_irect` +* :index:`isConvex` -- :attr:`Quad.is_convex` +* :index:`isDirty` -- :attr:`Document.is_dirty` +* :index:`isEmpty` -- :attr:`Rect.is_empty` +* :index:`isEmpty` -- :attr:`IRect.is_empty` +* :index:`isEmpty` -- :attr:`Quad.is_empty` +* :index:`isFormPDF` -- :attr:`Document.is_form_pdf` +* :index:`isInfinite` -- :attr:`Rect.is_infinite` +* :index:`isInfinite` -- :attr:`IRect.is_infinite` +* :index:`isPDF` -- :attr:`Document.is_pdf` +* :index:`isRectangular` -- :attr:`Quad.is_rectangular` +* :index:`isRectilinear` -- :attr:`Matrix.is_rectilinear` +* :index:`isReflowable` -- :attr:`Document.is_reflowable` +* :index:`isRepaired` -- :attr:`Document.is_repaired` +* :index:`isStream` -- :meth:`Document.is_stream` +* :index:`lastLocation` -- :attr:`Document.last_location` +* :index:`lineEnds` -- :attr:`Annot.line_ends` +* :index:`loadAnnot` -- :meth:`Page.load_annot` +* :index:`loadLinks` -- :meth:`Page.load_links` +* :index:`loadPage` -- 
:meth:`Document.load_page` +* :index:`makeBookmark` -- :meth:`Document.make_bookmark` +* :index:`MediaBox` -- :attr:`Page.mediabox` +* :index:`MediaBoxSize` -- :attr:`Page.mediabox_size` +* :index:`metadataXML` -- :meth:`Document.xref_xml_metadata` +* :index:`movePage` -- :meth:`Document.move_page` +* :index:`needsPass` -- :attr:`Document.needs_pass` +* :index:`newPage` -- :meth:`Document.new_page` +* :index:`newShape` -- :meth:`Page.new_shape` +* :index:`nextLocation` -- :meth:`Document.next_location` +* :index:`pageCount` -- :attr:`Document.page_count` +* :index:`pageCropBox` -- :meth:`Document.page_cropbox` +* :index:`pageXref` -- :meth:`Document.page_xref` +* :index:`PaperRect` -- :meth:`paper_rect` +* :index:`PaperSize` -- :meth:`paper_size` +* :index:`paperSizes` -- :attr:`paper_sizes` +* :index:`PDFCatalog` -- :meth:`Document.pdf_catalog` +* :index:`PDFTrailer` -- :meth:`Document.pdf_trailer` +* :index:`pillowData` -- :meth:`Pixmap.pil_tobytes` +* :index:`pillowWrite` -- :meth:`Pixmap.pil_save` +* :index:`planishLine` -- :meth:`planish_line` +* :index:`preRotate` -- :meth:`Matrix.prerotate` +* :index:`preScale` -- :meth:`Matrix.prescale` +* :index:`preShear` -- :meth:`Matrix.preshear` +* :index:`preTranslate` -- :meth:`Matrix.pretranslate` +* :index:`previousLocation` -- :meth:`Document.prev_location` +* :index:`readContents` -- :meth:`Page.read_contents` +* :index:`resolveLink` -- :meth:`Document.resolve_link` +* :index:`rotationMatrix` -- :attr:`Page.rotation_matrix` +* :index:`searchFor` -- :meth:`Page.search_for` +* :index:`searchPageFor` -- :meth:`Document.search_page_for` +* :index:`setAlpha` -- :meth:`Pixmap.set_alpha` +* :index:`setBlendMode` -- :meth:`Annot.set_blendmode` +* :index:`setBorder` -- :meth:`Annot.set_border` +* :index:`setColors` -- :meth:`Annot.set_colors` +* :index:`setCropBox` -- :meth:`Page.set_cropbox` +* :index:`setFlags` -- :meth:`Annot.set_flags` +* :index:`setInfo` -- :meth:`Annot.set_info` +* :index:`setLanguage` -- 
:meth:`Document.set_language` +* :index:`setLineEnds` -- :meth:`Annot.set_line_ends` +* :index:`setMediaBox` -- :meth:`Page.set_mediabox` +* :index:`setMetadata` -- :meth:`Document.set_metadata` +* :index:`setName` -- :meth:`Annot.set_name` +* :index:`setOC` -- :meth:`Annot.set_oc` +* :index:`setOpacity` -- :meth:`Annot.set_opacity` +* :index:`setOrigin` -- :meth:`Pixmap.set_origin` +* :index:`setPixel` -- :meth:`Pixmap.set_pixel` +* :index:`setRect` -- :meth:`Annot.set_rect` +* :index:`setRect` -- :meth:`Pixmap.set_rect` +* :index:`setResolution` -- :meth:`Pixmap.set_dpi` +* :index:`setRotation` -- :meth:`Page.set_rotation` +* :index:`setToC` -- :meth:`Document.set_toc` +* :index:`setXmlMetadata` -- :meth:`Document.set_xml_metadata` +* :index:`showPDFpage` -- :meth:`Page.show_pdf_page` +* :index:`soundGet` -- :meth:`Annot.get_sound` +* :index:`tintWith` -- :meth:`Pixmap.tint_with` +* :index:`transformationMatrix` -- :attr:`Page.transformation_matrix` +* :index:`updateLink` -- :meth:`Page.update_link` +* :index:`updateObject` -- :meth:`Document.update_object` +* :index:`updateStream` -- :meth:`Document.update_stream` +* :index:`wrapContents` -- :meth:`Page.wrap_contents` +* :index:`writeImage` -- :meth:`Pixmap.save` +* :index:`writePNG` -- :meth:`Pixmap.save` +* :index:`writeText` -- :meth:`Page.write_text` +* :index:`writeText` -- :meth:`TextWriter.write_text` +* :index:`xrefLength` -- :meth:`Document.xref_length` +* :index:`xrefObject` -- :meth:`Document.xref_object` +* :index:`xrefStream` -- :meth:`Document.xref_stream` +* :index:`xrefStreamRaw` -- :meth:`Document.xref_stream_raw` diff --git a/docs/document.rst b/docs/document.rst index 92a745534..390347162 100644 --- a/docs/document.rst +++ b/docs/document.rst @@ -25,7 +25,7 @@ For details on **embedded files** refer to Appendix 3. 
======================================= ========================================================== **Method / Attribute** **Short Description** ======================================= ========================================================== -:meth:`Document.add_layer_config` PDF only: make new optional content configuration +:meth:`Document.add_layer` PDF only: make new optional content configuration :meth:`Document.add_ocg` PDF only: add new optional content group :meth:`Document.authenticate` gain access to an encrypted document :meth:`Document.can_save_incrementally` check if incremental save is possible @@ -46,7 +46,7 @@ For details on **embedded files** refer to Appendix 3. :meth:`Document.ez_save` PDF only: :meth:`Document.save` with different defaults :meth:`Document.find_bookmark` retrieve page location after layouting :meth:`Document.fullcopy_page` PDF only: duplicate a page -:meth:`Document.get_oc_states` PDF only: lists of OCGs in ON, OFF, RBGroups +:meth:`Document.get_layer` PDF only: lists of OCGs in ON, OFF, RBGroups :meth:`Document.get_oc` PDF only: get OCG /OCMD xref of image / form xobject :meth:`Document.get_ocgs` PDF only: info on all optional content groups :meth:`Document.get_ocmd` PDF only: retrieve definition of an :data:`OCMD` @@ -224,15 +224,15 @@ For details on **embedded files** refer to Appendix 3. {'number': 0, 'name': 'my-config', 'creator': ''} >>> # use 'number' as config identifyer in add_ocg - .. method:: add_layer_config(name, creator=None, on=None) + .. method:: add_layer(name, creator=None, on=None) *(New in v1.18.3)* - Add an optional content configuration. Layers serve as a collection of ON / OFF states for optional content groups. They allow fast visibility switches between different views on the same document. + Add an optional content configuration. Layers serve as a collection of ON / OFF states for optional content groups and allow fast visibility switches between different views on the same document. 
:arg str name: arbitrary name. - :arg str creator: creating software. - :arg sequ on: a sequence of OCG :data:`xref` numbers which should be set to ON (visible). All other OCGs will be set to OFF. + :arg str creator: (optional) creating software. + :arg sequ on: a sequence of OCG :data:`xref` numbers which should be set to ON when this layer gets activated. All OCGs not listed here will be set to OFF. .. method:: switch_layer(number, as_default=False) @@ -268,7 +268,7 @@ For details on **embedded files** refer to Appendix 3. *(New in v1.18.4)* - Create or update an :data:`OCMD` (optional content membership dictionary). + Create or update an :data:`OCMD`, **Optional Content Membership Dictionary.** :arg int xref: :data:`xref` of the OCMD to be updated, or 0 for a new OCMD. :arg list ocgs: a sequence of :data:`xref` numbers of existing :data:`OCG` PDF objects. @@ -279,9 +279,9 @@ For details on **embedded files** refer to Appendix 3. .. note:: - The purpose of OCMDs is to more flexibly determine visibility. An OCMD actually is a boolean expression: it evaluates the current visibility of one or more optional content groups and then computes its own ON (true) or OFF (false) state. + Like an OCG, an OCMD has a visibility state ON or OFF, and it can be used like an OCG. In contrast to an OCG, the OCMD state is determined by evaluating the state of one or more OCGs via special forms of **boolean expressions.** If the expression evaluates to true, the OCMD state is ON and OFF for false. - There are two ways to formulate the OCMD's visibility: + There are two ways to formulate OCMD visibility: 1. Use the combination of *ocgs* and *policy*: The *policy* value is interpreted as follows: @@ -294,11 +294,11 @@ For details on **embedded files** refer to Appendix 3. Solution: use an **OCG** for object 1 and an **OCMD** for object 2. Create the OCMD via ``set_ocmd(ocgs=[xref], policy="AllOff")``, with the :data:`xref` of the OCG. - 2. 
Use the **visibility expression** *ve*: This is a list of a logical expression keyword (string) followed by integers or other lists. The possible logical expressions are **"and"**, **"or"**, and **"not"**. The integers must be :data:`xref` numbers of OCGs. The syntax of this parameter is a bit awkward, but quite powerful: + 2. Use the **visibility expression** *ve*: This is a list of two or more items. The **first item** is a logical keyword: one of the strings **"and"**, **"or"**, or **"not"**. The **second** and all subsequent items must either be an integer or another list. An integer must be the :data:`xref` number of an OCG. A list must again have at least two items starting with one of the boolean keywords. This syntax is a bit awkward, but quite powerful: - - Each list, including the top one, must start with a logical expression. - - If the first item is a **"not"**, then the list must have exactly two items. If it is **"and"** or **"or"**, any number of other items may follow. - - Items following the logical expression may be either integers or other lists. An *integer* must be the xref of an OCG. A *list* must conform to the rules above. + - Each list must start with a logical keyword. + - If the keyword is a **"not"**, then the list must have exactly two items. If it is **"and"** or **"or"**, any number of other items may follow. + - Items following the logical keyword may be either integers or again a list. An *integer* must be the xref of an OCG. A *list* must conform to the previous rules. **Examples:** @@ -307,21 +307,21 @@ For details on **embedded files** refer to Appendix 3. For more details and examples see page 367 of :ref:`AdobeManual`. Also do have a look at example scripts `here `_. - Visibility expressions, ``/VE``, are part of the PDF version 1.6 specification. If you are using an older PDF consumer software, you hence may find it unsupported (i.e. ignored). + Visibility expressions, ``/VE``, are part of PDF specification version 1.6. 
So not all PDF viewers / readers may already support this feature and hence will react in some standard way for those cases. .. method:: get_ocmd(xref) *(New in v1.18.4)* - Retrieve the definition of an OCMD (optional content membership dictionary). + Retrieve the definition of an :data:`OCMD`. :arg int xref: the :data:`xref` of the OCMD. :rtype: dict :returns: a dictionary with the keys *xref*, *ocgs*, *policy* and *ve*. - .. method:: get_oc_states(config=-1) + .. method:: get_layer(config=-1) *(New in v1.18.3)* @@ -329,7 +329,7 @@ For details on **embedded files** refer to Appendix 3. :arg int config: the configuration layer (default is the standard config layer). - >>> pprint(doc.get_oc_states()) + >>> pprint(doc.get_layer()) {'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} >>> @@ -348,7 +348,7 @@ For details on **embedded files** refer to Appendix 3. Values *None* will not change the corresponding PDF array. >>> doc.set_layer(-1, basestate="OFF") # only changes the base state - >>> pprint(doc.get_oc_states()) + >>> pprint(doc.get_layer()) {'basestate': 'OFF', 'off': [8, 9, 10], 'on': [5, 6, 7], 'rbgroups': [[7, 10]]} @@ -583,7 +583,8 @@ For details on **embedded files** refer to Appendix 3. :rtype: :ref:`Page` :returns: a new copy of the same page. All pending updates (e.g. to annotations or widgets) will be finalized and a fresh copy of the page will be loaded. - .. note:: In a typical use case, a page :ref:`Pixmap` should be taken after annotations / widgets have been added or changed. To force all those changes being reflected in the page structure, this method re-instates a fresh copy while keeping the object hierarchy "document -> page -> annotation(s)" intact. + + .. note:: In a typical use case, a page :ref:`Pixmap` should be taken after annotations / widgets have been added or changed. 
To force all those changes being reflected in the page structure, this method re-instates a fresh copy while keeping the object hierarchy "document -> page -> annotations/widgets" intact. .. method:: page_cropbox(pno) @@ -775,13 +776,13 @@ For details on **embedded files** refer to Appendix 3. * **null** -- the string ``"null"``. This is the PDF equivalent to Python's ``None`` and causes the key to be ignored -- however not necessarily removed, resp. removed on saves with garbage collection. * **bool** -- one of the strings ``"true"`` or ``"false"``. * **name** -- a valid PDF name with a leading slash: ``"/PageLayout"``. See page 56 of the :ref:`AdobeManual`. - * **string** -- a valid PDF string. **All PDF strings** must be enclosed by some type of brackets. Denote the empty string as ``"()"``. Depending on its content, the possible bracket types are "(...)" or "<...>". Reserved PDF characters must be escaped. If in doubt, we **strongly recommend** to use :meth:`getPDFstr`! This function automatically generates the right brackets, escapes, and overall format. E.g. it will do conversions like these: + * **string** -- a valid PDF string. **All PDF strings** must be enclosed by some type of brackets. Denote the empty string as ``"()"``. Depending on its content, the possible bracket types are "(...)" or "<...>". Reserved PDF characters must be escaped. If in doubt, we **strongly recommend** to use :meth:`get_pdf_str`! This function automatically generates the right brackets, escapes, and overall format. E.g. it will do conversions like these: >>> # because of €, the following yields UTF-16BE BOM - >>> fitz.getPDFstr("Pay in $ or €.") + >>> fitz.get_pdf_str("Pay in $ or €.") '<feff00500061007900200069006e002000240020006f0072002020ac002e>' >>> # escapes for brackets and non-ASCII - >>> fitz.getPDFstr("Prices in EUR (USD also accepted). Areas are in m².") + >>> fitz.get_pdf_str("Prices in EUR (USD also accepted). Areas are in m².") '(Prices in EUR \\(USD also accepted\\). 
Areas are in m\\262.)' @@ -982,20 +983,20 @@ For details on **embedded files** refer to Appendix 3. .. method:: del_toc_item(idx) - *(New in v1.17.7)* + * New in v1.17.7 + * Changed in v1.18.14: no longer remove the item's text, but show it grayed-out. - PDF only: Remove this TOC item. This is a high-speed method primarily meant for *disabling* items, which are pointing to deleted pages. Physically, the item still exists in the TOC tree, but will show an empty title and no longer point to a destination. So the overall TOC structure remains intact. + PDF only: Remove this TOC item. This is a high-speed method, which **disables** the respective item, but leaves the overall TOC structure intact. Physically, the item still exists in the TOC tree, but is shown grayed-out and will no longer point to any destination. - This also implies that you can reassign the item to a destination when required. + This also implies that you can reassign the item to a new destination using :meth:`Document.set_toc_item`, when required. :arg int idx: the index of the item in list :meth:`Document.get_toc`. .. method:: set_toc_item(idx, dest_dict=None, kind=None, pno=None, uri=None, title=None, to=None, filename=None, zoom=0) - *(New in v1.17.7)* - - *(Changed in v1.18.6)* + * New in v1.17.7 + * Changed in v1.18.6 PDF only: Changes the TOC item identified by its index. Change the item **title**, **destination**, **appearance** (color, bold, italic) or collapsing sub-items -- or to remove the item altogether. @@ -1113,8 +1114,6 @@ For details on **embedded files** refer to Appendix 3. PDF only: saves the document incrementally. This is a convenience abbreviation for *doc.save(doc.name, incremental=True, encryption=PDF_ENCRYPT_KEEP)*. - .. method:: ez_save() - .. 
method:: tobytes(garbage=0, clean=False, deflate=False, deflate_images=False, deflate_fonts=False, ascii=False, expand=0, linear=False, pretty=False, encryption=PDF_ENCRYPT_NONE, permissions=-1, owner_pw=None, user_pw=None) *(Changed in v1.18.7)* @@ -1207,40 +1206,48 @@ For details on **embedded files** refer to Appendix 3. PDF only: Delete a page given by its 0-based number in -inf < pno < page_count - 1. - Changed in version 1.14.17 + * Changed in v1.18.14: support Python's ``del`` statement. :arg int pno: the page to be deleted. Negative number count backwards from the end of the document (like with indices). Default is the last page. .. method:: delete_pages(*args, **kwds) - *Changed in v1.18.13: more flexibility specifying pages to delete.* + * Changed in v1.18.13: more flexibility specifying pages to delete. + * Changed in v1.18.14: support Python's ``del`` statement. - PDF only: Delete multiple pages given as 0-based numbers. *Changed in v1.18.13:* introduced much more flexibility for specifying pages. + PDF only: Delete multiple pages given as 0-based numbers. - **Format 1:** Use keywords. Represents the old format. + **Format 1:** Use keywords. Represents the old format. A contiguous range of pages is removed. * "from_page": first page to delete. Zero if omitted. * "to_page": last page to delete. Last page in document if omitted. Must not be less then "from_page". - **Format 2:** A sequence as one positional parameter. A list, tuple or range object specifying pages to delete. Pages need not be consecutive. + **Format 2:** Two page numbers as positional parameters. Handled like Format 1. + + **Format 3:** One positional integer parameter. Equivalent to :meth:`Document.delete_page`. - **Format 3:** Page number as a single positional parameter. Equivalent to :meth:`Page.delete_page`. + **Format 4:** One positional parameter of type *list*, *tuple* or *range()* of page numbers. The items of this sequence may be in any order and may contain duplicates. 
- **Format 4:** Two page numbers as positional parameters. Handled like Format 1. + **Format 5:** *(New in v1.18.14)* Using the Python ``del`` statement and index / slice notation is now possible. .. note:: - *(Changed in v1.14.17, optimized in v1.17.7)* In an effort to maintain a valid PDF structure, this method and :meth:`delete_page` will also invalidate items in the table of contents which happen to point to deleted pages. "Invalidation" here means, that the bookmark will point to nowhere and the title will show the string "<>". The overall TOC structure is left intact. + *(Changed in v1.14.17, optimized in v1.17.7)* In an effort to maintain a valid PDF structure, this method and :meth:`delete_page` will also deactivate items in the table of contents which point to deleted pages. "Deactivation" here means, that the bookmark will point to nowhere and the title will be shown grayed-out by supporting PDF viewers. The overall TOC structure is left intact. - Similarly, it will remove any **links on remaining pages** that point to a deleted one. This action may have an extended response time for documents with many pages. + It will also remove any **links on remaining pages** which point to a deleted one. This action may have an extended response time for documents with many pages. - Following examples all delete pages 500 through 519: + Following examples will all delete pages 500 through 519: * ``doc.delete_pages(500, 519)`` * ``doc.delete_pages(from_page=500, to_page=519)`` - * ``doc.delete_pages([500, 501, 502, ... , 519])`` + * ``doc.delete_pages((500, 501, 502, ... , 519))`` * ``doc.delete_pages(range(500, 520))`` + * ``del doc[500:520]`` + * ``del doc[(500, 501, 502, ... , 519)]`` + * ``del doc[range(500, 520)]`` + + For the :ref:`AdobeManual` the above takes about 0.6 seconds, because the remaining 1290 pages must be cleaned from invalid links. 
- For the :ref:`AdobeManual` the above takes about 0.5 to 0.6 seconds, because on every of the remaining 1290 pages all links must be removed, which point to a deleted pages. + In general, the performance of this method is dependent on the number of remaining pages -- **not** on the number of deleted pages: in the above example, **deleting all pages except** those 20 will need much less time. .. method:: copy_page(pno, to=-1) @@ -1288,10 +1295,9 @@ For details on **embedded files** refer to Appendix 3. :rtype: bool :returns: - * None: not a Form PDF or property not defined. - * True / False: the value of the property (either just set or existing for inquiries). + * None: not a Form PDF, or property not defined. + * True / False: the value of the property (either just set or existing for inquiries). Has no effect if the document is not a Form PDF. - Once set, the property cannot be removed again (which is no problem). .. method:: get_sigflags() @@ -1556,12 +1562,6 @@ For details on **embedded files** refer to Appendix 3. :type: bool - .. attribute:: is_pdf - - *True* if this is a PDF document, else *False*. - - :type: bool - .. attribute:: is_form_pdf *False* if this is not a PDF or has no form fields, otherwise the number of root form fields (fields with no ancestors). diff --git a/docs/faq.rst b/docs/faq.rst index b27578a45..894fe9240 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -950,7 +950,7 @@ With this method, only the **number of lines** will be controlled to not go beyo Line **width is ignored**. The surplus part of a line will simply be invisible. -However, for built-in fonts there are ways to calculate the line width beforehand - see :meth:`getTextlength`. +However, for built-in fonts there are ways to calculate the line width beforehand - see :meth:`get_text_length`. Here is another example. 
It inserts 4 text strings using the four different rotation options, and thereby explains, how the text insertion point must be chosen to achieve the desired result:: @@ -1508,10 +1508,10 @@ To create a page in *landscape* format, just exchange the width and height value Use this to create the page with another pre-defined paper format: ->>> w, h = fitz.PaperSize("letter-l") # 'Letter' landscape +>>> w, h = fitz.paper_size("letter-l") # 'Letter' landscape >>> page = doc.new_page(width = w, height = h) -The convenience function :meth:`PaperSize` knows over 40 industry standard paper formats to choose from. To see them, inspect dictionary :attr:`paperSizes`. Pass the desired dictionary key to :meth:`PaperSize` to retrieve the paper dimensions. Upper and lower case is supported. If you append "-L" to the format name, the landscape version is returned. +The convenience function :meth:`paper_size` knows over 40 industry standard paper formats to choose from. To see them, inspect dictionary :attr:`paperSizes`. Pass the desired dictionary key to :meth:`paper_size` to retrieve the paper dimensions. Upper and lower case is supported. If you append "-L" to the format name, the landscape version is returned. .. note:: Here is a 3-liner that creates a PDF with one empty page. 
Its file size is 470 bytes: @@ -1691,7 +1691,7 @@ This deals with joining PDF pages to form a new PDF with pages each combining tw src = fitz.open(infile) doc = fitz.open() # empty output PDF - width, height = fitz.PaperSize("a4") # A4 portrait output page format + width, height = fitz.paper_size("a4") # A4 portrait output page format r = fitz.Rect(0, 0, width, height) # define the 4 rectangles per page @@ -1774,7 +1774,7 @@ It features maintaining any metadata, table of contents and links contained in t if not meta["creator"]: meta["creator"] = "PyMuPDF PDF converter" - meta["modDate"] = fitz.getPDFnow() + meta["modDate"] = fitz.get_pdf_now() meta["creationDate"] = meta["modDate"] pdf.set_metadata(meta) @@ -2188,11 +2188,11 @@ ID array File identifier consisting of two byte strings. XRefStm int Offset of a cross-reference stream. See :ref:`AdobeManual` p. 109. ======= =========== =================================================================================== -Access this information via PyMuPDF with :meth:`Document.pdf_trailer`. +Access this information via PyMuPDF with :meth:`Document.pdf_trailer` or, equivalently, via :meth:`Document.xref_object` using -1 instead of a valid :data:`xref` number. >>> import fitz >>> doc=fitz.open("PyMuPDF.pdf") - >>> print(doc.xref_object(-1)) + >>> print(doc.xref_object(-1)) # or: print(doc.pdf_trailer()) << /Type /XRef /Index [ 0 8263 ] @@ -2294,7 +2294,7 @@ Vice cersa, you can also **store private metadata items** in a PDF. 
It is your r raise ValueError("PDF has no metadata") xref = int(value.replace("0 R", "")) # extract the metadata xref # add some private information - doc.xref_set_key(xref, "mykey", fitz.getPDFstr("北京 is Beijing")) + doc.xref_set_key(xref, "mykey", fitz.get_pdf_str("北京 is Beijing")) # # after executing the previous code snippet, we will see this: pprint(metadata) diff --git a/docs/font.rst b/docs/font.rst index 782d6a54b..f2e5414e4 100644 --- a/docs/font.rst +++ b/docs/font.rst @@ -16,7 +16,8 @@ A Font object also contains useful general information, like the font bbox, the :meth:`~Font.glyph_bbox` Glyph rectangle :meth:`~Font.glyph_name_to_unicode` Get unicode from glyph name :meth:`~Font.has_glyph` Return glyph id of unicode -:meth:`~Font.text_length` Compute text length under a fontsize +:meth:`~Font.text_length` Compute string length +:meth:`~Font.char_lengths` Tuple of char widths of a string :meth:`~Font.unicode_to_glyph_name` Get glyph name of a unicode :meth:`~Font.valid_codepoints` Array of supported unicodes :attr:`~Font.ascender` Font ascender @@ -75,15 +76,15 @@ A Font object also contains useful general information, like the font bbox, the * a font file will **always** be embedded in your PDF, * Greek and Cyrillic characters are supported without needing the *encoding* parameter. - Using *ordering >= 0*, or fontnames "cjk", "china-t", "china-s", "japan" or "korea" will **always create the same "universal"** font **"Droid Sans Fallback Regular"**. This font supports **all CJK and all Latin characters**, including Greek and Cyrillic. + Using *ordering >= 0*, or fontnames "cjk", "china-t", "china-s", "japan" or "korea" will **always create the same "universal"** font **"Droid Sans Fallback Regular"**. This font supports **all Chinese, Japanese, Korean and Latin characters**, including Greek and Cyrillic. This is a sans-serif font. - Actually, you would rarely ever need another font than **"Droid Sans Fallback Regular"**. 
**Except** that this font file is relatively large and adds about 1.65 MB (compressed) to your PDF file size. If you do not need CJK support, stick with specifying "helv", "tiro" etc., and you will get away with about 35 KB compressed. + Actually, you would rarely ever need another sans-serif font than **"Droid Sans Fallback Regular"**. **Except** that this font file is relatively large and adds about 1.65 MB (compressed) to your PDF file size. If you do not need CJK support, stick with specifying "helv", "tiro" etc., and you will get away with about 35 KB compressed. - If you **know** you have a mixture of CJK and Latin text, consider just using ``Font("cjk")`` because this supports everything and also significantly (by a factor of two to three) speeds up execution: MuPDF will always find any character in this single font and need not check fallbacks. + If you **know** you have a mixture of CJK and Latin text, consider just using ``Font("cjk")`` because this supports everything and also significantly (by a factor of up to three) speeds up execution: MuPDF will always find any character in this single font and never needs to check fallbacks. - But if you do specify a Base-14 fontname, you will still be able to also write CJK characters: MuPDF detects this situation and silently falls back to the universal font (which will then of course also be embedded in your PDF). + But if you do use some other font, you will still automatically be able to also write CJK characters: MuPDF detects this situation and silently falls back to the universal font (which will then of course also be embedded in your PDF). - *(New in v1.17.5)* Optionally, some new "reserved" fontname codes become available if you install `pymupdf-fonts `_. **"Fira Mono"** is a nice mono-spaced sans font set and **FiraGO** is another non-serifed "universal" font, set which supports all Latin (including Cyrillic and Greek) plus Thai, Arabian, Hewbrew and Devanagari -- but none of the CJK languages. 
The size of a FiraGO font is only a quarter of the "Droid Sans Fallback" size (compressed 400 KB vs. 1.65 MB) -- **and** it provides the weight bold, italic, bold-italic -- which the universal font doesn't. + *(New in v1.17.5)* Optionally, some new "reserved" fontname codes become available if you install `pymupdf-fonts `_, ``pip install pymupdf-fonts``. **"Fira Mono"** is a mono-spaced sans font set and **FiraGO** is another non-serifed "universal" font set which supports all Latin (including Cyrillic and Greek) plus Thai, Arabic, Hebrew and Devanagari -- but none of the CJK languages. The size of a FiraGO font is only a quarter of the "Droid Sans Fallback" size (compressed 400 KB vs. 1.65 MB) -- **and** it provides the weights bold, italic, bold-italic -- which the universal font doesn't. **"Space Mono"** is another nice and small mono-spaced font from Google Fonts, which supports Latin Extended characters and comes with all 4 important weights. @@ -115,7 +116,7 @@ A Font object also contains useful general information, like the font bbox, the .. method:: has_glyph(chr, language=None, script=0, fallback=False) - Check whether the unicode *chr* exists in the font or some fallback font. May be used to check whether any "TOFU" symbols will appear on output. + Check whether the unicode *chr* exists in the font or (optionally) some fallback font. May be used to check whether any "TOFU" symbols will appear on output. :arg int chr: the unicode of the character (i.e. *ord()*). :arg str language: the language -- currently unused. @@ -203,15 +204,56 @@ A Font object also contains useful general information, like the font bbox, the .. method:: text_length(text, fontsize=11) - Calculate the length of a unicode string. + Calculate the length in points of a unicode string. - :arg str text: a text string -- UTF-8 encoded. + .. note:: There is a functional overlap with :meth:`get_text_length` for Base-14 fonts only. + + :arg str text: a text string, UTF-8 encoded. 
:arg float fontsize: the fontsize. :rtype: float - :returns: the length of the string when stored in the PDF. Internally :meth:`glyph_advance` is used on a by-character level. If the font does not have a character, it will automatically be looked up in a fallback font. + :returns: the length of the string in points when stored in the PDF. If a character is not contained in the font, it will automatically be looked up in a fallback font. + + .. note:: This method was originally implemented in Python, based on calling :meth:`Font.glyph_advance`. For performance reasons, it has been rewritten in C for v1.18.14. To compute the width of a single character, you can now use either of the following without performance penalty: + + 1. ``font.glyph_advance(ord("Ä")) * fontsize`` + 2. ``font.text_length("Ä", fontsize=fontsize)`` + + For multi-character strings, the method offers a huge performance advantage compared to the previous implementation: instead of about 0.5 microseconds for each character, only 12.5 nanoseconds are required for the second and subsequent ones. + + .. method:: char_lengths(text, fontsize=11) + + *New in v1.18.14* + + Sequence of character lengths in points of a unicode string. + + :arg str text: a text string, UTF-8 encoded. + + :arg float fontsize: the fontsize. + + :rtype: tuple + + :returns: the lengths in points of the characters of a string when stored in the PDF. It works like :meth:`Font.text_length` broken down to single characters. This is a high speed method, used e.g. in :meth:`TextWriter.fill_textbox`. The following is true (allowing rounding errors): ``font.text_length(text) == sum(font.char_lengths(text))``. 
+ + >>> font = fitz.Font("helv") + >>> text = "PyMuPDF" + >>> font.text_length(text) + 50.115999937057495 + >>> fitz.get_text_length(text, fontname="helv") + 50.115999937057495 + >>> sum(font.char_lengths(text)) + 50.115999937057495 + >>> pprint(font.char_lengths(text)) + (7.336999952793121, # P + 5.5, # y + 9.163000047206879, # M + 6.115999937057495, # u + 7.336999952793121, # P + 7.942000031471252, # D + 6.721000015735626) # F + .. attribute:: buffer diff --git a/docs/functions.rst b/docs/functions.rst index 24904d23a..19343b657 100644 --- a/docs/functions.rst +++ b/docs/functions.rst @@ -21,18 +21,16 @@ Yet others are handy, general-purpose utilities. :meth:`Document.del_xml_metadata` PDF only: remove XML metadata :meth:`Document.delete_object` PDF only: delete an object :meth:`Document.get_new_xref` PDF only: create and return a new :data:`xref` entry -:meth:`Document._getOLRootNumber` PDF only: return / create :data:`xref` of */Outline* :meth:`Document.xml_metadata_xref` PDF only: return XML metadata :data:`xref` number :meth:`Document.xref_length` PDF only: return length of :data:`xref` table :meth:`Document.extract_font` PDF only: extract embedded font :meth:`Document.extract_image` PDF only: extract embedded image :meth:`Document.get_char_widths` PDF only: return a list of glyph widths of a font :meth:`Document.is_stream` PDF only: check whether an :data:`xref` is a stream object -:attr:`Document.FontInfos` PDF only: information on inserted fonts -:meth:`ImageProperties` return a dictionary of basic image properties -:meth:`getPDFnow` return the current timestamp in PDF format -:meth:`getPDFstr` return PDF-compatible string -:meth:`getTextlength` return string length for a given font & fontsize +:meth:`image_properties` return a dictionary of basic image properties +:meth:`get_pdf_now` return the current timestamp in PDF format +:meth:`get_pdf_str` return PDF-compatible string +:meth:`get_text_length` return string length for a given font & fontsize 
:meth:`Page.clean_contents` PDF only: clean the page's :data:`contents` objects :meth:`Page.get_contents` PDF only: return a list of content :data:`xref` numbers :meth:`Page.set_contents` PDF only: set page's :data:`contents` to some :data:`xref` @@ -43,26 +41,26 @@ Yet others are handy, general-purpose utilities. :meth:`Page.read_contents` PDF only: get complete, concatenated /Contents source :meth:`Page.wrap_contents` wrap contents with stacking commands :attr:`Page.is_wrapped` check whether contents wrapping is present -:meth:`planishLine` matrix to map a line to the x-axis -:meth:`PaperSize` return width, height for a known paper format -:meth:`PaperRect` return rectangle for a known paper format -:meth:`sRGB_to_pdf` return PDF RGB color tuple from a sRGB integer -:meth:`sRGB_to_rgb` return (R, G, B) color tuple from a sRGB integer +:meth:`planish_line` matrix to map a line to the x-axis +:meth:`paper_size` return width, height for a known paper format +:meth:`paper_rect` return rectangle for a known paper format +:meth:`sRGB_to_pdf` return PDF RGB color tuple from an sRGB integer +:meth:`sRGB_to_rgb` return (R, G, B) color tuple from an sRGB integer :meth:`recover_quad` return the quad for a text span ("dict" / "rawdict") :meth:`glyph_name_to_unicode` return unicode from a glyph name :meth:`unicode_to_glyph_name` return glyph name from a unicode :meth:`make_table` split rectangle in sub-rectangles :meth:`adobe_glyph_names` list of glyph names defined in **Adobe Glyph List** :meth:`adobe_glyph_unicodes` list of unicodes defined in **Adobe Glyph List** -:attr:`paperSizes` dictionary of pre-defined paper formats +:meth:`paper_sizes` dictionary of pre-defined paper formats +:meth:`recover_quad` compute the quad of a span ("dict", "rawdict") +:meth:`recover_char_quad` compute the quad of a char ("rawdict") +:meth:`recover_span_quad` compute the quad of a subset of span characters +:meth:`recover_line_quad` compute the quad of a subset of line spans 
:attr:`fitz_fontdescriptors` dictionary of available supplement fonts -:attr:`recover_quad` compute the quad of a span ("dict", "rawdict") -:attr:`recover_char_quad` compute the quad of a char ("rawdict") -:attr:`recover_span_quad` compute the quad of a subset of span characters -:attr:`recover_line_quad` compute the quad of a subset of line spans ==================================== ============================================================== - .. method:: PaperSize(s) + .. method:: paper_size(s) Convenience function to return width and height of a known paper format code. These values are given in pixels for the standard resolution 72 pixels = 1 inch. @@ -70,24 +68,24 @@ Yet others are handy, general-purpose utilities. A format name must be supplied as a string (case **in** \sensitive), optionally suffixed with "-L" (landscape) or "-P" (portrait). No suffix defaults to portrait. - :arg str s: any format name from above (upper or lower case), like *"A4"* or *"letter-l"*. + :arg str s: any format name from above in upper or lower case, like *"A4"* or *"letter-l"*. :rtype: tuple - :returns: *(width, height)* of the paper format. For an unknown format *(-1, -1)* is returned. Esamples: *fitz.PaperSize("A4")* returns *(595, 842)* and *fitz.PaperSize("letter-l")* delivers *(792, 612)*. + :returns: *(width, height)* of the paper format. For an unknown format *(-1, -1)* is returned. Examples: *fitz.paper_size("A4")* returns *(595, 842)* and *fitz.paper_size("letter-l")* delivers *(792, 612)*. ----- - .. method:: PaperRect(s) + .. method:: paper_rect(s) Convenience function to return a :ref:`Rect` for a known paper format. - :arg str s: any format name supported by :meth:`PaperSize`. + :arg str s: any format name supported by :meth:`paper_size`. :rtype: :ref:`Rect` - :returns: *fitz.Rect(0, 0, width, height)* with *width, height=fitz.PaperSize(s)*. + :returns: *fitz.Rect(0, 0, width, height)* with *width, height=fitz.paper_size(s)*. 
 >>> import fitz - >>> fitz.PaperRect("letter-l") + >>> fitz.paper_rect("letter-l") fitz.Rect(0.0, 0.0, 792.0, 612.0) >>> @@ -101,7 +99,19 @@ Yet others are handy, general-purpose utilities. :arg int srgb: an integer of format RRGGBB, where each color component is an integer in range(255). - :returns: a tuple (red, green, blue) with float items in intervall *0 <= item <= 1* representing the same color. + :returns: a tuple (red, green, blue) with float items in interval *0 <= item <= 1* representing the same color. Example ``sRGB_to_pdf(0xff0000) = (1, 0, 0)`` (red). + +----- + + .. method:: sRGB_to_rgb(srgb) + + *New in v1.17.4* + + Convenience function returning a color (red, green, blue) for a given *sRGB* color integer. + + :arg int srgb: an integer of format RRGGBB, where each color component is an integer in range(256). + + :returns: a tuple (red, green, blue) with integer items in ``range(256)`` representing the same color. Example ``sRGB_to_rgb(0xff0000) = (255, 0, 0)`` (red). ----- @@ -126,7 +136,7 @@ Yet others are handy, general-purpose utilities. Return the glyph name of a unicode number, based on the **Adobe Glyph List**. - :arg int che: the unicode given by e.g. ``ord("ß")``. The function is based on the `Adobe Glyph List `_. + :arg int ch: the unicode given by e.g. ``ord("ß")``. The function is based on the `Adobe Glyph List `_. :rtype: str :returns: the glyph name. E.g. ``fitz.unicode_to_glyph_name(ord("Ä"))`` returns ``'Adieresis'``. @@ -159,18 +169,6 @@ Yet others are handy, general-purpose utilities. .. note:: A similar functionality is provided by package `fontTools `_ in its *agl* sub-package. ------ - - .. method:: sRGB_to_rgb(srgb) - - *New in v1.17.4* - - Convenience function returning a color (red, green, blue) for a given *sRGB* color integer. - - :arg int srgb: an integer of format RRGGBB, where each color component is an integer in range(255). 
- - :returns: a tuple (red, green, blue) with integer items in intervall *0 <= item <= 255* representing the same color. - ----- .. method:: recover_quad(line_dir, span) @@ -203,7 +201,7 @@ Yet others are handy, general-purpose utilities. ----- - .. method:: planishLine(p1, p2) + .. method:: planish_line(p1, p2) *(New in version 1.16.2)* @@ -213,21 +211,19 @@ Yet others are handy, general-purpose utilities. :arg point_like p2: end point of the line. :rtype: :ref:`Matrix` - :returns: + :returns: a matrix which combines a rotation and a translation:: - a matrix which combines a rotation and a translation:: - - p1 = fitz.Point(1, 1) - p2 = fitz.Point(4, 5) - abs(p2 - p1) # distance of points + >>> p1 = fitz.Point(1, 1) + >>> p2 = fitz.Point(4, 5) + >>> abs(p2 - p1) # distance of points 5.0 - m = fitz.planishLine(p1, p2) - p1 * m + >>> m = fitz.planish_line(p1, p2) + >>> p1 * m Point(0.0, 0.0) - p2 * m + >>> p2 * m Point(5.0, -5.960464477539063e-08) - # distance of the resulting points - abs(p2 * m - p1 * m) + >>> # distance of the resulting points + >>> abs(p2 * m - p1 * m) 5.0 @@ -237,9 +233,9 @@ Yet others are handy, general-purpose utilities. ----- - .. attribute:: paperSizes + .. method:: paper_sizes - A dictionary of pre-defines paper formats. Used as basis for :meth:`PaperSize`. + A dictionary of pre-defined paper formats. Used as basis for :meth:`paper_size`. ----- @@ -263,10 +259,13 @@ Yet others are handy, general-purpose utilities. 'serif': True, 'glyphs': 1485} + If ``pymupdf-fonts`` is not installed, the dictionary is empty. + + The dictionary keys can be used to define a :ref:`Font` via e.g. ``font = fitz.Font("fimo")`` -- just like you can do it with the builtin fonts "Helvetica" and friends. ----- - .. method:: getPDFnow() + .. method:: get_pdf_now() Convenience function to return the current local timestamp in PDF compatible format, e.g. 
*D:20170501121525-04'00'* for local datetime May 1, 2017, 12:15:25 in a timezone 4 hours westward of the UTC meridian. @@ -275,7 +274,7 @@ Yet others are handy, general-purpose utilities. ----- - .. method:: getTextlength(text, fontname="helv", fontsize=11, encoding=TEXT_ENCODING_LATIN) + .. method:: get_text_length(text, fontname="helv", fontsize=11, encoding=TEXT_ENCODING_LATIN) *(New in version 1.14.7)* @@ -283,20 +282,22 @@ Yet others are handy, general-purpose utilities. :arg str text: the text string. :arg str fontname: the fontname. Must be one of either the :ref:`Base-14-Fonts` or the CJK fonts, identified by their "reserved" fontnames (see table in :meth.`Page.insert_font`). - :arg float fontsize: size of the font. + :arg float fontsize: the fontsize. :arg int encoding: the encoding to use. Besides 0 = Latin, 1 = Greek and 2 = Cyrillic (Russian) are available. Relevant for Base-14 fonts "Helvetica", "Courier" and "Times" and their variants only. Make sure to use the same value as in the corresponding text insertion. :rtype: float :returns: the length in points the string will have (e.g. when used in :meth:`Page.insert_text`). - .. note:: This function will only do the calculation -- it won't insert font or text. + .. note:: This function will only do the calculation -- it won't insert font nor text. - .. warning:: If you use this function to determine the required rectangle width for the (:ref:`Page` or :ref:`Shape`) *insert_textbox* methods, be aware that they calculate on a **by-character level**. Because of rounding effects, this will mostly lead to a slightly larger number: *sum([fitz.getTextlength(c) for c in text]) > fitz.getTextlength(text)*. So either (1) do the same, or (2) use something like *fitz.getTextlength(text + "'")* for your calculation. + .. note:: The :ref:`Font` class offers a similar method, :meth:`Font.text_length`, which supports Base-14 fonts and any font with a character map (CMap, Type 0 fonts). + + .. 
warning:: If you use this function to determine the required rectangle width for the (:ref:`Page` or :ref:`Shape`) *insert_textbox* methods, be aware that they calculate on a **by-character level**. Because of rounding effects, this will mostly lead to a slightly larger number: *sum([fitz.get_text_length(c) for c in text]) > fitz.get_text_length(text)*. So either (1) do the same, or (2) use something like *fitz.get_text_length(text + "'")* for your calculation. ----- - .. method:: getPDFstr(text) + .. method:: get_pdf_str(text) - Make a PDF-compatible string: if the text contains code points *ord(c) > 255*, then it will be converted to UTF-16BE with BOM as a hexadecimal character string enclosed in "<>" brackets like **. Otherwise, it will return the string enclosed in (round) brackets, replacing any characters outside the ASCII range with some special code. Also, every "(", ")" or backslash is escaped with an additional backslash. + Make a PDF-compatible string: if the text contains code points *ord(c) > 255*, then it will be converted to UTF-16BE with BOM as a hexadecimal character string enclosed in "<>" brackets like **. Otherwise, it will return the string enclosed in (round) brackets, replacing any characters outside the ASCII range with some special code. Also, every "(", ")" or backslash is escaped with a backslash. :arg str text: the object to convert @@ -305,7 +306,7 @@ Yet others are handy, general-purpose utilities. ----- - .. method:: ImageProperties(stream) + .. method:: image_properties(stream) *(New in version 1.14.14)* @@ -329,7 +330,7 @@ Yet others are handy, general-purpose utilities. Example: - >>> fitz.ImageProperties(open("img-clip.jpg","rb")) + >>> fitz.image_properties(open("img-clip.jpg","rb")) {'bpc': 8, 'format': 9, 'colorspace': 3, 'height': 325, 'width': 244, 'ext': 'jpeg', 'size': 14161} >>> @@ -442,7 +443,7 @@ Yet others are handy, general-purpose utilities. 
PDF only: Clean and concatenate all :data:`contents` objects associated with this page. "Cleaning" includes syntactical corrections, standardizations and "pretty printing" of the contents stream. Discrepancies between :data:`contents` and :data:`resources` objects will also be corrected if sanitize is true. See :meth:`Page.get_contents` for more details. - Changed in version 1.16.0 Annotations are no longer implicitely cleaned by this method. Use :meth:`Annot._cleanContents` separately. + Changed in version 1.16.0 Annotations are no longer implicitly cleaned by this method. Use :meth:`Annot.clean_contents` separately. :arg bool sanitize: *(new in v1.17.6)* if true, synchronization between resources and their actual use in the contents object is snychronized. For example, if a font is not actually used for any text of the page, then it will be deleted from the ``/Resources/Font`` object. @@ -598,22 +599,6 @@ Yet others are handy, general-purpose utilities. .. note: The returned *basename* in general is **not** the original file name, but it probably has some similarity. - .. attribute:: Document.FontInfos - - Contains following information for any font inserted via :meth:`Page.insert_font` in **this** session of PyMuPDF: - - * xref *(int)* -- XREF number of the */Type/Font* object. - * info *(dict)* -- detail font information with the following keys: - - * name *(str)* -- name of the basefont - * idx *(int)* -- index number for multi-font files - * type *(str)* -- font type (like "TrueType", "Type0", etc.) - * ext *(str)* -- extension to be used, when font is extracted to a file (see :ref:`FontExtensions`). - * glyphs (*list*) -- list of glyph numbers and widths (filled by textinsertion methods). - - :rtype: list - - ----- .. 
method:: recover_quad(line_dir, span) diff --git a/docs/installation.rst b/docs/installation.rst index 785251a2e..e61373c10 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -49,11 +49,9 @@ Now perform a *python setup.py install*. Option 2: Install from Binaries -------------------------------- -You can install PyMuPDF from Python wheels. The wheels are *self-contained*, i.e. you will **not need any other software** nor download / install MuPDF to run PyMuPDF scripts. -This installation option is available for all MS Windows and the most **popular 64-bit** Mac OSX and Linux platforms for Python versions 3.6 through 3.9. -Windows binaries are provided for Python **32-bit and 64-bit** versions. - -.. note:: For the time being, wheels for Python versions 2.7 and 3.5 are generated as well, but not uploaded to PyPI until explicitely requested via an issue. Starting year 2021, support for these wheel versions will be dropped entirely. +You can install PyMuPDF from Python wheels. Wheels are *self-contained*, i.e. you will **not need any other software** nor download / install MuPDF to run PyMuPDF scripts. +This installation option is available for all MS Windows and the most **popular 64-bit** Mac OSX and Linux platforms for Python versions 3.6 through 3.9. Since version 1.18.13, Linux ARM 64-bit architectures are also supported. +Windows binaries are provided for Python 64-bit **and** 32-bit versions. 
**Overview of wheel names (PyMuPDF version is x.xx.xx):** diff --git a/docs/irect.rst b/docs/irect.rst index 684f84fd4..33ee4cfa0 100644 --- a/docs/irect.rst +++ b/docs/irect.rst @@ -10,9 +10,7 @@ IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corn **Attribute / Method** **Short Description** ============================== =========================================== :meth:`IRect.contains` checks containment of another object -:meth:`IRect.getArea` calculate rectangle area -:meth:`IRect.getRect` return a :ref:`Rect` with same coordinates -:meth:`IRect.getRectArea` calculate rectangle area +:meth:`IRect.get_area` calculate rectangle area :meth:`IRect.intersect` common part with another rectangle :meth:`IRect.intersects` checks for non-empty intersection :meth:`IRect.morph` transform with a point and a matrix @@ -21,9 +19,9 @@ IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corn :attr:`IRect.bottom_left` bottom left point, synonym *bl* :attr:`IRect.bottom_right` bottom right point, synonym *br* :attr:`IRect.height` height of the rectangle -:attr:`IRect.isEmpty` whether rectangle is empty -:attr:`IRect.isInfinite` whether rectangle is infinite -:attr:`IRect.rect` equals result of method *getRect()* +:attr:`IRect.is_empty` whether rectangle is empty +:attr:`IRect.is_infinite` whether rectangle is infinite +:attr:`IRect.rect` the :ref:`Rect` equivalent :attr:`IRect.top_left` top left point, synonym *tl* :attr:`IRect.top_right` top_right point, synonym *tr* :attr:`IRect.quad` :ref:`Quad` made from rectangle corners @@ -54,15 +52,8 @@ IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corn The other parameters mean integer coordinates. - .. method:: getRect() - A convenience function returning a :ref:`Rect` with the same coordinates. Also available as attribute *rect*. - - :rtype: :ref:`Rect` - - .. method:: getRectArea([unit]) - - .. method:: getArea([unit]) + .. 
method:: get_area([unit]) Calculates the area of the rectangle and, with no parameter, equals *abs(IRect)*. Like an empty rectangle, the area of an infinite rectangle is also zero. @@ -145,6 +136,12 @@ IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corn :type: :ref:`Point` + .. attribute:: rect + + The :ref:`Rect` with the same coordinates as floats. + + :type: :ref:`Rect` + .. attribute:: quad The quadrilateral *Quad(irect.tl, irect.tr, irect.bl, irect.br)*. @@ -187,13 +184,13 @@ IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corn :type: int - .. attribute:: isInfinite + .. attribute:: is_infinite *True* if rectangle is infinite, *False* otherwise. :type: bool - .. attribute:: isEmpty + .. attribute:: is_empty *True* if rectangle is empty, *False* otherwise. diff --git a/docs/matrix.rst b/docs/matrix.rst index 30ecaeaab..0cf235538 100644 --- a/docs/matrix.rst +++ b/docs/matrix.rst @@ -21,10 +21,10 @@ Please note: ================================ ============================================== **Method / Attribute** **Description** ================================ ============================================== -:meth:`Matrix.preRotate` perform a rotation -:meth:`Matrix.preScale` perform a scaling -:meth:`Matrix.preShear` perform a shearing (skewing) -:meth:`Matrix.preTranslate` perform a translation (shifting) +:meth:`Matrix.prerotate` perform a rotation +:meth:`Matrix.prescale` perform a scaling +:meth:`Matrix.preshear` perform a shearing (skewing) +:meth:`Matrix.pretranslate` perform a translation (shifting) :meth:`Matrix.concat` perform a matrix multiplication :meth:`Matrix.invert` calculate the inverted matrix :meth:`Matrix.norm` the Euclidean norm @@ -34,7 +34,7 @@ Please note: :attr:`Matrix.d` zoom factor Y direction :attr:`Matrix.e` horizontal shift :attr:`Matrix.f` vertical shift -:attr:`Matrix.isRectilinear` true if rect corners will remain rect corners +:attr:`Matrix.is_rectilinear` true if rect 
corners will remain rect corners ================================ ============================================== **Class API** @@ -75,7 +75,7 @@ Please note: Return the Euclidean norm of the matrix as a vector. - .. method:: preRotate(deg) + .. method:: prerotate(deg) Modify the matrix to perform a counter-clockwise rotation for positive *deg* degrees, else clockwise. The matrix elements of an identity matrix will change in the following way: @@ -83,7 +83,7 @@ Please note: :arg float deg: The rotation angle in degrees (use conventional notation based on Pi = 180 degrees). - .. method:: preScale(sx, sy) + .. method:: prescale(sx, sy) Modify the matrix to scale by the zoom factors sx and sy. Has effects on attributes *a* thru *d* only: *[a, b, c, d, e, f] -> [a*sx, b*sx, c*sy, d*sy, e, f]*. @@ -91,7 +91,7 @@ Please note: :arg float sy: Zoom factor in Y direction. For the effect see description of attribute *d*. - .. method:: preShear(sx, sy) + .. method:: preshear(sx, sy) Modify the matrix to perform a shearing, i.e. transformation of rectangles into parallelograms (rhomboids). Has effects on attributes *a* thru *d* only: *[a, b, c, d, e, f] -> [c*sy, d*sy, a*sx, b*sx, e, f]*. @@ -99,7 +99,7 @@ Please note: :arg float sy: Shearing effect in Y direction. See attribute *b*. - .. method:: preTranslate(tx, ty) + .. method:: pretranslate(tx, ty) Modify the matrix to perform a shifting / translation operation along the x and / or y axis. Has effects on attributes *e* and *f* only: *[a, b, c, d, e, f] -> [a, b, c, d, tx*a + ty*c, tx*b + ty*d]*. @@ -162,7 +162,7 @@ Please note: :type: float - .. attribute:: isRectilinear + .. attribute:: is_rectilinear Rectilinear means that no shearing is present and that any rotations are integer multiples of 90 degrees. Usually this is used to confirm that (axis-aligned) rectangles before the transformation are still axis-aligned rectangles afterwards. @@ -238,6 +238,6 @@ Rotating --------- .. 
|rot60| image:: images/img-rot-60.* -Finally a rotation by 30 clockwise degrees (*preRotate(-30)*). +Finally a rotation by 30 clockwise degrees (*prerotate(-30)*). |rot60| diff --git a/docs/new-annots.py b/docs/new-annots.py index ba2576f4a..74d459d89 100644 --- a/docs/new-annots.py +++ b/docs/new-annots.py @@ -151,7 +151,7 @@ def print_descr(annot): print_descr(annot) # annot.rect r += displ -annot = page.addStampAnnot(r, stamp=10) # 'Stamp' +annot = page.add_stamp_annot(r, stamp=10) # 'Stamp' annot.set_colors(stroke=green) annot.update() print_descr(annot) diff --git a/docs/pixmap.rst b/docs/pixmap.rst index 3fb8dd643..01d917eed 100644 --- a/docs/pixmap.rst +++ b/docs/pixmap.rst @@ -28,8 +28,8 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :meth:`Pixmap.gamma_with` apply a gamma factor to the pixmap :meth:`Pixmap.tobytes` return a memory area in a variety of formats :meth:`Pixmap.invert_irect` invert the pixels of a given area -:meth:`Pixmap.pil_save` save as image using pillow (experimental) -:meth:`Pixmap.pil_tobytes` write image stream using pillow (experimental) +:meth:`Pixmap.pil_save` save as image using pillow +:meth:`Pixmap.pil_tobytes` write to ``bytes`` object using pillow :meth:`Pixmap.pixel` return the value of a pixel :meth:`Pixmap.set_alpha` set alpha values :meth:`Pixmap.set_pixel` set the color of a pixel @@ -94,7 +94,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg irect_like clip: restrict the resulting pixmap to this region of the **scaled** pixmap. - .. note:: If width or height are not *de facto* integers (i.e. *float(int(value) != value*), then the resulting pixmap will have an alpha channel. + .. note:: If width or height are not *de facto* integers (i.e. ``value.is_integer() != True``), then the resulting pixmap **will have an alpha channel**. .. 
method:: __init__(self, source, alpha=1) @@ -268,7 +268,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". If copying from :data:`CS_GRAY` to :data:`CS_RGB`, the source gray-shade value will be put into each of the three rgb component bytes. If the other way round, *(r + g + b) / 3* will be taken as the gray-shade value of the target. - Between *irect* and the target pixmap's rectangle, an "intersection" is calculated at first. This takes into account the rectangle coordinates and the current attribute values ``source.x`` and ``source.y`` (which you are free to modify for this purpose via :meth:`Pixmap.set_origin`). Then the corresponding data of this intersection are copied. If the intersection is empty, nothing will happen. + Between *irect* and the target pixmap's rectangle, an "intersection" is calculated at first. This takes into account the rectangle coordinates and the current attribute values :attr:`Pixmap.x` and :attr:`Pixmap.y` (which you are free to modify for this purpose via :meth:`Pixmap.set_origin`). Then the corresponding data of this intersection are copied. If the intersection is empty, nothing will happen. :arg source: source pixmap. :type source: :ref:`Pixmap` @@ -283,9 +283,6 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". :arg str output: The requested image format. The default is the filename's extension. If not recognized, *png* is assumed. For other possible values see :ref:`PixmapOutput`. - .. method:: save(filename) - - Equal to *pix.save(filename, "png")*. .. method:: tobytes(output="png") @@ -307,7 +304,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". A simple example: ``pix.pil_save("some.jpg", optimize=True, dpi=(150, 150))``. For details on other parameters see the Pillow documentation. - .. 
note:: *(Changed in v1.18.0)* :meth:`Pixmap.save` and :meth:`Pixmap.save` now also set resolution / dpi from *xres* / *yres* automatically, when saving a PNG image. + .. note:: *(Changed in v1.18.0)* :meth:`Pixmap.save` now also sets dpi from *xres* / *yres* automatically, when saving a PNG image. .. method:: pil_tobytes(*args, **kwargs) @@ -315,6 +312,8 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". Return an image as a bytes object in the specified format using Pillow. For example ``stream = pix.pil_tobytes(format="JPEG", optimize=True)``. Also see above. For details on other parameters see the Pillow documentation. + :rtype: bytes + .. attribute:: alpha @@ -324,7 +323,7 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. attribute:: digest - The MD5 hashcode of the pixmap. This is a technical value used for unique identifications. + The MD5 hashcode (16 bytes) of the pixmap. This is a technical value used for unique identifications. :type: bytes @@ -403,13 +402,13 @@ Have a look at the :ref:`FAQ` section to see some pixmap usage "at work". .. attribute:: xres - Horizontal resolution in dpi (dots per inch). Please also see :data:`resolution`. + Horizontal resolution in dpi (dots per inch). Please also see :data:`resolution`. Cannot directly be changed -- use :meth:`Pixmap.set_dpi`. :type: int .. attribute:: yres - Vertical resolution in dpi (dots per inch). Please also see :data:`resolution`. + Vertical resolution in dpi (dots per inch). Please also see :data:`resolution`. Cannot directly be changed -- use :meth:`Pixmap.set_dpi`. :type: int diff --git a/docs/quad.rst b/docs/quad.rst index f8c125b03..82a795434 100644 --- a/docs/quad.rst +++ b/docs/quad.rst @@ -10,7 +10,7 @@ Quads can **be obtained** as results of text search methods (:meth:`Page.search_ .. 
note:: - * If the corners of a rectangle are transformed with a **rotation**, **scale** or **translation** :ref:`Matrix`, then the resulting quad is **rectangular**, i.e. its corners again enclose angles of 90 degrees. Property :attr:`Quad.isRectangular` checks whether a quad can be thought of being the result of such an operation. + * If the corners of a rectangle are transformed with a **rotation**, **scale** or **translation** :ref:`Matrix`, then the resulting quad is **rectangular** (= congruent to a rectangle), i.e. all of its corners again enclose angles of 90 degrees. Property :attr:`Quad.is_rectangular` checks whether a quad can be thought of being the result of such an operation. * This is not true for all matrices: e.g. shear matrices produce parallelograms, and non-invertible matrices deliver "degenerate" tetragons like triangles or lines. @@ -26,9 +26,9 @@ Quads can **be obtained** as results of text search methods (:meth:`Page.search_ :attr:`Quad.ur` upper right point :attr:`Quad.ll` lower left point :attr:`Quad.lr` lower right point -:attr:`Quad.isConvex` true if quad is a convex set -:attr:`Quad.isEmpty` true if quad is an empty set -:attr:`Quad.isRectangular` true if quad is a (rotated) rectangle +:attr:`Quad.is_convex` true if quad is a convex set +:attr:`Quad.is_empty` true if quad is an empty set +:attr:`Quad.is_rectangular` true if quad is congruent to a rectangle :attr:`Quad.rect` smallest containing :ref:`Rect` :attr:`Quad.width` the longest width value :attr:`Quad.height` the longest height value @@ -67,7 +67,7 @@ Quads can **be obtained** as results of text search methods (:meth:`Page.search_ :arg matrix_like matrix: the matrix. :returns: a new quad. 
The effect is achieved by using the following code:: - >>> T = fitz.Matrix(1, 1).preTranslate(fixpoint.x, fixpoint.y) + >>> T = fitz.Matrix(1, 1).pretranslate(fixpoint.x, fixpoint.y) >>> result = self * ~T * matrix * T So the quad is translated such, that fixpoint becomes the origin (0, 0), then the matrix is applied to it, and finally a reverse translation is done. @@ -106,21 +106,21 @@ Quads can **be obtained** as results of text search methods (:meth:`Page.search_ :type: :ref:`Point` - .. attribute:: isConvex + .. attribute:: is_convex *(New in version 1.16.1)* - True if every line connecting two points of the quad is inside the quad. We in addition also make sure here, that the quad is not "degenerate", i.e. not all corners are on the same line (which would still qualify as convexity in the mathematical sense). + Checks if for any two points of the quad, all points on their connecting line also belong to the quad. :type: bool - .. attribute:: isEmpty + .. attribute:: is_empty True if enclosed area is zero, which means that at least three of the four corners are on the same line. If this is false, the quad may still be degenerate or not look like a tetragon at all (triangles, parallelograms, trapezoids, ...). :type: bool - .. attribute:: isRectangular + .. attribute:: is_rectangular True if all corner angles are 90 degrees. This implies that the quad is **convex and not empty**. 
diff --git a/docs/rect.rst b/docs/rect.rst index 381b4d920..5aa4d3416 100644 --- a/docs/rect.rst +++ b/docs/rect.rst @@ -24,10 +24,9 @@ Hence some useful classification: **Methods / Attributes** **Short Description** ============================= ======================================================= :meth:`Rect.contains` checks containment of another object -:meth:`Rect.getArea` calculate rectangle area -:meth:`Rect.getRectArea` calculate rectangle area -:meth:`Rect.includePoint` enlarge rectangle to also contain a point -:meth:`Rect.includeRect` enlarge rectangle to also contain another one +:meth:`Rect.get_area` calculate rectangle area +:meth:`Rect.include_point` enlarge rectangle to also contain a point +:meth:`Rect.include_rect` enlarge rectangle to also contain another one :meth:`Rect.intersect` common part with another rectangle :meth:`Rect.intersects` checks for non-empty intersections :meth:`Rect.morph` transform with a point and a matrix @@ -39,8 +38,8 @@ Hence some useful classification: :attr:`Rect.bottom_right` bottom right point, synonym *br* :attr:`Rect.height` rectangle height :attr:`Rect.irect` equals result of method *round()* -:attr:`Rect.isEmpty` whether rectangle is empty -:attr:`Rect.isInfinite` whether rectangle is infinite +:attr:`Rect.is_empty` whether rectangle is empty +:attr:`Rect.is_infinite` whether rectangle is infinite :attr:`Rect.top_left` top left point, synonym *tl* :attr:`Rect.top_right` top_right point, synonym *tr* :attr:`Rect.quad` :ref:`Quad` made from rectangle corners @@ -87,11 +86,11 @@ Hence some useful classification: 3. **Possible paradox:** The result may be empty, **even if** the rectangle is **not** empty! In such cases, the result obviously does **not** contain the rectangle. This is because MuPDF's algorithm allows for a small tolerance (1e-3). Example: >>> r = fitz.Rect(100, 100, 200, 100.001) - >>> r.isEmpty # rect is NOT empty + >>> r.is_empty # rect is NOT empty False >>> r.round() # but its irect IS empty! 
fitz.IRect(100, 100, 200, 100) - >>> r.round().isEmpty + >>> r.round().is_empty True :rtype: :ref:`IRect` @@ -113,23 +112,22 @@ Hence some useful classification: :arg r: Second rectangle :type r: :ref:`Rect` - .. method:: includeRect(r) + .. method:: include_rect(r) The smallest rectangle containing the current one and *r* is calculated and **replaces the current** one. If either rectangle is infinite, the result is also infinite. If one is empty, the other one will be taken as the result. :arg r: Second rectangle :type r: :ref:`Rect` - .. method:: includePoint(p) + .. method:: include_point(p) - The smallest rectangle containing the current one and point *p* is calculated and **replaces the current** one. **Infinite rectangles remain unchanged.** To create a rectangle containing a series of points, start with (the empty) *fitz.Rect(p1, p1)* and successively perform *includePoint* operations for the other points. + The smallest rectangle containing the current one and point *p* is calculated and **replaces the current** one. **Infinite rectangles remain unchanged.** To create a rectangle containing a series of points, start with (the empty) *fitz.Rect(p1, p1)* and successively perform *include_point* operations for the other points. :arg p: Point to include. :type p: :ref:`Point` - .. method:: getRectArea([unit]) - .. method:: getArea([unit]) + .. method:: get_area([unit]) Calculate the area of the rectangle and, with no parameter, equals *abs(rect)*. Like an empty rectangle, the area of an infinite rectangle is also zero. So, at least one of *fitz.Rect(p1, p2)* and *fitz.Rect(p2, p1)* has a zero area. @@ -251,13 +249,13 @@ Hence some useful classification: :type: float - .. attribute:: isInfinite + .. attribute:: is_infinite *True* if rectangle is infinite, *False* otherwise. :type: bool - .. attribute:: isEmpty + .. attribute:: is_empty *True* if rectangle is empty, *False* otherwise. 
diff --git a/docs/textpage.rst b/docs/textpage.rst index b2021035b..971ad8938 100644 --- a/docs/textpage.rst +++ b/docs/textpage.rst @@ -149,7 +149,7 @@ Methods :meth:`TextPage.extractDICT`, :meth:`TextPage.extractJSON`, :meth:`TextP All PyMuPDF geometry objects herein (points, rectangles, matrices) are represented by there **"like"** formats: a :data:`rect_like` *tuple* is used instead of a :ref:`Rect`, etc. The reasons for this are performance and memory considerations: * This code is written in C, where Python tuples can easily be generated. The geometry objects on the other hand are defined in Python source only. A conversion of each Python tuple into its corresponding geometry object would add significant -- and largely unnecessary -- execution time. -* A 4-tuple needs about 168 bytes, the corresponding :ref:`Rect` 472 bytes - almost three times the size. A "dict" dictionary for a text-heavy page contains 300+ bbox objects -- which thus require about 50 KB storage as 4-tuples versus 140 KB as :ref:`Rect` objects. A "rawdict" output for such a page will however contain **4 to 5 thousand** bboxes, so in this case we talk about 750 KB versus 11 MB. +* A 4-tuple needs about 168 bytes, the corresponding :ref:`Rect` 472 bytes - almost three times the size. A "dict" dictionary for a text-heavy page contains 300+ bbox objects -- which thus require about 50 KB storage as 4-tuples versus 140 KB as :ref:`Rect` objects. A "rawdict" output for such a page will however contain **4 to 5 thousand** bboxes, so in this case we talk about 750 KB versus 2 MB. Please also note, that only **bboxes** (= :data:`rect_like` 4-tuples) are returned, whereas a :ref:`TextPage` actually has the **full position information** -- in :ref:`Quad` format. The reason for this decision is again a memory consideration: a :data:`quad_like` needs 488 bytes (3 times the size of a :data:`rect_like`). 
Given the mentioned amounts of generated bboxes, returning :data:`quad_like` information would have a significant impact. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 2cf05d55c..0739faef6 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -213,21 +213,21 @@ We can also extract all text, images and other information of a page in many dif Use one of the following strings for *opt* to obtain different formats [#f2]_: -* *"text"*: (default) plain text with line breaks. No formatting, no text position details, no images. +* **"text"**: (default) plain text with line breaks. No formatting, no text position details, no images. -* *"blocks"*: generate a list of text blocks (= paragraphs). +* **"blocks"**: generate a list of text blocks (= paragraphs). -* *"words"*: generate a list of words (strings not containing spaces). +* **"words"**: generate a list of words (strings not containing spaces). -* *"html"*: creates a full visual version of the page including any images. This can be displayed with your internet browser. +* **"html"**: creates a full visual version of the page including any images. This can be displayed with your internet browser. -* *"dict"* / *"json"*: same information level as HTML, but provided as a Python dictionary or resp. JSON string. See :meth:`TextPage.extractDICT` resp. :meth:`TextPage.extractJSON` for details of its structure. +* **"dict"** / **"json"**: same information level as HTML, but provided as a Python dictionary or resp. JSON string. See :meth:`TextPage.extractDICT` for details of its structure. -* *"rawdict"* / *"rawjson"*: a super-set of :meth:`TextPage.extractDICT`. It additionally provides character detail information like XML. See :meth:`TextPage.extractRAWDICT` for details of its structure. +* **"rawdict"** / **"rawjson"**: a super-set of **"dict"** / **"json"**. It additionally provides character detail information like XML. See :meth:`TextPage.extractRAWDICT` for details of its structure. 
-* *"xhtml"*: text information level as the TEXT version but includes images. Can also be displayed by internet browsers. +* **"xhtml"**: text information level as the TEXT version but includes images. Can also be displayed by internet browsers. -* *"xml"*: contains no images, but full position and font information down to each single text character. Use an XML module to interpret. +* **"xml"**: contains no images, but full position and font information down to each single text character. Use an XML module to interpret. To give you an idea about the output of these alternatives, we did text example extracts. See :ref:`Appendix2`. diff --git a/docs/version.rst b/docs/version.rst index ed8d3df52..67e017f65 100644 --- a/docs/version.rst +++ b/docs/version.rst @@ -1,6 +1,6 @@ Covered Version -------------------- -This documentation covers PyMuPDF v1.18.13 features as of **2021-05-05 06:32:22**. +This documentation covers PyMuPDF v1.18.14 features as of **2021-06-01 08:11:38**. .. note:: The major and minor versions of **PyMuPDF** and **MuPDF** will always be the same. Only the third qualifier (patch level) may deviate from that of MuPDF. 
\ No newline at end of file diff --git a/docs/wheelnames.txt b/docs/wheelnames.txt index e4d318412..ef24e33ab 100644 --- a/docs/wheelnames.txt +++ b/docs/wheelnames.txt @@ -1,16 +1,20 @@ PyMuPDF-x.xx.xx-cp36-cp36m-macosx_10_9_x86_64.whl PyMuPDF-x.xx.xx-cp36-cp36m-manylinux2010_x86_64.whl -PyMuPDF-x.xx.xx-cp36-cp36m-win32.whl +PyMuPDF-x.xx.xx-cp36-cp36m-manylinux2014_aarch64.whl PyMuPDF-x.xx.xx-cp36-cp36m-win_amd64.whl +PyMuPDF-x.xx.xx-cp36-cp36m-win32.whl PyMuPDF-x.xx.xx-cp37-cp37m-macosx_10_9_x86_64.whl PyMuPDF-x.xx.xx-cp37-cp37m-manylinux2010_x86_64.whl -PyMuPDF-x.xx.xx-cp37-cp37m-win32.whl +PyMuPDF-x.xx.xx-cp37-cp37m-manylinux2014_aarch64.whl PyMuPDF-x.xx.xx-cp37-cp37m-win_amd64.whl +PyMuPDF-x.xx.xx-cp37-cp37m-win32.whl PyMuPDF-x.xx.xx-cp38-cp38-macosx_10_9_x86_64.whl PyMuPDF-x.xx.xx-cp38-cp38-manylinux2010_x86_64.whl -PyMuPDF-x.xx.xx-cp38-cp38-win32.whl +PyMuPDF-x.xx.xx-cp38-cp38-manylinux2014_aarch64.whl PyMuPDF-x.xx.xx-cp38-cp38-win_amd64.whl +PyMuPDF-x.xx.xx-cp38-cp38-win32.whl PyMuPDF-x.xx.xx-cp39-cp39-macosx_10_9_x86_64.whl PyMuPDF-x.xx.xx-cp39-cp39-manylinux2010_x86_64.whl -PyMuPDF-x.xx.xx-cp39-cp39-win32.whl +PyMuPDF-x.xx.xx-cp39-cp39-manylinux2014_aarch64.whl PyMuPDF-x.xx.xx-cp39-cp39-win_amd64.whl +PyMuPDF-x.xx.xx-cp39-cp39-win32.whl diff --git a/docs/znames.rst b/docs/znames.rst index 5c95c8beb..98b5b14f7 100644 --- a/docs/znames.rst +++ b/docs/znames.rst @@ -4,15 +4,40 @@ Deprecated Names ================ -This list maps deprecated names of methods and attributes to their new "snake_case" versions. +The original naming convention for methods and properties has been "camelCase". Since its creation around 2013, a tremendous increase of functionality has happened in PyMuPDF -- and with it a corresponding increase in classes, methods and properties. In too many cases, this has led to non-intuitive, illogical and ugly names, difficult to memorize or guess. -This is a major effort, that we only can muster in a step-wise fashion. 
We believe, we so far (v1.18.7) are done with :ref:`Annot`, :ref:`Document`, :ref:`Page` , :ref:`Shape` and :ref:`TextWriter`. +A few versions ago, I therefore decided to shift gears and switch to a "snake_cased" naming standard. +This was a major effort, which needed a step-wise approach. I think I am done with it now (version 1.18.14). -Names of classes (camel case) and package-wide constants (the majority of which is upper case) remain untouched. -Deprecated names will remain available through MuPDF version 1.19.0 and probably be **removed in v1.20.0**. +The following list maps deprecated names to their new versions. For example, property ``pageCount`` became ``page_count`` in the :ref:`Document` class. There also are less obvious name changes, e.g. method ``getPNGdata`` was renamed to ``tobytes`` in the :ref:`Pixmap` class. -Deprecated names will no longer be separately documented. +Names of classes (camel case) and package-wide constants (the majority of which are upper case) remain untouched. -.. note:: The following list is automatically generated. One or two items refer to yet undocumented methods - please simply ignore them. +Old names will remain available as deprecated aliases through MuPDF version 1.19.0 and **be removed** in the version that follows it - probably version 1.20.0, but this depends on upstream decisions (MuPDF). + +Starting with version 1.19.0, we will issue deprecation warnings on ``sys.stderr`` like ``Deprecation: 'newPage' removed from class 'Document' after v1.19.0 - use 'new_page'.`` when aliased methods are being used. Using a deprecated property will not cause this type of warning. + +Starting immediately, all deprecated objects (methods and properties) will show a copy of the original's docstring, **prefixed** with the deprecation message, for example:: + + >>> print(fitz.Document.pageCount.__doc__) + *** Deprecated and removed in version following 1.19.0 - use 'page_count'. *** + Number of pages. 
+ >>> print(fitz.Document.newPage.__doc__) + *** Deprecated and removed in version following 1.19.0 - use 'new_page'. *** + Create and return a new page object. + + Args: + pno: (int) insert before this page. Default: after last page. + width: (float) page width in points. Default: 595 (ISO A4 width). + height: (float) page height in points. Default 842 (ISO A4 height). + Returns: + A Page object. + + +There is a utility script `alias-changer.py `_ which can be used to do mass-renames in your scripts. It accepts either a single file or a folder as argument. If a folder is supplied, all its Python files and those of its subfolders are changed. Optionally, backups of the scripts can be taken. + +Deprecated names are not separately documented. The following list will help you find the documentation of the original. + +.. note:: This is automatically generated. One or two items refer to yet undocumented methods - please simply ignore them. .. include:: deprecated.rst diff --git a/fitz/__init__.py b/fitz/__init__.py index 649e07b6b..4bb7f534f 100644 --- a/fitz/__init__.py +++ b/fitz/__init__.py @@ -46,18 +46,18 @@ fitz.Document.get_page_labels = fitz.utils.get_page_labels fitz.Document.get_page_numbers = fitz.utils.get_page_numbers fitz.Document.get_page_pixmap = fitz.utils.get_page_pixmap -fitz.Document.get_page_text = fitz.utils.getPageText -fitz.Document.get_toc = fitz.utils.getToC +fitz.Document.get_page_text = fitz.utils.get_page_text +fitz.Document.get_toc = fitz.utils.get_toc fitz.Document.has_annots = fitz.utils.has_annots fitz.Document.has_links = fitz.utils.has_links -fitz.Document.insert_page = fitz.utils.insertPage -fitz.Document.new_page = fitz.utils.newPage +fitz.Document.insert_page = fitz.utils.insert_page +fitz.Document.new_page = fitz.utils.new_page fitz.Document.scrub = fitz.utils.scrub -fitz.Document.search_page_for = fitz.utils.searchPageFor +fitz.Document.search_page_for = fitz.utils.search_page_for fitz.Document.set_metadata = 
fitz.utils.set_metadata fitz.Document.set_ocmd = fitz.utils.set_ocmd fitz.Document.set_page_labels = fitz.utils.set_page_labels -fitz.Document.set_toc = fitz.utils.setToC +fitz.Document.set_toc = fitz.utils.set_toc fitz.Document.set_toc_item = fitz.utils.set_toc_item fitz.Document.tobytes = fitz.Document.write fitz.Document.subset_fonts = fitz.utils.subset_fonts @@ -69,7 +69,7 @@ # Page # ------------------------------------------------------------------------------ fitz.Page.apply_redactions = fitz.utils.apply_redactions -fitz.Page.delete_widget = fitz.utils.deleteWidget +fitz.Page.delete_widget = fitz.utils.delete_widget fitz.Page.draw_bezier = fitz.utils.draw_bezier fitz.Page.draw_circle = fitz.utils.draw_circle fitz.Page.draw_curve = fitz.utils.draw_curve @@ -83,20 +83,20 @@ fitz.Page.draw_zigzag = fitz.utils.draw_zigzag fitz.Page.get_links = fitz.utils.get_links fitz.Page.get_pixmap = fitz.utils.get_pixmap -fitz.Page.get_text = fitz.utils.getText +fitz.Page.get_text = fitz.utils.get_text fitz.Page.get_image_info = fitz.utils.get_image_info -fitz.Page.get_text_blocks = fitz.utils.getTextBlocks -fitz.Page.get_text_selection = fitz.utils.getTextSelection -fitz.Page.get_text_words = fitz.utils.getTextWords -fitz.Page.get_textbox = fitz.utils.getTextbox +fitz.Page.get_text_blocks = fitz.utils.get_text_blocks +fitz.Page.get_text_selection = fitz.utils.get_text_selection +fitz.Page.get_text_words = fitz.utils.get_text_words +fitz.Page.get_textbox = fitz.utils.get_textbox fitz.Page.insert_image = fitz.utils.insert_image -fitz.Page.insert_link = fitz.utils.insertLink +fitz.Page.insert_link = fitz.utils.insert_link fitz.Page.insert_text = fitz.utils.insert_text fitz.Page.insert_textbox = fitz.utils.insert_textbox fitz.Page.new_shape = lambda x: fitz.utils.Shape(x) -fitz.Page.search_for = fitz.utils.searchFor +fitz.Page.search_for = fitz.utils.search_for fitz.Page.show_pdf_page = fitz.utils.show_pdf_page -fitz.Page.update_link = fitz.utils.updateLink 
+fitz.Page.update_link = fitz.utils.update_link fitz.Page.write_text = fitz.utils.write_text fitz.Page.get_label = fitz.utils.get_label fitz.Page.get_image_rects = fitz.utils.get_image_rects @@ -104,21 +104,14 @@ # ------------------------------------------------------------------------ # Annot # ------------------------------------------------------------------------ -fitz.Annot.get_text = fitz.utils.getText -fitz.Annot.get_textbox = fitz.utils.getTextbox +fitz.Annot.get_text = fitz.utils.get_text +fitz.Annot.get_textbox = fitz.utils.get_textbox # ------------------------------------------------------------------------ -# Rect +# Rect and IRect # ------------------------------------------------------------------------ -fitz.Rect.getRectArea = fitz.utils.getRectArea -fitz.Rect.getArea = fitz.utils.getRectArea - -# ------------------------------------------------------------------------ -# IRect -# ------------------------------------------------------------------------ - -fitz.IRect.getRectArea = fitz.utils.getRectArea -fitz.IRect.getArea = fitz.utils.getRectArea +fitz.Rect.get_area = fitz.utils.get_area +fitz.IRect.get_area = fitz.utils.get_area # ------------------------------------------------------------------------ # TextWriter @@ -126,191 +119,300 @@ fitz.TextWriter.fill_textbox = fitz.utils.fill_textbox +class FitzDeprecation(DeprecationWarning): + pass + + def restore_aliases(): + import warnings + + warnings.filterwarnings( + "once", + category=FitzDeprecation, + ) + + def showthis(msg, cat, filename, lineno, file=None, line=None): + text = warnings.formatwarning(msg, cat, filename, lineno, line=line) + s = text.find("FitzDeprecation") + if s < 0: + print(text, file=sys.stderr) + return + text = text[s:].splitlines()[0][4:] + print(text, file=sys.stderr) + + warnings.showwarning = showthis + + def _alias(fitz_class, old, new): + fname = getattr(fitz_class, new) + r = str(fitz_class)[1:-1] + objname = " ".join(r.split()[:2]) + objname = 
objname.replace("fitz.fitz.", "") + objname = objname.replace("fitz.utils.", "") + if callable(fname): + + def deprecated_function(*args, **kw): + msg = "'%s' removed from %s after v1.19.0 - use '%s'." % ( + old, + objname, + new, + ) + if not VersionBind.startswith("1.18"): + warnings.warn(msg, category=FitzDeprecation) + return fname(*args, **kw) + + setattr(fitz_class, old, deprecated_function) + else: + if type(fname) is property: + setattr(fitz_class, old, property(fname.fget)) + else: + setattr(fitz_class, old, fname) + + eigen = getattr(fitz_class, old) + x = fname.__doc__ + if not x: + x = "" + try: + if callable(fname) or type(fname) is property: + eigen.__doc__ = ( + "*** Deprecated and removed in version following 1.19.0 - use '%s'. ***\n" + % new + + x + ) + except: + pass + # deprecated Document aliases - fitz.Document.chapterCount = fitz.Document.chapter_count - fitz.Document.chapterPageCount = fitz.Document.chapter_page_count - fitz.Document.convertToPDF = fitz.Document.convert_to_pdf - fitz.Document.copyPage = fitz.Document.copy_page - fitz.Document.deletePage = fitz.Document.delete_page - fitz.Document.deletePageRange = fitz.Document.delete_pages - fitz.Document.embeddedFileAdd = fitz.Document.embfile_add - fitz.Document.embeddedFileCount = fitz.Document.embfile_count - fitz.Document.embeddedFileDel = fitz.Document.embfile_del - fitz.Document.embeddedFileGet = fitz.Document.embfile_get - fitz.Document.embeddedFileInfo = fitz.Document.embfile_info - fitz.Document.embeddedFileNames = fitz.Document.embfile_names - fitz.Document.embeddedFileUpd = fitz.Document.embfile_upd - fitz.Document.extractFont = fitz.Document.extract_font - fitz.Document.extractImage = fitz.Document.extract_image - fitz.Document.findBookmark = fitz.Document.find_bookmark - fitz.Document.fullcopyPage = fitz.Document.fullcopy_page - fitz.Document.getCharWidths = fitz.Document.get_char_widths - fitz.Document.getOCGs = fitz.Document.get_ocgs - fitz.Document.getPageFontList = 
fitz.Document.get_page_fonts - fitz.Document.getPageImageList = fitz.Document.get_page_images - fitz.Document.getPagePixmap = fitz.Document.get_page_pixmap - fitz.Document.getPageText = fitz.Document.get_page_text - fitz.Document.getPageXObjectList = fitz.Document.get_page_xobjects - fitz.Document.getSigFlags = fitz.Document.get_sigflags - fitz.Document.getToC = fitz.Document.get_toc - fitz.Document.getXmlMetadata = fitz.Document.get_xml_metadata - fitz.Document.insertPage = fitz.Document.insert_page - fitz.Document.insertPDF = fitz.Document.insert_pdf - fitz.Document.isDirty = fitz.Document.is_dirty - fitz.Document.isFormPDF = fitz.Document.is_form_pdf - fitz.Document.isPDF = fitz.Document.is_pdf - fitz.Document.isReflowable = fitz.Document.is_reflowable - fitz.Document.isRepaired = fitz.Document.is_repaired - fitz.Document.isStream = fitz.Document.is_stream - fitz.Document.lastLocation = fitz.Document.last_location - fitz.Document.loadPage = fitz.Document.load_page - fitz.Document.makeBookmark = fitz.Document.make_bookmark - fitz.Document.metadataXML = fitz.Document.xref_xml_metadata - fitz.Document.movePage = fitz.Document.move_page - fitz.Document.needsPass = fitz.Document.needs_pass - fitz.Document.newPage = fitz.Document.new_page - fitz.Document.nextLocation = fitz.Document.next_location - fitz.Document.pageCount = fitz.Document.page_count - fitz.Document.pageCropBox = fitz.Document.page_cropbox - fitz.Document.pageXref = fitz.Document.page_xref - fitz.Document.PDFCatalog = fitz.Document.pdf_catalog - fitz.Document.PDFTrailer = fitz.Document.pdf_trailer - fitz.Document.previousLocation = fitz.Document.prev_location - fitz.Document.resolveLink = fitz.Document.resolve_link - fitz.Document.searchPageFor = fitz.Document.search_page_for - fitz.Document.setLanguage = fitz.Document.set_language - fitz.Document.setMetadata = fitz.Document.set_metadata - fitz.Document.setToC = fitz.Document.set_toc - fitz.Document.setXmlMetadata = fitz.Document.set_xml_metadata - 
fitz.Document.updateObject = fitz.Document.update_object - fitz.Document.updateStream = fitz.Document.update_stream - fitz.Document.xrefLength = fitz.Document.xref_length - fitz.Document.xrefObject = fitz.Document.xref_object - fitz.Document.xrefStream = fitz.Document.xref_stream - fitz.Document.xrefStreamRaw = fitz.Document.xref_stream_raw + _alias(fitz.Document, "chapterCount", "chapter_count") + _alias(fitz.Document, "chapterPageCount", "chapter_page_count") + _alias(fitz.Document, "convertToPDF", "convert_to_pdf") + _alias(fitz.Document, "copyPage", "copy_page") + _alias(fitz.Document, "deletePage", "delete_page") + _alias(fitz.Document, "deletePageRange", "delete_pages") + _alias(fitz.Document, "embeddedFileAdd", "embfile_add") + _alias(fitz.Document, "embeddedFileCount", "embfile_count") + _alias(fitz.Document, "embeddedFileDel", "embfile_del") + _alias(fitz.Document, "embeddedFileGet", "embfile_get") + _alias(fitz.Document, "embeddedFileInfo", "embfile_info") + _alias(fitz.Document, "embeddedFileNames", "embfile_names") + _alias(fitz.Document, "embeddedFileUpd", "embfile_upd") + _alias(fitz.Document, "extractFont", "extract_font") + _alias(fitz.Document, "extractImage", "extract_image") + _alias(fitz.Document, "findBookmark", "find_bookmark") + _alias(fitz.Document, "fullcopyPage", "fullcopy_page") + _alias(fitz.Document, "getCharWidths", "get_char_widths") + _alias(fitz.Document, "getOCGs", "get_ocgs") + _alias(fitz.Document, "getPageFontList", "get_page_fonts") + _alias(fitz.Document, "getPageImageList", "get_page_images") + _alias(fitz.Document, "getPagePixmap", "get_page_pixmap") + _alias(fitz.Document, "getPageText", "get_page_text") + _alias(fitz.Document, "getPageXObjectList", "get_page_xobjects") + _alias(fitz.Document, "getSigFlags", "get_sigflags") + _alias(fitz.Document, "getToC", "get_toc") + _alias(fitz.Document, "getXmlMetadata", "get_xml_metadata") + _alias(fitz.Document, "insertPage", "insert_page") + _alias(fitz.Document, "insertPDF", 
"insert_pdf") + _alias(fitz.Document, "isDirty", "is_dirty") + _alias(fitz.Document, "isFormPDF", "is_form_pdf") + _alias(fitz.Document, "isPDF", "is_pdf") + _alias(fitz.Document, "isReflowable", "is_reflowable") + _alias(fitz.Document, "isRepaired", "is_repaired") + _alias(fitz.Document, "isStream", "is_stream") + _alias(fitz.Document, "lastLocation", "last_location") + _alias(fitz.Document, "loadPage", "load_page") + _alias(fitz.Document, "makeBookmark", "make_bookmark") + _alias(fitz.Document, "metadataXML", "xref_xml_metadata") + _alias(fitz.Document, "movePage", "move_page") + _alias(fitz.Document, "needsPass", "needs_pass") + _alias(fitz.Document, "newPage", "new_page") + _alias(fitz.Document, "nextLocation", "next_location") + _alias(fitz.Document, "pageCount", "page_count") + _alias(fitz.Document, "pageCropBox", "page_cropbox") + _alias(fitz.Document, "pageXref", "page_xref") + _alias(fitz.Document, "PDFCatalog", "pdf_catalog") + _alias(fitz.Document, "PDFTrailer", "pdf_trailer") + _alias(fitz.Document, "previousLocation", "prev_location") + _alias(fitz.Document, "resolveLink", "resolve_link") + _alias(fitz.Document, "searchPageFor", "search_page_for") + _alias(fitz.Document, "setLanguage", "set_language") + _alias(fitz.Document, "setMetadata", "set_metadata") + _alias(fitz.Document, "setToC", "set_toc") + _alias(fitz.Document, "setXmlMetadata", "set_xml_metadata") + _alias(fitz.Document, "updateObject", "update_object") + _alias(fitz.Document, "updateStream", "update_stream") + _alias(fitz.Document, "xrefLength", "xref_length") + _alias(fitz.Document, "xrefObject", "xref_object") + _alias(fitz.Document, "xrefStream", "xref_stream") + _alias(fitz.Document, "xrefStreamRaw", "xref_stream_raw") # deprecated Page aliases - fitz.Page._isWrapped = fitz.Page.is_wrapped - fitz.Page.addCaretAnnot = fitz.Page.add_caret_annot - fitz.Page.addCircleAnnot = fitz.Page.add_circle_annot - fitz.Page.addFileAnnot = fitz.Page.add_file_annot - fitz.Page.addFreetextAnnot = 
fitz.Page.add_freetext_annot - fitz.Page.addHighlightAnnot = fitz.Page.add_highlight_annot - fitz.Page.addInkAnnot = fitz.Page.add_ink_annot - fitz.Page.addLineAnnot = fitz.Page.add_line_annot - fitz.Page.addPolygonAnnot = fitz.Page.add_polygon_annot - fitz.Page.addPolylineAnnot = fitz.Page.add_polyline_annot - fitz.Page.addRectAnnot = fitz.Page.add_rect_annot - fitz.Page.addRedactAnnot = fitz.Page.add_redact_annot - fitz.Page.addSquigglyAnnot = fitz.Page.add_squiggly_annot - fitz.Page.addStampAnnot = fitz.Page.add_stamp_annot - fitz.Page.addStrikeoutAnnot = fitz.Page.add_strikeout_annot - fitz.Page.addTextAnnot = fitz.Page.add_text_annot - fitz.Page.addUnderlineAnnot = fitz.Page.add_underline_annot - fitz.Page.addWidget = fitz.Page.add_widget - fitz.Page.cleanContents = fitz.Page.clean_contents - fitz.Page.CropBox = fitz.Page.cropbox - fitz.Page.CropBoxPosition = fitz.Page.cropbox_position - fitz.Page.deleteAnnot = fitz.Page.delete_annot - fitz.Page.deleteLink = fitz.Page.delete_link - fitz.Page.deleteWidget = fitz.Page.delete_widget - fitz.Page.derotationMatrix = fitz.Page.derotation_matrix - fitz.Page.drawBezier = fitz.Page.draw_bezier - fitz.Page.drawCircle = fitz.Page.draw_circle - fitz.Page.drawCurve = fitz.Page.draw_curve - fitz.Page.drawLine = fitz.Page.draw_line - fitz.Page.drawOval = fitz.Page.draw_oval - fitz.Page.drawPolyline = fitz.Page.draw_polyline - fitz.Page.drawQuad = fitz.Page.draw_quad - fitz.Page.drawRect = fitz.Page.draw_rect - fitz.Page.drawSector = fitz.Page.draw_sector - fitz.Page.drawSquiggle = fitz.Page.draw_squiggle - fitz.Page.drawZigzag = fitz.Page.draw_zigzag - fitz.Page.firstAnnot = fitz.Page.first_annot - fitz.Page.firstLink = fitz.Page.first_link - fitz.Page.firstWidget = fitz.Page.first_widget - fitz.Page.getContents = fitz.Page.get_contents - fitz.Page.getDisplayList = fitz.Page.get_displaylist - fitz.Page.getDrawings = fitz.Page.get_drawings - fitz.Page.getFontList = fitz.Page.get_fonts - fitz.Page.getImageBbox = 
fitz.Page.get_image_bbox - fitz.Page.getImageList = fitz.Page.get_images - fitz.Page.getLinks = fitz.Page.get_links - fitz.Page.getPixmap = fitz.Page.get_pixmap - fitz.Page.getSVGimage = fitz.Page.get_svg_image - fitz.Page.getText = fitz.Page.get_text - fitz.Page.getTextBlocks = fitz.Page.get_text_blocks - fitz.Page.getTextbox = fitz.Page.get_textbox - fitz.Page.getTextPage = fitz.Page.get_textpage - fitz.Page.getTextWords = fitz.Page.get_text_words - fitz.Page.insertFont = fitz.Page.insert_font - fitz.Page.insertImage = fitz.Page.insert_image - fitz.Page.insertLink = fitz.Page.insert_link - fitz.Page.insertText = fitz.Page.insert_text - fitz.Page.insertTextbox = fitz.Page.insert_textbox - fitz.Page.loadAnnot = fitz.Page.load_annot - fitz.Page.loadLinks = fitz.Page.load_links - fitz.Page.MediaBox = fitz.Page.mediabox - fitz.Page.MediaBoxSize = fitz.Page.mediabox_size - fitz.Page.newShape = fitz.Page.new_shape - fitz.Page.readContents = fitz.Page.read_contents - fitz.Page.rotationMatrix = fitz.Page.rotation_matrix - fitz.Page.searchFor = fitz.Page.search_for - fitz.Page.setCropBox = fitz.Page.set_cropbox - fitz.Page.setMediaBox = fitz.Page.set_mediabox - fitz.Page.setRotation = fitz.Page.set_rotation - fitz.Page.showPDFpage = fitz.Page.show_pdf_page - fitz.Page.transformationMatrix = fitz.Page.transformation_matrix - fitz.Page.updateLink = fitz.Page.update_link - fitz.Page.wrapContents = fitz.Page.wrap_contents - fitz.Page.writeText = fitz.Page.write_text + _alias(fitz.Page, "_isWrapped", "is_wrapped") + _alias(fitz.Page, "addCaretAnnot", "add_caret_annot") + _alias(fitz.Page, "addCircleAnnot", "add_circle_annot") + _alias(fitz.Page, "addFileAnnot", "add_file_annot") + _alias(fitz.Page, "addFreetextAnnot", "add_freetext_annot") + _alias(fitz.Page, "addHighlightAnnot", "add_highlight_annot") + _alias(fitz.Page, "addInkAnnot", "add_ink_annot") + _alias(fitz.Page, "addLineAnnot", "add_line_annot") + _alias(fitz.Page, "addPolygonAnnot", "add_polygon_annot") + 
_alias(fitz.Page, "addPolylineAnnot", "add_polyline_annot") + _alias(fitz.Page, "addRectAnnot", "add_rect_annot") + _alias(fitz.Page, "addRedactAnnot", "add_redact_annot") + _alias(fitz.Page, "addSquigglyAnnot", "add_squiggly_annot") + _alias(fitz.Page, "addStampAnnot", "add_stamp_annot") + _alias(fitz.Page, "addStrikeoutAnnot", "add_strikeout_annot") + _alias(fitz.Page, "addTextAnnot", "add_text_annot") + _alias(fitz.Page, "addUnderlineAnnot", "add_underline_annot") + _alias(fitz.Page, "addWidget", "add_widget") + _alias(fitz.Page, "cleanContents", "clean_contents") + _alias(fitz.Page, "CropBox", "cropbox") + _alias(fitz.Page, "CropBoxPosition", "cropbox_position") + _alias(fitz.Page, "deleteAnnot", "delete_annot") + _alias(fitz.Page, "deleteLink", "delete_link") + _alias(fitz.Page, "deleteWidget", "delete_widget") + _alias(fitz.Page, "derotationMatrix", "derotation_matrix") + _alias(fitz.Page, "drawBezier", "draw_bezier") + _alias(fitz.Page, "drawCircle", "draw_circle") + _alias(fitz.Page, "drawCurve", "draw_curve") + _alias(fitz.Page, "drawLine", "draw_line") + _alias(fitz.Page, "drawOval", "draw_oval") + _alias(fitz.Page, "drawPolyline", "draw_polyline") + _alias(fitz.Page, "drawQuad", "draw_quad") + _alias(fitz.Page, "drawRect", "draw_rect") + _alias(fitz.Page, "drawSector", "draw_sector") + _alias(fitz.Page, "drawSquiggle", "draw_squiggle") + _alias(fitz.Page, "drawZigzag", "draw_zigzag") + _alias(fitz.Page, "firstAnnot", "first_annot") + _alias(fitz.Page, "firstLink", "first_link") + _alias(fitz.Page, "firstWidget", "first_widget") + _alias(fitz.Page, "getContents", "get_contents") + _alias(fitz.Page, "getDisplayList", "get_displaylist") + _alias(fitz.Page, "getDrawings", "get_drawings") + _alias(fitz.Page, "getFontList", "get_fonts") + _alias(fitz.Page, "getImageBbox", "get_image_bbox") + _alias(fitz.Page, "getImageList", "get_images") + _alias(fitz.Page, "getLinks", "get_links") + _alias(fitz.Page, "getPixmap", "get_pixmap") + _alias(fitz.Page, 
"getSVGimage", "get_svg_image") + _alias(fitz.Page, "getText", "get_text") + _alias(fitz.Page, "getTextBlocks", "get_text_blocks") + _alias(fitz.Page, "getTextbox", "get_textbox") + _alias(fitz.Page, "getTextPage", "get_textpage") + _alias(fitz.Page, "getTextWords", "get_text_words") + _alias(fitz.Page, "insertFont", "insert_font") + _alias(fitz.Page, "insertImage", "insert_image") + _alias(fitz.Page, "insertLink", "insert_link") + _alias(fitz.Page, "insertText", "insert_text") + _alias(fitz.Page, "insertTextbox", "insert_textbox") + _alias(fitz.Page, "loadAnnot", "load_annot") + _alias(fitz.Page, "loadLinks", "load_links") + _alias(fitz.Page, "MediaBox", "mediabox") + _alias(fitz.Page, "MediaBoxSize", "mediabox_size") + _alias(fitz.Page, "newShape", "new_shape") + _alias(fitz.Page, "readContents", "read_contents") + _alias(fitz.Page, "rotationMatrix", "rotation_matrix") + _alias(fitz.Page, "searchFor", "search_for") + _alias(fitz.Page, "setCropBox", "set_cropbox") + _alias(fitz.Page, "setMediaBox", "set_mediabox") + _alias(fitz.Page, "setRotation", "set_rotation") + _alias(fitz.Page, "showPDFpage", "show_pdf_page") + _alias(fitz.Page, "transformationMatrix", "transformation_matrix") + _alias(fitz.Page, "updateLink", "update_link") + _alias(fitz.Page, "wrapContents", "wrap_contents") + _alias(fitz.Page, "writeText", "write_text") + + # deprecated Shape aliases + _alias(fitz.utils.Shape, "drawBezier", "draw_bezier") + _alias(fitz.utils.Shape, "drawCircle", "draw_circle") + _alias(fitz.utils.Shape, "drawCurve", "draw_curve") + _alias(fitz.utils.Shape, "drawLine", "draw_line") + _alias(fitz.utils.Shape, "drawOval", "draw_oval") + _alias(fitz.utils.Shape, "drawPolyline", "draw_polyline") + _alias(fitz.utils.Shape, "drawQuad", "draw_quad") + _alias(fitz.utils.Shape, "drawRect", "draw_rect") + _alias(fitz.utils.Shape, "drawSector", "draw_sector") + _alias(fitz.utils.Shape, "drawSquiggle", "draw_squiggle") + _alias(fitz.utils.Shape, "drawZigzag", "draw_zigzag") + 
_alias(fitz.utils.Shape, "insertText", "insert_text") + _alias(fitz.utils.Shape, "insertTextbox", "insert_textbox") # deprecated Annot aliases - fitz.Annot.getText = fitz.Annot.get_text - fitz.Annot.getTextbox = fitz.Annot.get_textbox - fitz.Annot.fileGet = fitz.Annot.get_file - fitz.Annot.fileUpd = fitz.Annot.update_file - fitz.Annot.getPixmap = fitz.Annot.get_pixmap - fitz.Annot.getTextPage = fitz.Annot.get_textpage - fitz.Annot.lineEnds = fitz.Annot.line_ends - fitz.Annot.setBlendMode = fitz.Annot.set_blendmode - fitz.Annot.setBorder = fitz.Annot.set_border - fitz.Annot.setColors = fitz.Annot.set_colors - fitz.Annot.setFlags = fitz.Annot.set_flags - fitz.Annot.setInfo = fitz.Annot.set_info - fitz.Annot.setLineEnds = fitz.Annot.set_line_ends - fitz.Annot.setName = fitz.Annot.set_name - fitz.Annot.setOpacity = fitz.Annot.set_opacity - fitz.Annot.setRect = fitz.Annot.set_rect - fitz.Annot.setOC = fitz.Annot.set_oc - fitz.Annot.soundGet = fitz.Annot.get_sound + _alias(fitz.Annot, "getText", "get_text") + _alias(fitz.Annot, "getTextbox", "get_textbox") + _alias(fitz.Annot, "fileGet", "get_file") + _alias(fitz.Annot, "fileUpd", "update_file") + _alias(fitz.Annot, "getPixmap", "get_pixmap") + _alias(fitz.Annot, "getTextPage", "get_textpage") + _alias(fitz.Annot, "lineEnds", "line_ends") + _alias(fitz.Annot, "setBlendMode", "set_blendmode") + _alias(fitz.Annot, "setBorder", "set_border") + _alias(fitz.Annot, "setColors", "set_colors") + _alias(fitz.Annot, "setFlags", "set_flags") + _alias(fitz.Annot, "setInfo", "set_info") + _alias(fitz.Annot, "setLineEnds", "set_line_ends") + _alias(fitz.Annot, "setName", "set_name") + _alias(fitz.Annot, "setOpacity", "set_opacity") + _alias(fitz.Annot, "setRect", "set_rect") + _alias(fitz.Annot, "setOC", "set_oc") + _alias(fitz.Annot, "soundGet", "get_sound") # deprecated TextWriter aliases - fitz.TextWriter.writeText = fitz.TextWriter.write_text - fitz.TextWriter.fillTextbox = fitz.TextWriter.fill_textbox + _alias(fitz.TextWriter, 
"writeText", "write_text") + _alias(fitz.TextWriter, "fillTextbox", "fill_textbox") # deprecated DisplayList aliases - fitz.DisplayList.getPixmap = fitz.DisplayList.get_pixmap - fitz.DisplayList.getTextPage = fitz.DisplayList.get_textpage + _alias(fitz.DisplayList, "getPixmap", "get_pixmap") + _alias(fitz.DisplayList, "getTextPage", "get_textpage") # deprecated Pixmap aliases - fitz.Pixmap.setAlpha = fitz.Pixmap.set_alpha - fitz.Pixmap.gammaWith = fitz.Pixmap.gamma_with - fitz.Pixmap.tintWith = fitz.Pixmap.tint_with - fitz.Pixmap.clearWith = fitz.Pixmap.clear_with - fitz.Pixmap.copyPixmap = fitz.Pixmap.copy - fitz.Pixmap.getImageData = fitz.Pixmap.tobytes - fitz.Pixmap.getPNGData = fitz.Pixmap.tobytes - fitz.Pixmap.writeImage = fitz.Pixmap.save - fitz.Pixmap.writePNG = fitz.Pixmap.save - fitz.Pixmap.pillowWrite = fitz.Pixmap.pil_save - fitz.Pixmap.pillowData = fitz.Pixmap.pil_tobytes - fitz.Pixmap.invertIRect = fitz.Pixmap.invert_irect - fitz.Pixmap.setPixel = fitz.Pixmap.set_pixel - fitz.Pixmap.setOrigin = fitz.Pixmap.set_origin - fitz.Pixmap.setRect = fitz.Pixmap.set_rect - fitz.Pixmap.setResolution = fitz.Pixmap.set_dpi + _alias(fitz.Pixmap, "setAlpha", "set_alpha") + _alias(fitz.Pixmap, "gammaWith", "gamma_with") + _alias(fitz.Pixmap, "tintWith", "tint_with") + _alias(fitz.Pixmap, "clearWith", "clear_with") + _alias(fitz.Pixmap, "copyPixmap", "copy") + _alias(fitz.Pixmap, "getImageData", "tobytes") + _alias(fitz.Pixmap, "getPNGData", "tobytes") + _alias(fitz.Pixmap, "getPNGdata", "tobytes") + _alias(fitz.Pixmap, "writeImage", "save") + _alias(fitz.Pixmap, "writePNG", "save") + _alias(fitz.Pixmap, "pillowWrite", "pil_save") + _alias(fitz.Pixmap, "pillowData", "pil_tobytes") + _alias(fitz.Pixmap, "invertIRect", "invert_irect") + _alias(fitz.Pixmap, "setPixel", "set_pixel") + _alias(fitz.Pixmap, "setOrigin", "set_origin") + _alias(fitz.Pixmap, "setRect", "set_rect") + _alias(fitz.Pixmap, "setResolution", "set_dpi") + + # deprecated geometry aliases + 
_alias(fitz.Rect, "getArea", "get_area") + _alias(fitz.IRect, "getArea", "get_area") + _alias(fitz.Rect, "getRectArea", "get_area") + _alias(fitz.IRect, "getRectArea", "get_area") + _alias(fitz.Rect, "includePoint", "include_point") + _alias(fitz.IRect, "includePoint", "include_point") + _alias(fitz.Rect, "includeRect", "include_rect") + _alias(fitz.IRect, "includeRect", "include_rect") + _alias(fitz.Rect, "isInfinite", "is_infinite") + _alias(fitz.IRect, "isInfinite", "is_infinite") + _alias(fitz.Rect, "isEmpty", "is_empty") + _alias(fitz.IRect, "isEmpty", "is_empty") + _alias(fitz.Quad, "isEmpty", "is_empty") + _alias(fitz.Quad, "isRectangular", "is_rectangular") + _alias(fitz.Quad, "isConvex", "is_convex") + _alias(fitz.Matrix, "isRectilinear", "is_rectilinear") + _alias(fitz.Matrix, "preRotate", "prerotate") + _alias(fitz.Matrix, "preScale", "prescale") + _alias(fitz.Matrix, "preShear", "preshear") + _alias(fitz.Matrix, "preTranslate", "pretranslate") + + # deprecated other aliases + _alias(fitz, "getPDFstr", "get_pdf_str") + _alias(fitz, "getPDFnow", "get_pdf_now") + _alias(fitz, "PaperSize", "paper_size") + _alias(fitz, "PaperRect", "paper_rect") + _alias(fitz, "paperSizes", "paper_sizes") + _alias(fitz, "ImageProperties", "image_properties") + _alias(fitz, "planishLine", "planish_line") + _alias(fitz, "getTextLength", "get_text_length") fitz.__doc__ = """ diff --git a/fitz/__main__.py b/fitz/__main__.py index ce37a0c6b..beed8d82d 100644 --- a/fitz/__main__.py +++ b/fitz/__main__.py @@ -50,7 +50,7 @@ def open_file(filename, password, show=False, pdf=True): if not doc.is_pdf and pdf is True: sys.exit("this command supports PDF files only") rc = -1 - if not doc.needsPass: + if not doc.needs_pass: return doc if password: rc = doc.authenticate(password) @@ -156,7 +156,7 @@ def show(args): % ( args.input, doc.page_count, - doc._getXrefLength() - 1, + doc.xref_length() - 1, size, flag, meta["format"], @@ -176,7 +176,7 @@ def show(args): print() if args.catalog: 
print(mycenter("PDF catalog")) - xref = doc.PDFCatalog() + xref = doc.pdf_catalog() print_xref(doc, xref) print() if args.metadata: @@ -185,7 +185,7 @@ def show(args): print() if args.xrefs: print(mycenter("object information")) - xrefl = get_list(args.xrefs, doc._getXrefLength(), what="xref") + xrefl = get_list(args.xrefs, doc.xref_length(), what="xref") for xref in xrefl: print_xref(doc, xref) print() @@ -296,7 +296,7 @@ def embedded_copy(args): for item in names: info = src.embfile_info(item) - buff = src.embeddedFileGet(item) + buff = src.embfile_get(item) doc.embfile_add( item, buff, @@ -326,7 +326,7 @@ def embedded_del(args): except ValueError: sys.exit("no such embedded file '%s'" % args.name) if not args.output or args.output == args.input: - doc.saveIncr() + doc.save_incr() else: doc.save(args.output, garbage=1) doc.close() @@ -336,7 +336,7 @@ def embedded_get(args): """Retrieve contents of an embedded file.""" doc = open_file(args.input, args.password, pdf=True) try: - stream = doc.embeddedFileGet(args.name) + stream = doc.embfile_get(args.name) d = doc.embfile_info(args.name) except ValueError: sys.exit("no such embedded file '%s'" % args.name) @@ -566,7 +566,7 @@ def main(): # 'clean' command # ------------------------------------------------------------------------- ps_clean = subps.add_parser( - "clean", description=mycenter("optimize PDF or create sub-PDF if pages given") + "clean", description=mycenter("optimize PDF, or create sub-PDF if pages given") ) ps_clean.add_argument("input", type=str, help="PDF filename") ps_clean.add_argument("output", type=str, help="output PDF filename") diff --git a/fitz/fitz.i b/fitz/fitz.i index a949b245a..fc5fd8099 100644 --- a/fitz/fitz.i +++ b/fitz/fitz.i @@ -69,8 +69,6 @@ CheckParent(self)%} #define THROWMSG(gctx, msg) fz_throw(gctx, FZ_ERROR_GENERIC, msg) #define ASSERT_PDF(cond) if (cond == NULL) fz_throw(gctx, FZ_ERROR_GENERIC, "not a PDF") #define INRANGE(v, low, high) ((low) <= v && v <= (high)) -#define 
MAX(a, b) ((a) < (b)) ? (b) : (a) -#define MIN(a, b) ((a) < (b)) ? (a) : (b) #define JM_PyErr_Clear if (PyErr_Occurred()) PyErr_Clear() @@ -967,7 +965,7 @@ struct Document if (!filespec) THROWMSG(gctx, "bad PDF: /EF object not found"); res = JM_BufferFromBytes(gctx, buffer); if (EXISTS(buffer) && !res) THROWMSG(gctx, "bad type: 'buffer'"); - if (res) + if (res && buffer != Py_None) { JM_update_stream(gctx, pdf, filespec, res, 1); // adjust /DL and /Size parameters @@ -1172,8 +1170,8 @@ struct Document filename=filename, ufilename=ufilename, desc=desc) - date = getPDFnow() - self.xref_set_key(xref, "Params/ModDate", getPDFstr(date)) + date = get_pdf_now() + self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) return xref def embfile_add(self, name: str, buffer: typing.ByteString, @@ -1204,10 +1202,10 @@ struct Document filename=filename, ufilename=ufilename, desc=desc) - date = getPDFnow() + date = get_pdf_now() self.xref_set_key(xref, "Type", "/EmbeddedFile") - self.xref_set_key(xref, "Params/CreationDate", getPDFstr(date)) - self.xref_set_key(xref, "Params/ModDate", getPDFstr(date)) + self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date)) + self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date)) return xref %} @@ -1928,14 +1926,14 @@ struct Document int fp = from_page, tp = to_page, sa = start_at; // normalize page numbers - fp = MAX(fp, 0); // -1 = first page - fp = MIN(fp, srcCount - 1); // but do not exceed last page + fp = Py_MAX(fp, 0); // -1 = first page + fp = Py_MIN(fp, srcCount - 1); // but do not exceed last page if (tp < 0) tp = srcCount - 1; // -1 = last page - tp = MIN(tp, srcCount - 1); // but do not exceed last page + tp = Py_MIN(tp, srcCount - 1); // but do not exceed last page if (sa < 0) sa = outCount; // -1 = behind last page - sa = MIN(sa, outCount); // but that is also the limit + sa = Py_MIN(sa, outCount); // but that is also the limit fz_try(gctx) { if (!pdfout || !pdfsrc) THROWMSG(gctx, "source or target not a PDF"); 
@@ -3161,13 +3159,18 @@ if not self.is_form_pdf: PyObject *_remove_toc_item(int xref) { // "remove" bookmark by letting it point to nowhere - pdf_obj *item = NULL; + pdf_obj *item = NULL, *color; + int i; pdf_document *pdf = pdf_specifics(gctx, (fz_document *) $self); fz_try(gctx) { item = pdf_new_indirect(gctx, pdf, xref, 0); pdf_dict_del(gctx, item, PDF_NAME(Dest)); pdf_dict_del(gctx, item, PDF_NAME(A)); - pdf_dict_put_text_string(gctx, item, PDF_NAME(Title), "<>"); + color = pdf_new_array(gctx, pdf, 3); + for (i=0; i < 3; i++) { + pdf_array_push_real(gctx, color, 0.8); + } + pdf_dict_put_drop(gctx, item, PDF_NAME(C), color); } fz_always(gctx) { pdf_drop_obj(gctx, item); @@ -3766,6 +3769,11 @@ if basestate: raise ValueError("document closed or encrypted") if not self.is_pdf: return () + if type(pno) is not int: + try: + pno = pno.number + except: + raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 1) if full is False: return [v[:-1] for v in val] @@ -3779,6 +3787,11 @@ if basestate: raise ValueError("document closed or encrypted") if not self.is_pdf: return () + if type(pno) is not int: + try: + pno = pno.number + except: + raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 2) if full is False: return [v[:-1] for v in val] @@ -3792,6 +3805,11 @@ if basestate: raise ValueError("document closed or encrypted") if not self.is_pdf: return () + if type(pno) is not int: + try: + pno = pno.number + except: + raise ValueError("need a Page or page number") val = self._getPageInfo(pno, 3) rc = [(v[0], v[1], v[2], Rect(v[3])) for v in val] return rc @@ -3858,16 +3876,17 @@ if basestate: while pno < 0: pno += page_count - if not pno in range(page_count): + if pno >= page_count: raise ValueError("bad page number(s)") # remove TOC bookmarks pointing to deleted page - old_toc = self.get_toc() - for i, item in enumerate(old_toc): + toc = self.get_toc() + ol_xrefs = self.get_outline_xrefs() + for i, item in enumerate(toc): if 
item[2] == pno + 1: - self.del_toc_item(i) + self._remove_toc_item(ol_xrefs[i]) - self._remove_links_to((pno,)) + self._remove_links_to(frozenset((pno,))) self._delete_page(pno) self._reset_page_refs() @@ -3909,6 +3928,8 @@ if basestate: raise ValueError("both arguments must be int") if f > t: f, t = t, f + if not f <= t < page_count: + raise ValueError("bad page number(s)") numbers = tuple(range(f, t + 1)) else: r = args[0] @@ -3917,15 +3938,19 @@ if basestate: numbers = tuple(r) numbers = list(map(int, set(numbers))) # ensure unique integers + if numbers == []: + print("nothing to delete") + return numbers.sort() if numbers[0] < 0 or numbers[-1] >= page_count: raise ValueError("bad page number(s)") - old_toc = self.get_toc() - for i, item in enumerate(old_toc): - if item[2] - 1 in numbers: # a deleted page number - self.del_toc_item(i) + frozen_numbers = frozenset(numbers) + toc = self.get_toc() + for i, xref in enumerate(self.get_outline_xrefs()): + if toc[i][2] - 1 in frozen_numbers: + self._remove_toc_item(xref) # remove target in PDF object - self._remove_links_to(numbers) + self._remove_links_to(frozen_numbers) for i in reversed(numbers): # delete pages, last to first self._delete_page(i) @@ -4016,6 +4041,31 @@ if basestate: raise IndexError("page not in document") return self.load_page(i) + + def __delitem__(self, i: AnyType)->None: + if not self.is_pdf: + raise ValueError("not a PDF") + if type(i) is int: + return self.delete_page(i) + if type(i) in (list, tuple, range): + return self.delete_pages(i) + if type(i) is not slice: + raise ValueError("bad argument type") + pc = self.page_count + start = i.start if i.start else 0 + stop = i.stop if i.stop else pc + step = i.step if i.step else 1 + while start < 0: + start += pc + if start >= pc: + raise ValueError("bad page number(s)") + while stop < 0: + stop += pc + if stop > pc: + raise ValueError("bad page number(s)") + return self.delete_pages(range(start, stop, step)) + + def pages(self, start: OptInt 
=None, stop: OptInt =None, step: OptInt =None)->"Page": """Return a generator iterator over a page range. @@ -5339,7 +5389,7 @@ def get_oc_items(self) -> list: For this, it must be exactly three connected lines, of which the first and the last one must be horizontal and line two - must be vertical. + must be vertical. Also, 'closePath' must be true. """ if not path["closePath"]: return False @@ -5352,6 +5402,12 @@ def get_oc_items(self) -> list: return False if p1.y != p2.y or p3.x != p4.x or p5.y != p6.y: return False + r = Rect(p1, p2).normalize() + r |= p3 + r |= p4 + r |= p5 + r |= p6 + path["rect"] = r return True def check_and_merge(this, prev): @@ -5419,27 +5475,34 @@ def get_oc_items(self) -> list: if item[0] == "m": p = Point(item[1]) * ctm current = p - path["rect"] = Rect(p, p) elif item[0] == "l": p2 = Point(item[1]) * ctm path["items"].append(("l", current, p2)) current = p2 - path["rect"] |= p2 elif item[0] == "c": p2 = Point(item[1]) * ctm p3 = Point(item[2]) * ctm p4 = Point(item[3]) * ctm path["items"].append(("c", current, p2, p3, p4)) current = p4 - path["rect"] |= p2 - path["rect"] |= p3 - path["rect"] |= p4 elif item == "closePath": path["closePath"] = True elif item in ("estroke", "efill", "eclip", "eclip-stroke"): if is_rectangle(path): path["items"] = [("re", path["rect"])] path["closePath"] = False + # make path rectangle for items + else: + for i, item in enumerate(path["items"]): + for j, p in enumerate(item[1:]): + if i == 0 and j == 0: + x0 = x1 = p.x + y0 = y1 = p.y + x0 = min(x0, p.x) + x1 = max(x1, p.x) + y0 = min(y0, p.y) + y1 = max(y1, p.y) + path["rect"] = Rect(x0, y0, x1, y1) try: # check if path is "stroke" duplicate of previous prev = paths.pop() # get previous path in list @@ -6064,7 +6127,14 @@ if not sanitize and not self.is_wrapped: // process stream --------------------------------- have_stream:; - fz_md5_buffer(gctx, imgbuf, digest); + fz_md5 state; + fz_md5_init(&state); + fz_md5_update(&state, imgbuf->data, 
imgbuf->len); + if (EXISTS(imask)) { + maskbuf = JM_BufferFromBytes(gctx, imask); + fz_md5_update(&state, maskbuf->data, maskbuf->len); + } + fz_md5_final(&state, digest); md5_py = PyBytes_FromStringAndSize(digest, 16); temp = PyDict_GetItem(digests, md5_py); if (temp) { @@ -6109,7 +6179,6 @@ if not sanitize and not self.is_wrapped: bpc = image->bpc; fz_colorspace *colorspace = image->colorspace; fz_image_resolution(image, &xres, &yres); - maskbuf = JM_BufferFromBytes(gctx, imask); mask = fz_new_image_from_buffer(gctx, maskbuf); zimg = fz_new_image_from_compressed_buffer(gctx, w, h, bpc, colorspace, xres, yres, 1, 0, NULL, @@ -6880,7 +6949,13 @@ if not self.colorspace or self.colorspace.n > 3: FITZEXCEPTION(set_alpha, !result) %pythonprepend set_alpha %{"""Set alpha channel to values contained in a byte array. -If omitted, set alphas to 255."""%} +If omitted, set alphas to 255. + +Args: + alphavalues: (bytes) with length (width * height) values in range(255). + premultiply: (bool, True) premultiply colors with alpha values. + opaque: (tuple) length colorspace.n, color value to set to opacity 0. +"""%} PyObject *set_alpha(PyObject *alphavalues=NULL, int premultiply=1, PyObject *opaque=NULL) { fz_buffer *res = NULL; @@ -7094,7 +7169,7 @@ def save(self, filename, output=None): def pil_save(self, *args, **kwargs): """Write to image file using Pillow. - Arguments are passed to Pillow's Image.save() method. + Args are passed to Pillow's Image.save method, see their documentation. Use instead of save when other output formats are desired. """ try: @@ -7123,8 +7198,8 @@ def pil_save(self, *args, **kwargs): def pil_tobytes(self, *args, **kwargs): """Convert to binary image stream using pillow. - Arguments are passed to Pillow's Image.save() method. - Use it instead of save when other output formats are needed. + Args are passed to Pillow's Image.save method, see their documentation. + Use instead of 'tobytes' when other output formats are needed. 
""" from io import BytesIO bytes_out = BytesIO() @@ -8429,29 +8504,29 @@ struct Annot //---------------------------------------------------------------- // annotation update appearance //---------------------------------------------------------------- - PyObject *_update_appearance(float opacity=-1, char *blend_mode=NULL, - PyObject *fill_color=NULL, - int rotate = -1) + PyObject *_update_appearance(float opacity=-1, + char *blend_mode=NULL, + PyObject *fill_color=NULL, + int rotate = -1) { pdf_annot *annot = (pdf_annot *) $self; int type = pdf_annot_type(gctx, annot); float fcol[4] = {1,1,1,1}; // std fill color: white - int nfcol = 0; // number of color components + int i, nfcol = 0; // number of color components JM_color_FromSequence(fill_color, &nfcol, fcol); fz_try(gctx) { - pdf_dirty_annot(gctx, annot); // enforce MuPDF /AP formatting - if (type == PDF_ANNOT_FREE_TEXT) { - if (EXISTS(fill_color)) { - pdf_set_annot_color(gctx, annot, nfcol, fcol); - } else { - pdf_dict_del(gctx, annot->obj, PDF_NAME(IC)); - } - } else { - if (EXISTS(fill_color)) { - pdf_set_annot_interior_color(gctx, annot, nfcol, fcol); - } else if (fill_color != Py_None) { - pdf_dict_del(gctx, annot->obj, PDF_NAME(IC)); - } + pdf_dirty_annot(gctx, annot); // enforce new /AP + // remove fill color from unsupported annots + // or if so requested + if (type != PDF_ANNOT_SQUARE + && type != PDF_ANNOT_CIRCLE + && type != PDF_ANNOT_LINE + && type != PDF_ANNOT_POLY_LINE + && type != PDF_ANNOT_POLYGON + || nfcol == 0 && fill_color != Py_None) { + pdf_dict_del(gctx, annot->obj, PDF_NAME(IC)); + } else if (nfcol > 0) { + pdf_set_annot_interior_color(gctx, annot, nfcol, fcol); } int insert_rot = (rotate >= 0) ? 
1 : 0; @@ -8470,11 +8545,25 @@ struct Annot default: insert_rot = 0; } - if (insert_rot) + if (insert_rot) { pdf_dict_put_int(gctx, annot->obj, PDF_NAME(Rotate), rotate); + } + annot->needs_new_ap = 1; // re-create appearance stream - pdf_update_annot(gctx, annot); // update the annotation + pdf_update_annot(gctx, annot); // let MuPDF update + // insert fill color + if (type == PDF_ANNOT_FREE_TEXT) { + if (nfcol > 0) { + pdf_set_annot_color(gctx, annot, nfcol, fcol); + } + } else if (nfcol > 0) { + pdf_obj *col = pdf_new_array(gctx, annot->page->doc, nfcol); + for (i = 0; i < nfcol; i++) { + pdf_array_push_real(gctx, col, fcol[i]); + } + pdf_dict_put_drop(gctx,annot->obj, PDF_NAME(IC), col); + } } fz_catch(gctx) { PySys_WriteStderr("cannot update annot: '%s'\n", fz_caught_message(gctx)); @@ -8575,9 +8664,9 @@ struct Annot dt = self.border["dashes"] # get the dashes spec bwidth = self.border["width"] # get border line width stroke = self.colors["stroke"] # get the stroke color - if fill_color != None: + if fill_color != None: # change of fill color requested fill = fill_color - else: + else: # put in current annot value fill = self.colors["fill"] rect = None # self.rect # prevent MuPDF fiddling with it @@ -8612,9 +8701,8 @@ struct Annot fill_color=fill, rotate=rotate, ) - if not val: # something went wrong, skip the rest - return val - + if val == False: + raise ValueError("Error updating annotation.") bfill = color_string(fill, "f") bstroke = color_string(stroke, "s") @@ -9835,7 +9923,7 @@ struct TextPage { while i < items - 1: v1 = val[i] v2 = val[i + 1] - if v1.y1 != v2.y1 or (v1 & v2).isEmpty: + if v1.y1 != v2.y1 or (v1 & v2).is_empty: i += 1 continue # no overlap on same line val[i] = v1 | v2 # join rectangles @@ -10502,7 +10590,7 @@ struct TextWriter if morph: p = morph[0] * self.ictm - delta = Matrix(1, 1).preTranslate(p.x, p.y) + delta = Matrix(1, 1).pretranslate(p.x, p.y) matrix = ~delta * morph[1] * delta if morph or matrix: new_cont_lines.append("%g %g %g 
%g %g %g cm" % JM_TUPLE(matrix)) @@ -10660,14 +10748,77 @@ struct Font %pythonprepend glyph_advance %{"""Return the glyph width of a unicode (font size 1)."""%} - float glyph_advance(int chr, char *language=NULL, int script=0, int wmode=0) + PyObject *glyph_advance(int chr, char *language=NULL, int script=0, int wmode=0) { fz_font *font; fz_text_language lang = fz_text_language_from_string(language); int gid = fz_encode_character_with_fallback(gctx, (fz_font *) $self, chr, script, lang, &font); - return fz_advance_glyph(gctx, font, gid, wmode); + return PyFloat_FromDouble((double) fz_advance_glyph(gctx, font, gid, wmode)); + } + + + FITZEXCEPTION(text_length, !result) + %pythonprepend text_length + %{"""Return length of unicode 'text' under a fontsize."""%} + PyObject *text_length(PyObject *text, double fontsize=11, char *language=NULL, int script=0, int wmode=0) + { + fz_font *font=NULL, *thisfont = (fz_font *) $self; + fz_text_language lang = fz_text_language_from_string(language); + double rc = 0; + fz_try(gctx) { + if (!PyUnicode_Check(text) || PyUnicode_READY(text) != 0) { + THROWMSG(gctx, "bad type: text"); + } + Py_ssize_t i, len = PyUnicode_GET_LENGTH(text); + int kind = PyUnicode_KIND(text); + void *data = PyUnicode_DATA(text); + for (i = 0; i < len; i++) { + int c = PyUnicode_READ(kind, data, i); + int gid = fz_encode_character_with_fallback(gctx,thisfont, c, script, lang, &font); + rc += (double) fz_advance_glyph(gctx, font, gid, wmode); + } + } + fz_catch(gctx) { + PyErr_Clear(); + return NULL; + } + rc *= fontsize; + return PyFloat_FromDouble(rc); + } + + + FITZEXCEPTION(char_lengths, !result) + %pythonprepend char_lengths + %{"""Return tuple of char lengths of unicode 'text' under a fontsize."""%} + PyObject *char_lengths(PyObject *text, double fontsize=11, char *language=NULL, int script=0, int wmode=0) + { + fz_font *font, *thisfont = (fz_font *) $self; + fz_text_language lang = fz_text_language_from_string(language); + PyObject *rc = NULL; + 
fz_try(gctx) { + if (!PyUnicode_Check(text) || PyUnicode_READY(text) != 0) { + THROWMSG(gctx, "bad type: text"); + } + Py_ssize_t i, len = PyUnicode_GET_LENGTH(text); + int kind = PyUnicode_KIND(text); + void *data = PyUnicode_DATA(text); + rc = PyTuple_New(len); + for (i = 0; i < len; i++) { + int c = PyUnicode_READ(kind, data, i); + int gid = fz_encode_character_with_fallback(gctx,thisfont, c, script, lang, &font); + PyTuple_SET_ITEM(rc, i, + PyFloat_FromDouble(fontsize * (double) fz_advance_glyph(gctx, font, gid, wmode))); + } + } + fz_catch(gctx) { + PyErr_Clear(); + Py_CLEAR(rc); + return NULL; + } + return rc; } + %pythonprepend glyph_bbox %{"""Return the glyph bbox of a unicode (font size 1)."""%} %pythonappend glyph_bbox %{val = Rect(val)%} @@ -10798,10 +10949,6 @@ struct Font """Return the glyph name for a unicode.""" return unicode_to_glyph_name(ch) - def text_length(self, text, fontsize=11, wmode=0): - """Calculate the length of a string for this font.""" - return fontsize * sum([self.glyph_advance(ord(c), wmode=wmode) for c in text]) - def __repr__(self): return "Font('%s')" % self.name @@ -11317,28 +11464,38 @@ struct Tools } - float _measure_string(const char *text, const char *fontname, float fontsize, int encoding = 0) + FITZEXCEPTION(_measure_string, !result) + PyObject *_measure_string(const char *text, const char *fontname, double fontsize, int encoding = 0) { - fz_font *font = fz_new_base14_font(gctx, fontname); - float w = 0; - while (*text) - { - int c, g; - text += fz_chartorune(&c, text); - switch (encoding) + double w = 0; + fz_font *font = NULL; + fz_try(gctx) { + font = fz_new_base14_font(gctx, fontname); + while (*text) { - case PDF_SIMPLE_ENCODING_GREEK: - c = fz_iso8859_7_from_unicode(c); break; - case PDF_SIMPLE_ENCODING_CYRILLIC: - c = fz_windows_1251_from_unicode(c); break; - default: - c = fz_windows_1252_from_unicode(c); break; + int c, g; + text += fz_chartorune(&c, text); + switch (encoding) + { + case PDF_SIMPLE_ENCODING_GREEK: 
+ c = fz_iso8859_7_from_unicode(c); break; + case PDF_SIMPLE_ENCODING_CYRILLIC: + c = fz_windows_1251_from_unicode(c); break; + default: + c = fz_windows_1252_from_unicode(c); break; + } + if (c < 0) c = 0xB7; + g = fz_encode_character(gctx, font, c); + w += (double) fz_advance_glyph(gctx, font, g, 0); } - if (c < 0) c = 0xB7; - g = fz_encode_character(gctx, font, c); - w += fz_advance_glyph(gctx, font, g, 0); } - return w * fontsize; + fz_always(gctx) { + fz_drop_font(gctx, font); + } + fz_catch(gctx) { + return NULL; + } + return PyFloat_FromDouble(w * fontsize); } PyObject * diff --git a/fitz/helper-fields.i b/fitz/helper-fields.i index 601d51880..17207fa00 100644 --- a/fitz/helper-fields.i +++ b/fitz/helper-fields.i @@ -870,8 +870,8 @@ class Widget(object): def _validate(self): """Validate the class entries. """ - if (self.rect.isInfinite - or self.rect.isEmpty + if (self.rect.is_infinite + or self.rect.is_empty ): raise ValueError("bad rect") diff --git a/fitz/helper-geo-py.i b/fitz/helper-geo-py.i index 78c1c7ed1..216a0f63a 100644 --- a/fitz/helper-geo-py.i +++ b/fitz/helper-geo-py.i @@ -52,7 +52,7 @@ class Matrix(object): self.a, self.b, self.c, self.d, self.e, self.f = dst[1] return 0 - def preTranslate(self, tx, ty): + def pretranslate(self, tx, ty): """Calculate pre translation and replace current matrix.""" tx = float(tx) ty = float(ty) @@ -60,7 +60,7 @@ class Matrix(object): self.f += tx * self.b + ty * self.d return self - def preScale(self, sx, sy): + def prescale(self, sx, sy): """Calculate pre scaling and replace current matrix.""" sx = float(sx) sy = float(sy) @@ -70,7 +70,7 @@ class Matrix(object): self.d *= sy return self - def preShear(self, h, v): + def preshear(self, h, v): """Calculate pre shearing and replace current matrix.""" h = float(h) v = float(v) @@ -81,7 +81,7 @@ class Matrix(object): self.d += h * b return self - def preRotate(self, theta): + def prerotate(self, theta): """Calculate pre rotation and replace current matrix.""" theta 
= float(theta) while theta < 0: theta += 360 @@ -218,7 +218,7 @@ class Matrix(object): norm = __abs__ @property - def isRectilinear(self): + def is_rectilinear(self): """True if rectangles are mapped to rectangles.""" return (abs(self.b) < EPSILON and abs(self.c) < EPSILON) or \ (abs(self.a) < EPSILON and abs(self.d) < EPSILON); @@ -239,10 +239,10 @@ class IdentityMatrix(Matrix): def checkargs(*args): raise NotImplementedError("Identity is readonly") - preRotate = checkargs - preShear = checkargs - preScale = checkargs - preTranslate = checkargs + prerotate = checkargs + preshear = checkargs + prescale = checkargs + pretranslate = checkargs concat = checkargs invert = checkargs @@ -480,12 +480,12 @@ class Rect(object): return self @property - def isEmpty(self): + def is_empty(self): """True if rectangle area is empty.""" return self.x0 == self.x1 or self.y0 == self.y1 @property - def isInfinite(self): + def is_infinite(self): """True if rectangle is infinite.""" return self.x0 > self.x1 or self.y0 > self.y1 @@ -535,14 +535,14 @@ class Rect(object): width = property(lambda self: abs(self.x1 - self.x0)) height = property(lambda self: abs(self.y1 - self.y0)) - def includePoint(self, p): + def include_point(self, p): """Extend to include point-like p.""" if len(p) != 2: raise ValueError("bad Point: sequ. length") self.x0, self.y0, self.x1, self.y1 = TOOLS._include_point_in_rect(self, p) return self - def includeRect(self, r): + def include_rect(self, r): """Extend to include rect-like r.""" if len(r) != 4: raise ValueError("bad Rect: sequ. 
length") @@ -604,7 +604,7 @@ class Rect(object): return len(rect) == 4 and bool(self - rect) is False def __abs__(self): - if self.isEmpty or self.isInfinite: + if self.is_empty or self.is_infinite: return 0.0 return (self.x1 - self.x0) * (self.y1 - self.y0) @@ -652,9 +652,9 @@ class Rect(object): l = len(x) r = Rect(self).normalize() if l == 4: - if r.isEmpty: return False + if r.is_empty: return False xr = Rect(x).normalize() - if xr.isEmpty: return True + if xr.is_empty: return True if r.x0 <= xr.x0 and r.y0 <= xr.y0 and \ r.x1 >= xr.x1 and r.y1 >= xr.y1: return True @@ -664,7 +664,8 @@ class Rect(object): r.y0 <= x[1] <= r.y1: return True return False - return False + msg = "bad type or sequence: '%s'" % repr(x) + raise ValueError(msg) def __or__(self, x): if not hasattr(x, "__len__"): @@ -672,9 +673,9 @@ class Rect(object): r = Rect(self) if len(x) == 2: - return r.includePoint(x) + return r.include_point(x) if len(x) == 4: - return r.includeRect(x) + return r.include_rect(x) raise ValueError("bad type op 2") def __and__(self, x): @@ -688,10 +689,10 @@ class Rect(object): def intersects(self, x): """Check if intersection with rectangle x is not empty.""" r1 = Rect(x) - if self.isEmpty or self.isInfinite or r1.isEmpty or r1.isInfinite: + if self.is_empty or self.is_infinite or r1.is_empty or r1.is_infinite: return False r = Rect(self) - if r.intersect(r1).isEmpty: + if r.intersect(r1).is_empty: return False return True @@ -721,17 +722,20 @@ class IRect(Rect): def __repr__(self): return "IRect" + str(tuple(self)) - def includePoint(self, p): + def include_point(self, p): """Extend rectangle to include point p.""" - return Rect.includePoint(self, p).round() + rect = self.rect.include_point(p) + return rect.irect - def includeRect(self, r): + def include_rect(self, r): """Extend rectangle to include rectangle r.""" - return Rect.includeRect(self, r).round() + rect = self.rect.include_rect(r) + return rect.irect def intersect(self, r): """Restrict rectangle to 
intersection with rectangle r.""" - return Rect.intersect(self, r).round() + rect = self.rect.intersect(r) + return rect.irect def __setitem__(self, i, v): v = int(v) @@ -793,7 +797,7 @@ class Quad(object): raise ValueError("bad Quad constructor") @property - def isRectangular(self)->bool: + def is_rectangular(self)->bool: """Check if quad is rectangular. Notes: @@ -819,53 +823,33 @@ class Quad(object): @property - def isConvex(self)->bool: + def is_convex(self)->bool: """Check if quad is convex and not degenerate. Notes: - For convexity, every line connecting two points of the quad must be - inside the quad. This is equivalent to that every corner encloses - an angle with 0 < angle < 180 degrees. - Excluding the "degenerate" case (all corners on same line), - it suffices to check that the sines of three angles are > 0. + Check that for the two diagonals, the other two corners are not + on the same side of the diagonal. Returns: True or False. """ - count = 0 - sine = TOOLS._sine_between(self.ul, self.ur, self.lr) - if sine > 0: - count += 1 - elif sine < 0: + m = planish_line(self.ul, self.lr) # puts this diagonal on x-axis + p1 = self.ll * m # transform the + p2 = self.ur * m # other two points + if p1.y * p2.y > 0: return False - - sine = TOOLS._sine_between(self.ur, self.lr, self.ll) - if sine > 0: - count += 1 - elif sine < 0: - return False - - sine = TOOLS._sine_between(self.lr, self.ll, self.ul) - if sine > 0: - count += 1 - elif sine < 0: + m = planish_line(self.ll, self.ur) # puts other diagonal on x-axis + p1 = self.lr * m # tranform the + p2 = self.ul * m # remaining points + if p1.y * p2.y > 0: return False + return True - sine = TOOLS._sine_between(self.ll, self.ul, self.ur) - if sine > 0: - count += 1 - elif sine < 0: - return False - - if count >= 2: - return True - - return False width = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr))) height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr))) 
@property - def isEmpty(self): + def is_empty(self): """Check whether all quad corners are on the same line. This is the case if width or height is zero. @@ -893,9 +877,9 @@ class Quad(object): return False if CheckRect(x): r = Rect(x) - if r.isInfinite: + if r.is_infinite: return False - if r.isEmpty: + if r.is_empty: return True return TOOLS._point_in_quad(x[:2], self) and TOOLS._point_in_quad(x[2:], self) if CheckQuad(x): @@ -931,10 +915,10 @@ class Quad(object): return Quad(-self.ul, -self.ur, -self.ll, -self.lr) def __bool__(self): - return not self.isEmpty + return not self.is_empty def __nonzero__(self): - return not self.isEmpty + return not self.is_empty def __eq__(self, quad): if not hasattr(quad, "__len__"): @@ -947,7 +931,7 @@ class Quad(object): ) def __abs__(self): - if self.isEmpty: + if self.is_empty: return 0.0 return abs(self.ul - self.ur) * abs(self.ul - self.ll) @@ -957,7 +941,7 @@ class Quad(object): Return a new quad.""" - delta = Matrix(1, 1).preTranslate(p.x, p.y) + delta = Matrix(1, 1).pretranslate(p.x, p.y) q = self * ~delta * m * delta return q diff --git a/fitz/helper-other.i b/fitz/helper-other.i index b3af17bf1..b3c9669cd 100644 --- a/fitz/helper-other.i +++ b/fitz/helper-other.i @@ -1079,13 +1079,13 @@ calc_image_matrix(int width, int height, PyObject *tr, int rotate, int keep) w = trw; h = trh; if (keep) { - large = (float) MAX(width, height); + large = (float) Py_MAX(width, height); fw = (float) width / large; fh = (float) height / large; } else { fw = fh = 1; } - small = MIN(fw, fh); + small = Py_MIN(fw, fh); if (rotate != 0 && rotate != 180) { f = fw; fw = fh; diff --git a/fitz/helper-python.i b/fitz/helper-python.i index 3ca516c0d..445bbbeca 100644 --- a/fitz/helper-python.i +++ b/fitz/helper-python.i @@ -111,13 +111,13 @@ annot_skel = { } -def getTextlength(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float: - """Calculate length of a string for a given built-in font. 
+def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float: + """Calculate length of a string for a built-in font. Args: fontname: name of the font. - fontsize: size of font in points. - encoding: encoding to use (0=Latin, 1=Greek, 2=Cyrillic). + fontsize: font size points. + encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic. Returns: (float) length of text. """ @@ -745,7 +745,7 @@ class linkDest(object): # ------------------------------------------------------------------------------- # "Now" timestamp in PDF Format # ------------------------------------------------------------------------------- -def getPDFnow() -> str: +def get_pdf_now() -> str: import time tz = "%s'%s'" % ( @@ -762,7 +762,7 @@ def getPDFnow() -> str: return tstamp -def getPDFstr(s: str) -> str: +def get_pdf_str(s: str) -> str: """ Return a PDF string depending on its coding. Notes: @@ -852,61 +852,64 @@ def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, o return "[<" + otxt + ">]" -""" -Information taken from the following web sites: -www.din-formate.de -www.din-formate.info/amerikanische-formate.html -www.directtools.de/wissen/normen/iso.htm -""" -paperSizes = { # known paper formats @ 72 dpi - "a0": (2384, 3370), - "a1": (1684, 2384), - "a10": (74, 105), - "a2": (1191, 1684), - "a3": (842, 1191), - "a4": (595, 842), - "a5": (420, 595), - "a6": (298, 420), - "a7": (210, 298), - "a8": (147, 210), - "a9": (105, 147), - "b0": (2835, 4008), - "b1": (2004, 2835), - "b10": (88, 125), - "b2": (1417, 2004), - "b3": (1001, 1417), - "b4": (709, 1001), - "b5": (499, 709), - "b6": (354, 499), - "b7": (249, 354), - "b8": (176, 249), - "b9": (125, 176), - "c0": (2599, 3677), - "c1": (1837, 2599), - "c10": (79, 113), - "c2": (1298, 1837), - "c3": (918, 1298), - "c4": (649, 918), - "c5": (459, 649), - "c6": (323, 459), - "c7": (230, 323), - "c8": (162, 230), - "c9": (113, 162), - "card-4x6": (288, 432), - "card-5x7": (360, 
504), - "commercial": (297, 684), - "executive": (522, 756), - "invoice": (396, 612), - "ledger": (792, 1224), - "legal": (612, 1008), - "legal-13": (612, 936), - "letter": (612, 792), - "monarch": (279, 540), - "tabloid-extra": (864, 1296), -} - +def paper_sizes(): + """Known paper formats @ 72 dpi as a dictionary. Key is the format string + like "a4" for ISO-A4. Value is the tuple (width, height). -def PaperSize(s: str) -> tuple: + Information taken from the following web sites: + www.din-formate.de + www.din-formate.info/amerikanische-formate.html + www.directtools.de/wissen/normen/iso.htm + """ + return { + "a0": (2384, 3370), + "a1": (1684, 2384), + "a10": (74, 105), + "a2": (1191, 1684), + "a3": (842, 1191), + "a4": (595, 842), + "a5": (420, 595), + "a6": (298, 420), + "a7": (210, 298), + "a8": (147, 210), + "a9": (105, 147), + "b0": (2835, 4008), + "b1": (2004, 2835), + "b10": (88, 125), + "b2": (1417, 2004), + "b3": (1001, 1417), + "b4": (709, 1001), + "b5": (499, 709), + "b6": (354, 499), + "b7": (249, 354), + "b8": (176, 249), + "b9": (125, 176), + "c0": (2599, 3677), + "c1": (1837, 2599), + "c10": (79, 113), + "c2": (1298, 1837), + "c3": (918, 1298), + "c4": (649, 918), + "c5": (459, 649), + "c6": (323, 459), + "c7": (230, 323), + "c8": (162, 230), + "c9": (113, 162), + "card-4x6": (288, 432), + "card-5x7": (360, 504), + "commercial": (297, 684), + "executive": (522, 756), + "invoice": (396, 612), + "ledger": (792, 1224), + "legal": (612, 1008), + "legal-13": (612, 936), + "letter": (612, 792), + "monarch": (279, 540), + "tabloid-extra": (864, 1296), + } + + +def paper_size(s: str) -> tuple: """Return a tuple (width, height) for a given paper format string. 
Notes: @@ -920,16 +923,16 @@ def PaperSize(s: str) -> tuple: size = size[:-2] if size.endswith("-p"): size = size[:-2] - rc = paperSizes.get(size, (-1, -1)) + rc = paper_sizes().get(size, (-1, -1)) if f == "p": return rc return (rc[1], rc[0]) -def PaperRect(s: str) -> Rect: +def paper_rect(s: str) -> Rect: """Return a Rect for the paper size indicated in string 's'. Must conform to the argument of method 'PaperSize', which will be invoked. """ - width, height = PaperSize(s) + width, height = paper_size(s) return Rect(0.0, 0.0, width, height) @@ -980,11 +983,11 @@ def CheckRect(r: typing.Any) -> bool: r = Rect(r) except: return False - return not (r.isEmpty or r.isInfinite) + return not (r.is_empty or r.is_infinite) def CheckQuad(q: typing.Any) -> bool: - """Check whether an object is convex, not empty quad-like. + """Check whether an object is convex, not empty quad-like. It must be a sequence of 4 number pairs. """ @@ -992,7 +995,7 @@ def CheckQuad(q: typing.Any) -> bool: q0 = Quad(q) except: return False - return q0.isConvex + return q0.is_convex def CheckMarkerArg(quads: typing.Any) -> tuple: @@ -1054,13 +1057,14 @@ def DUMMY(*args, **kw): return -def planishLine(p1: point_like, p2: point_like) -> Matrix: - """Return matrix which flattens out the line from p1 to p2. +def planish_line(p1: point_like, p2: point_like) -> Matrix: + """Compute matrix which maps line from p1 to p2 to the x-axis, such that it + maintains its length and p1 * matrix = Point(0, 0). Args: p1, p2: point_like Returns: - Matrix which maps p1 to Point(0,0) and p2 to a point on the x axis at + Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at the same distance to Point(0,0). Will always combine a rotation and a transformation. 
""" @@ -1069,7 +1073,7 @@ def planishLine(p1: point_like, p2: point_like) -> Matrix: return Matrix(TOOLS._hor_matrix(p1, p2)) -def ImageProperties(img: typing.ByteString) -> dict: +def image_properties(img: typing.ByteString) -> dict: """ Return basic properties of an image. Args: @@ -1160,40 +1164,6 @@ def ConversionTrailer(i: str): return r -def DerotateRect(cropbox: rect_like, rect: rect_like, deg: float) -> Rect: - """Calculate the non-rotated rect version. - - Args: - cropbox: the page's /CropBox - rect: rectangle - deg: the page's /Rotate value - Returns: - Rectangle in original (/CropBox) coordinates - """ - while deg < 0: - deg += 360 - while deg >= 360: - deg -= 360 - if deg % 90 > 0: - deg = 0 - if deg == 0: # no rotation: no-op - return rect - points = [] # store the new rect points here - for p in rect.quad: # run through the rect's quad points - if deg == 90: - q = (p.y, cropbox.height - p.x) - elif deg == 270: - q = (cropbox.width - p.y, p.x) - else: - q = (cropbox.width - p.x, cropbox.height - p.y) - points.append(q) - - r = Rect(points[0], points[0]) - for p in points[1:]: - r |= p - return r - - def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list: """Return rectangles of text lines between two points. @@ -1219,7 +1189,7 @@ def get_highlight_selection(page, start: point_like =None, stop: point_like =Non stop = clip.br clip.y0 = start.y clip.y1 = stop.y - if clip.isEmpty or clip.isInfinite: + if clip.is_empty or clip.is_infinite: return [] # extract text of page, clip only, no images, expand ligatures @@ -1241,7 +1211,7 @@ def get_highlight_selection(page, start: point_like =None, stop: point_like =Non bboxf = lines.pop(0) if bboxf.y0 - start.y <= 0.1 * bboxf.height: # close enough? 
r = Rect(start.x, bboxf.y0, bboxf.br) # intersection rectangle - if not (r.isEmpty or r.isInfinite): + if not (r.is_empty or r.is_infinite): lines.insert(0, r) # insert again if not empty else: lines.insert(0, bboxf) # insert again @@ -1253,7 +1223,7 @@ def get_highlight_selection(page, start: point_like =None, stop: point_like =Non bboxl = lines.pop() if stop.y - bboxl.y1 <= 0.1 * bboxl.height: # close enough? r = Rect(bboxl.tl, stop.x, bboxl.y1) # intersection rectangle - if not (r.isEmpty or r.isInfinite): + if not (r.is_empty or r.is_infinite): lines.append(r) # append if not empty else: lines.append(bboxl) # append again @@ -1330,7 +1300,7 @@ def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> lis PyMuPDF Rect objects of equal sizes. """ rect = Rect(rect) # ensure this is a Rect - if rect.isEmpty or rect.isInfinite: + if rect.is_empty or rect.is_infinite: raise ValueError("rect must be finite and not empty") tl = rect.tl diff --git a/fitz/helper-select.i b/fitz/helper-select.i index c9b846630..0402dc66d 100644 --- a/fitz/helper-select.i +++ b/fitz/helper-select.i @@ -323,28 +323,29 @@ void retainpages(fz_context *ctx, globals *glo, PyObject *liste) void remove_dest_range(fz_context *ctx, pdf_document *pdf, PyObject *numbers) { - int i, j, pno, len, pagecount = pdf_count_pages(ctx, pdf); - PyObject *n1 = NULL; fz_try(ctx) { + int i, j, pno, len, pagecount = pdf_count_pages(ctx, pdf); + PyObject *n1 = NULL; + pdf_obj *target, *annots, *pageref, *o, *action, *dest; for (i = 0; i < pagecount; i++) { n1 = PyLong_FromLong((long) i); - if (PySequence_Contains(numbers, n1)) { + if (PySet_Contains(numbers, n1)) { Py_DECREF(n1); continue; } Py_DECREF(n1); - pdf_obj *pageref = pdf_lookup_page_obj(ctx, pdf, i); - pdf_obj *annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots)); - pdf_obj *target; + pageref = pdf_lookup_page_obj(ctx, pdf, i); + annots = pdf_dict_get(ctx, pageref, PDF_NAME(Annots)); if (!annots) continue; len = pdf_array_len(ctx, 
annots); for (j = len - 1; j >= 0; j -= 1) { - pdf_obj *o = pdf_array_get(ctx, annots, j); - if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link))) + o = pdf_array_get(ctx, annots, j); + if (!pdf_name_eq(ctx, pdf_dict_get(ctx, o, PDF_NAME(Subtype)), PDF_NAME(Link))) { continue; - pdf_obj *action = pdf_dict_get(ctx, o, PDF_NAME(A)); - pdf_obj *dest = pdf_dict_get(ctx, o, PDF_NAME(Dest)); + } + action = pdf_dict_get(ctx, o, PDF_NAME(A)); + dest = pdf_dict_get(ctx, o, PDF_NAME(Dest)); if (action) { if (!pdf_name_eq(ctx, pdf_dict_get(ctx, action, PDF_NAME(S)), PDF_NAME(GoTo))) @@ -361,17 +362,18 @@ void remove_dest_range(fz_context *ctx, pdf_document *pdf, PyObject *numbers) pdf_to_text_string(ctx, dest), NULL, NULL); } - if (pno < 0) { // page lookup did not work + if (pno < 0) { // page number lookup did not work continue; } n1 = PyLong_FromLong((long) pno); - if (PySequence_Contains(numbers, n1)) { + if (PySet_Contains(numbers, n1)) { pdf_array_delete(ctx, annots, j); } Py_DECREF(n1); } } } + fz_catch(ctx) { fz_rethrow(ctx); } diff --git a/fitz/utils.py b/fitz/utils.py index fb06b4e41..0125cf117 100644 --- a/fitz/utils.py +++ b/fitz/utils.py @@ -75,7 +75,7 @@ def write_text(page: Page, **kwargs) -> None: writers = (writers,) clip = writers[0].text_rect textdoc = Document() - tpage = textdoc.newPage(width=page.rect.width, height=page.rect.height) + tpage = textdoc.new_page(width=page.rect.width, height=page.rect.height) for writer in writers: clip |= writer.text_rect writer.write_text(tpage, opacity=opacity, color=color) @@ -166,7 +166,7 @@ def calc_matrix(sr, tr, keep=True, rotate=0): raise ValueError("not a PDF") rect = page.rect & rect # intersect with page rectangle - if rect.isEmpty or rect.isInfinite: + if rect.is_empty or rect.is_infinite: raise ValueError("rect must be finite and not empty") if reuse_xref > 0: @@ -181,7 +181,7 @@ def calc_matrix(sr, tr, keep=True, rotate=0): tar_rect = rect * ~page.transformation_matrix # target rect 
in PDF coordinates src_rect = src_page.rect if not clip else src_page.rect & clip # source rect - if src_rect.isEmpty or src_rect.isInfinite: + if src_rect.is_empty or src_rect.is_infinite: raise ValueError("clip must be finite and not empty") src_rect = src_rect * ~src_page.transformation_matrix # ... in PDF coord @@ -325,7 +325,7 @@ def insert_image(page, rect, **kwargs): raise ValueError("bad rotate value") r = Rect(rect) - if r.isEmpty or r.isInfinite: + if r.is_empty or r.is_infinite: raise ValueError("rect must be finite and not empty") clip = r * ~page.transformation_matrix @@ -366,7 +366,7 @@ def insert_image(page, rect, **kwargs): return xref -def searchFor(*args, **kwargs) -> list: +def search_for(*args, **kwargs) -> list: """Search for a string on a page. Args: @@ -393,7 +393,7 @@ def searchFor(*args, **kwargs) -> list: return rlist -def searchPageFor( +def search_page_for( doc: Document, pno: int, text: str, @@ -414,7 +414,7 @@ def searchPageFor( a list of rectangles or quads, each containing an occurrence. 
""" - return doc[pno].searchFor( + return doc[pno].search_for( text, quads=quads, clip=clip, @@ -422,7 +422,7 @@ def searchPageFor( ) -def getTextBlocks( +def get_text_blocks( page: Page, clip: rect_like = None, flags: OptInt = None, @@ -446,7 +446,7 @@ def getTextBlocks( return blocks -def getTextWords( +def get_text_words( page: Page, clip: rect_like = None, flags: OptInt = None, @@ -465,17 +465,17 @@ def getTextWords( return words -def getTextbox( +def get_textbox( page: Page, rect: rect_like, ) -> str: - rc = page.getText("text", clip=rect, flags=0) + rc = page.get_text("text", clip=rect, flags=0) if rc.endswith("\n"): rc = rc[:-1] return rc -def getTextSelection( +def get_text_selection( page: Page, p1: point_like, p2: point_like, @@ -562,7 +562,7 @@ def get_image_rects(page: Page, name, transform=False) -> list: return bboxes -def getText( +def get_text( page: Page, option: str = "text", clip: rect_like = None, @@ -578,7 +578,7 @@ def getText( flags: bit switches to e.g. exclude images or decompose ligatures Returns: - the output of methods getTextWords / getTextBlocks or TextPage + the output of methods get_text_words / get_text_blocks or TextPage methods extractText, extractHTML, extractDICT, extractJSON, extractRAWDICT, extractXHTML or etractXML respectively. Default and misspelling choice is "text". @@ -604,9 +604,9 @@ def getText( flags += TEXT_PRESERVE_IMAGES if option == "words": - return getTextWords(page, clip=clip, flags=flags) + return get_text_words(page, clip=clip, flags=flags) if option == "blocks": - return getTextBlocks(page, clip=clip, flags=flags) + return get_text_blocks(page, clip=clip, flags=flags) CheckParent(page) cb = None if option in ("html", "xml", "xhtml"): # no clipping for MuPDF functions @@ -640,7 +640,7 @@ def getText( return t -def getPageText( +def get_page_text( doc: Document, pno: int, option: str = "text", @@ -650,14 +650,14 @@ def getPageText( """Extract a document page's text by page number. 
Notes: - Convenience function calling page.getText(). + Convenience function calling page.get_text(). Args: pno: page number option: (str) text, words, blocks, html, dict, json, rawdict, xhtml or xml. Returns: output from page.TextPage(). """ - return doc[pno].getText(option, clip=clip, flags=flags) + return doc[pno].get_text(option, clip=clip, flags=flags) def get_pixmap(page: Page, **kw) -> Pixmap: @@ -776,20 +776,13 @@ def get_links(page: Page) -> list: """ CheckParent(page) - ln = page.firstLink + ln = page.first_link links = [] while ln: nl = getLinkDict(ln) - # if nl["kind"] == LINK_GOTO: - # if type(nl["to"]) is Point and nl["page"] >= 0: - # doc = page.parent - # target_page = doc[nl["page"]] - # ctm = target_page.transformation_matrix - # point = nl["to"] * ctm - # nl["to"] = point links.append(nl) ln = ln.next - if len(links) > 0: + if links != []: linkxrefs = [x for x in page.annot_xrefs() if x[1] == PDF_ANNOT_LINK] if len(linkxrefs) == len(links): for i in range(len(linkxrefs)): @@ -798,7 +791,7 @@ def get_links(page: Page) -> list: return links -def getToC( +def get_toc( doc: Document, simple: bool = True, ) -> list: @@ -881,13 +874,13 @@ def set_toc_item( It allows changing the item's title and link destination. Args: - idx: (int) desired index of the TOC list, as created by getTOC. - dest_dict: (dict) destination dictionary as created by getTOC(False). + idx: (int) desired index of the TOC list, as created by get_toc. + dest_dict: (dict) destination dictionary as created by get_toc(False). Outrules all other parameters. If None, the remaining parameters are used to make a dest dictionary. kind: (int) kind of link (LINK_GOTO, etc.). If None, then only the title will be updated. If LINK_NONE, the TOC item will be deleted. - pno: (int) page number (1-based like in getTOC). Required if LINK_GOTO. + pno: (int) page number (1-based like in get_toc). Required if LINK_GOTO. uri: (str) the URL, required if LINK_URI. title: (str) the new title. 
No change if None. to: (point-like) destination on the target page. If omitted, (72, 36) @@ -961,7 +954,7 @@ def set_toc_item( return doc._update_toc_item(xref, action=action[2:], title=title) -def getRectArea(*args) -> float: +def get_area(*args) -> float: """Calculate area of rectangle.\nparameter is one of 'px' (default), 'in', 'cm', or 'mm'.""" rect = args[0] if len(args) > 1: @@ -1031,7 +1024,7 @@ def set_metadata(doc: Document, m: dict) -> None: if not bool(val) or not type(val) is str or val == "none": val = "null" else: - val = fitz.getPDFstr(val) + val = get_pdf_str(val) doc.xref_set_key(info_xref, pdf_key, val) doc.init_doc() return @@ -1067,21 +1060,21 @@ def getDestStr(xref: int, ddict: dict) -> str: return dest if ddict["kind"] == LINK_URI: - dest = str_uri % (getPDFstr(ddict["uri"]),) + dest = str_uri % (get_pdf_str(ddict["uri"]),) return dest if ddict["kind"] == LINK_LAUNCH: - fspec = getPDFstr(ddict["file"]) + fspec = get_pdf_str(ddict["file"]) dest = str_launch % (fspec, fspec) return dest if ddict["kind"] == LINK_GOTOR and ddict["page"] < 0: - fspec = getPDFstr(ddict["file"]) - dest = str_gotor2 % (getPDFstr(ddict["to"]), fspec, fspec) + fspec = get_pdf_str(ddict["file"]) + dest = str_gotor2 % (get_pdf_str(ddict["to"]), fspec, fspec) return dest if ddict["kind"] == LINK_GOTOR and ddict["page"] >= 0: - fspec = getPDFstr(ddict["file"]) + fspec = get_pdf_str(ddict["file"]) dest = str_gotor1 % ( ddict["page"], ddict["to"].x, @@ -1095,7 +1088,7 @@ def getDestStr(xref: int, ddict: dict) -> str: return "" -def setToC( +def set_toc( doc: Document, toc: list, collapse: int = 1, @@ -1165,7 +1158,7 @@ def setToC( for i in range(toclen): o = toc[i] lvl = o[0] # level - title = getPDFstr(o[1]) # title + title = get_pdf_str(o[1]) # title pno = min(doc.page_count - 1, max(0, o[2] - 1)) # page number page_xref = doc.page_xref(pno) page_height = doc.page_cropbox(pno).height @@ -1314,7 +1307,7 @@ def cre_annot(lnk, xref_dst, pno_src, ctm): ) else: txt = 
annot_skel["gotor2"] # annot_gotor_n - to = getPDFstr(lnk["to"]) + to = get_pdf_str(lnk["to"]) to = to[1:-1] f = lnk["file"] annot = txt % (to, f, rect) @@ -1413,7 +1406,7 @@ def getLinkText(page: Page, lnk: dict) -> str: annot = txt % (xref, ipnt.x, ipnt.y, rect) else: txt = annot_skel["goto2"] # annot_goto_n - annot = txt % (getPDFstr(lnk["to"]), rect) + annot = txt % (get_pdf_str(lnk["to"]), rect) elif lnk["kind"] == LINK_GOTOR: if lnk["page"] >= 0: @@ -1424,7 +1417,7 @@ def getLinkText(page: Page, lnk: dict) -> str: annot = txt % (lnk["page"], pnt.x, pnt.y, lnk["file"], lnk["file"], rect) else: txt = annot_skel["gotor2"] # annot_gotor_n - annot = txt % (getPDFstr(lnk["to"]), lnk["file"], rect) + annot = txt % (get_pdf_str(lnk["to"]), lnk["file"], rect) elif lnk["kind"] == LINK_LAUNCH: txt = annot_skel["launch"] # annot_launch @@ -1463,14 +1456,14 @@ def getLinkText(page: Page, lnk: dict) -> str: return annot -def deleteWidget(page: Page, widget: Widget) -> Widget: +def delete_widget(page: Page, widget: Widget) -> Widget: """Delete widget from page and return the next one.""" CheckParent(page) annot = getattr(widget, "_annot", None) if annot is None: raise ValueError("bad type: widget") nextwidget = widget.next - page.deleteAnnot(annot) + page.delete_annot(annot) widget._annot.__del__() widget._annot.parent = None keylist = list(widget.__dict__.keys()) @@ -1479,7 +1472,7 @@ def deleteWidget(page: Page, widget: Widget) -> Widget: return nextwidget -def updateLink(page: Page, lnk: dict) -> None: +def update_link(page: Page, lnk: dict) -> None: """ Update a link on the current page. """ CheckParent(page) annot = getLinkText(page, lnk) @@ -1490,7 +1483,7 @@ def updateLink(page: Page, lnk: dict) -> None: return -def insertLink(page: Page, lnk: dict, mark: bool = True) -> None: +def insert_link(page: Page, lnk: dict, mark: bool = True) -> None: """ Insert a new link for the current page. 
""" CheckParent(page) annot = getLinkText(page, lnk) @@ -1617,18 +1610,26 @@ def insert_text( return rc -def newPage( +def new_page( doc: Document, pno: int = -1, width: float = 595, height: float = 842, ) -> Page: - """Create and return a new page object.""" + """Create and return a new page object. + + Args: + pno: (int) insert before this page. Default: after last page. + width: (float) page width in points. Default: 595 (ISO A4 width). + height: (float) page height in points. Default 842 (ISO A4 height). + Returns: + A Page object. + """ doc._newPage(pno, width=width, height=height) return doc[pno] -def insertPage( +def insert_page( doc: Document, pno: int, text: typing.Union[str, list, None] = None, @@ -1642,10 +1643,10 @@ def insertPage( """Create a new PDF page and insert some text. Notes: - Function combining Document.newPage() and Page.insert_text(). + Function combining Document.new_page() and Page.insert_text(). For parameter details see these methods. """ - page = doc.newPage(pno=pno, width=width, height=height) + page = doc.new_page(pno=pno, width=width, height=height) if not bool(text): return 0 rc = page.insert_text( @@ -3386,7 +3387,7 @@ def insert_textbox( unused or deficit rectangle area (float) """ rect = Rect(rect) - if rect.isEmpty or rect.isInfinite: + if rect.is_empty or rect.is_infinite: raise ValueError("text box must be finite and not empty") color_str = ColorCode(color, "c") @@ -3805,7 +3806,7 @@ def center_rect(annot_rect, text, font, fsize): if not text: return annot_rect try: - text_width = getTextlength(text, font, fsize) + text_width = get_text_length(text, font, fsize) except ValueError: # unsupported font return annot_rect line_height = fsize * 1.2 @@ -4035,8 +4036,8 @@ def fill_textbox( Args: writer: TextWriter object (= "self") - text: string or list/tuple of strings. rect: rect-like to receive the text. + text: string or list/tuple of strings. pos: point-like start position of first word. 
font: Font object (default Font('helv')). fontsize: the fontsize. @@ -4046,13 +4047,16 @@ def fill_textbox( right_to_left: (bool) indicate right-to-left language. """ rect = Rect(rect) - if rect.isEmpty or rect.isInfinite: + if rect.is_empty or rect.is_infinite: raise ValueError("fill rect must be finite and not empty.") if type(font) is not Font: font = Font("helv") def textlen(x): - return font.text_length(x, fontsize) # abbreviation + return font.text_length(x, fontsize=fontsize) # abbreviation + + def char_lengths(x): + return font.char_lengths(x, fontsize=fontsize) def append_this(pos, text): return writer.append(pos, text, font=font, fontsize=fontsize) @@ -4063,29 +4067,30 @@ def append_this(pos, text): std_start = rect.x0 + tolerance def norm_words(width, words): - """Cut any word in pieces that is longer than 'width'.""" + """Cut any word in pieces no longer than 'width'.""" nwords = [] + word_lengths = [] for w in words: - wl = textlen(w) + wl_lst = char_lengths(w) + wl = sum(wl_lst) if wl <= width: # nothing to do - copy over nwords.append(w) + word_lengths.append(wl) continue - # word longer than rect width - look at single chars - wl_lst = [textlen(c) for c in w] # lengths of chars - while True: - wls = 0 # length of word piece - for i in range(len(wl_lst)): - wls += wl_lst[i] - if wls > width: - break - - if i == len(wl_lst) - 1: # reached end of the word? 
- nwords.append(w) - break - nwords.append(w[:i]) # output word segment - w = w[i:] # remainder of word - wl_lst = wl_lst[i:] # remainder of char lengths - return nwords + + # word longer than rect width - split it in parts + n = len(wl_lst) + while n > 0: + wl = sum(wl_lst[:n]) + if wl <= width: + nwords.append(w[: n + 1]) + word_lengths.append(wl) + w = w[n + 1 :] + wl_lst = wl_lst[n + 1 :] + n = len(wl_lst) + else: + n -= 1 + return nwords, word_lengths def output_justify(start, line): """Justified output of a line.""" @@ -4120,8 +4125,6 @@ def output_justify(start, line): # starting point of text if pos is not None: pos = Point(pos) - if not pos in rect: - raise ValueError("'pos' must be inside 'rect'") else: # default is just below rect top-left pos = rect.tl + (tolerance, fontsize * asc) if not pos in rect: @@ -4136,7 +4139,7 @@ def output_justify(start, line): factor = 0 # split in lines if just a string was given - if type(text) not in (tuple, list): + if type(text) is str: textlines = text.splitlines() else: textlines = [] @@ -4146,7 +4149,7 @@ def output_justify(start, line): max_lines = int((rect.y1 - pos.y) / (lheight * fontsize)) new_lines = [] # the final list of textbox lines - no_justify = [] # do not justify these line numbers + no_justify = [] # no justify for these line numbers for i, line in enumerate(textlines): if line in ("", " "): new_lines.append((line, space_len)) @@ -4161,7 +4164,7 @@ def output_justify(start, line): if right_to_left: # reverses Arabic / Hebrew text front to back line = writer.clean_rtl(line) tl = textlen(line) - if textlen(line) <= width: # line short enough + if tl <= width: # line short enough new_lines.append((line, tl)) no_justify.append((len(new_lines) - 1)) continue @@ -4170,22 +4173,23 @@ def output_justify(start, line): words = line.split(" ") # the words in the line # cut in parts any words that are longer than rect width - words = norm_words(std_width, words) - - j = 1 - while len(words) > 0: - line0 = " 
".join(words[:-j]) if j > 0 else " ".join(words) - tl = textlen(line0) - if tl <= width: - new_lines.append((line0, tl)) # shortened line fits - if j == 0: # this was the last part of line - no_justify.append((len(new_lines) - 1)) - break - del words[:-j] - j = 0 - width = rect.width - tolerance + words, word_lengths = norm_words(std_width, words) + + n = len(words) + while True: + line0 = " ".join(words[:n]) + wl = sum(word_lengths[:n]) + space_len * (len(word_lengths[:n]) - 1) + if wl <= width: + new_lines.append((line0, wl)) + words = words[n:] + word_lengths = word_lengths[n:] + n = len(words) + line0 = None else: - j += 1 + n -= 1 + + if len(words) == 0: + break nlines = len(new_lines) if nlines > max_lines: @@ -4696,8 +4700,8 @@ def recover_bbox_quad(line_dir: tuple, span: dict, bbox: tuple) -> Quad: height = d * span["size"] # the quad's rectangle height # The following are distances from the bbox corners, at wich we find the - # respective quad points. The calculation depends, on in which circle - # quadrant the text writing angle is positioned. + # respective quad points. The computat depends on in which circle + # quadrant the text writing angle is located. 
hs = height * sin hc = height * cos if hc >= 0 and hs <= 0: # quadrant 1 @@ -4768,7 +4772,7 @@ def recover_line_quad(line: dict, spans: list = None) -> Quad: line_ll = q0.ll # lower-left of line quad line_lr = q1.lr # lower-right of line quad - mat0 = planishLine(line_ll, line_lr) + mat0 = planish_line(line_ll, line_lr) # map base line to x-axis such that line_ll goes to (0, 0) x_lr = line_lr * mat0 @@ -4815,7 +4819,7 @@ def recover_span_quad(line_dir: tuple, span: dict, chars: list = None) -> Quad: span_ll = q0.ll # lower-left of span quad span_lr = q1.lr # lower-right of span quad - mat0 = planishLine(span_ll, span_lr) + mat0 = planish_line(span_ll, span_lr) # map base line to x-axis such that span_ll goes to (0, 0) x_lr = span_lr * mat0 diff --git a/fitz/version.i b/fitz/version.i index 5c7f2fdc2..ed275ad7e 100644 --- a/fitz/version.i +++ b/fitz/version.i @@ -1,6 +1,6 @@ %pythoncode %{ VersionFitz = "1.18.0" -VersionBind = "1.18.13" -VersionDate = "2021-05-05 06:32:22" -version = (VersionBind, VersionFitz, "20210505063222") +VersionBind = "1.18.14" +VersionDate = "2021-06-01 08:11:38" +version = (VersionBind, VersionFitz, "20210601081138") %} \ No newline at end of file diff --git a/setup.py b/setup.py index 7460c26b7..9fa8c58bb 100644 --- a/setup.py +++ b/setup.py @@ -141,20 +141,21 @@ def load_libraries(): # library_dirs=['./mupdf/platform/win32/x64/Release'], ) -pkg_tab = open("PKG-INFO").read().split("\n") -long_dtab = [] -classifier = [] -for l in pkg_tab: - if l.startswith("Classifier: "): - classifier.append(l[12:]) +pkg_tab = open("PKG-INFO", "rb").read().splitlines() +long_dtab = [] # long description lines +classifier = [] # classifier lines +for line in pkg_tab: + line = line.decode() + if line.startswith("Classifier: "): + classifier.append(line[12:]) continue - if l.startswith(" "): - long_dtab.append(l.strip()) + if line.startswith(" ") or line == "": + long_dtab.append(line.strip()) long_desc = "\n".join(long_dtab) setup( name="PyMuPDF", - 
version="1.18.12", + version="1.18.14", description="Python bindings for the PDF rendering library MuPDF", long_description=long_desc, classifiers=classifier, diff --git a/tests/README.md b/tests/README.md index 8e9f4ef97..b7bb028b6 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,8 +4,9 @@ This folder contains a number of basic tests to confirm that PyMuPDF is correctl The following areas are currently covered: * encryption and decryption * extraction of drawings -* "geometry": correct working of points, rectangles, matrices +* "geometry": correct working of points, rectangles, matrices and operator algebra * image bbox computation +* handling of embedded files * image insertion * PDF document joining * computation of quadrilaterals for non-horizontal text @@ -17,9 +18,13 @@ The following areas are currently covered: * handling of PDF page labels * pixmap handling * show PDF pages inside other PDF pages -* textbox text extraction +* text extraction * text searching * handling of PDF Tables of Contents +* annotation handling +* field / widget handling + +This is **_not a coverage test_**, although a significant part of the Python part **_does_** get executed (ca. 60%). Achieving a much higher code coverage remains an ongoing task. To use these scripts, you must have installed `pytest`: @@ -32,26 +37,32 @@ python3.8 -m pytest ============================ test session starts ===================== platform linux -- Python 3.8.5, pytest-6.2.3, py-1.10.0, pluggy-0.13.1 rootdir: .../pymupdf -collected 31 items - -test_badfonts.py . [ 3%] -test_crypting.py . [ 6%] -test_drawings.py . [ 9%] -test_geometry.py ... [ 19%] -test_imagebbox.py . [ 22%] -test_insertimage.py . [ 25%] -test_insertpdf.py . [ 29%] -test_linequad.py . [ 32%] -test_metadata.py .. [ 38%] -test_nonpdf.py ... [ 48%] -test_object_manipulation.py ... [ 58%] -test_pagedelete.py . [ 61%] -test_pagelabels.py . [ 64%] -test_pixmap.py ... [ 74%] -test_showpdfpage.py . [ 77%] -test_textbox.py .. 
[ 83%] +collected 72 items + +test_annots.py ............... [ 20%] +test_badfonts.py . [ 22%] +test_crypting.py . [ 23%] +test_drawings.py . [ 25%] +test_embeddedfiles.py . [ 26%] +test_font.py .. [ 29%] +test_general.py ........... [ 44%] +test_geometry.py ....... [ 54%] +test_imagebbox.py . [ 55%] +test_insertimage.py . [ 56%] +test_insertpdf.py . [ 58%] +test_linequad.py . [ 59%] +test_metadata.py .. [ 62%] +test_nonpdf.py ... [ 66%] +test_object_manipulation.py ... [ 70%] +test_pagedelete.py . [ 72%] +test_pagelabels.py . [ 73%] +test_pixmap.py ..... [ 80%] +test_showpdfpage.py . [ 81%] +test_textbox.py .. [ 84%] +test_textextract.py . [ 86%] test_textsearch.py . [ 87%] -test_toc.py .... [100%] +test_toc.py .... [ 93%] +test_widgets.py ..... [100%] -======================== 31 passed in 2.25s ========================== +====================== 72 passed in 1.43s =========================== ``` \ No newline at end of file diff --git a/tests/resources/symbols.txt b/tests/resources/symbols.txt index e9f5d7125..0d5d9fc49 100644 --- a/tests/resources/symbols.txt +++ b/tests/resources/symbols.txt @@ -202,7 +202,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(80.0, 362.5, 90.0, 375.0), + 'rect': Rect(60.0, 362.5, 90.0, 375.0), 'width': 1.0}, {'closePath': False, 'color': [0.0], @@ -299,7 +299,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(75.0, 470.0, 100.0, 510.0), + 'rect': Rect(50.0, 470.0, 100.0, 510.0), 'width': 1.0}, {'closePath': False, 'color': [0.9333329796791077, 0.8470590114593506, 0.6823530197143555], @@ -339,7 +339,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(82.60870361328125, 547.7540283203125, 85.5072021484375, 562.2459716796875), + 'rect': Rect(56.52170181274414, 547.7540283203125, 85.5072021484375, 562.2459716796875), 'width': 0.07246380299329758}, {'closePath': False, 'color': [0.8039219975471497, 0.7294120192527771, 0.5882350206375122], @@ -355,7 +355,7 @@ 'lineCap': (0, 
0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(59.4202995300293, 559.3480224609375, 82.60870361328125, 559.3480224609375), + 'rect': Rect(59.4202995300293, 550.6519775390625, 82.60870361328125, 559.3480224609375), 'width': 0.07246380299329758}, {'closePath': False, 'color': None, @@ -464,7 +464,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(92.60209655761719, 552.1010131835938, 94.2029037475586, 557.8989868164062), + 'rect': Rect(92.60209655761719, 552.1010131835938, 100.0, 557.8989868164062), 'width': 1.0}, {'closePath': False, 'color': [0.7215690016746521, 0.5254899859428406, 0.04313730075955391], @@ -534,7 +534,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(63.941001892089844, 552.8259887695312, 65.94200134277344, 557.1740112304688), + 'rect': Rect(63.941001892089844, 552.8259887695312, 75.18939971923828, 557.1740112304688), 'width': 1.0}, {'closePath': True, 'color': [1.0, 1.0, 1.0], @@ -550,7 +550,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(61.352699279785156, 548.47802734375, 61.352699279785156, 561.52197265625), + 'rect': Rect(58.937198638916016, 548.47802734375, 61.352699279785156, 561.52197265625), 'width': 1.1594200134277344}, {'closePath': True, 'color': None, @@ -630,7 +630,7 @@ 'lineCap': (0, 0, 0), 'lineJoin': 0, 'opacity': 1.0, - 'rect': Rect(80.0, 602.5, 90.0, 615.0), + 'rect': Rect(60.0, 602.5, 90.0, 615.0), 'width': 1.0}, {'closePath': False, 'color': [0.0], diff --git a/tests/test_annots.py b/tests/test_annots.py new file mode 100644 index 000000000..44035be30 --- /dev/null +++ b/tests/test_annots.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +""" +Test PDF annotation insertions. +""" +import fitz + +fitz.TOOLS.set_annot_stem("jorj") + +red = (1, 0, 0) +blue = (0, 0, 1) +gold = (1, 1, 0) +green = (0, 1, 0) + +displ = fitz.Rect(0, 50, 0, 50) +r = fitz.Rect(72, 72, 220, 100) +t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" 
+rect = fitz.Rect(100, 100, 200, 200) + + +def test_caret(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_caret_annot(rect.tl) + assert annot.type == (14, "Caret") + annot.update(rotate=20) + + +def test_freetext(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_freetext_annot( + rect, + t1, + fontsize=10, + rotate=90, + text_color=blue, + fill_color=gold, + align=fitz.TEXT_ALIGN_CENTER, + ) + annot.set_border(width=0.3, dashes=[2]) + annot.update(text_color=blue, fill_color=gold) + assert annot.type == (2, "FreeText") + + +def test_text(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_text_annot(r.tl, t1) + assert annot.type == (0, "Text") + + +def test_highlight(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_highlight_annot(rect) + assert annot.type == (8, "Highlight") + + +def test_underline(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_underline_annot(rect) + assert annot.type == (9, "Underline") + + +def test_squiggly(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_squiggly_annot(rect) + assert annot.type == (10, "Squiggly") + + +def test_strikeout(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_strikeout_annot(rect) + assert annot.type == (11, "StrikeOut") + page.delete_annot(annot) + + +def test_polyline(): + doc = fitz.open() + page = doc.new_page() + rect = page.rect + (100, 36, -100, -36) + cell = fitz.make_table(rect, rows=10) + for i in range(10): + annot = page.add_polyline_annot((cell[i][0].bl, cell[i][0].br)) + annot.set_line_ends(i, i) + annot.update() + for i, annot in enumerate(page.annots()): + assert annot.line_ends == (i, i) + assert annot.type == (7, "PolyLine") + + +def test_polygon(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_polygon_annot([rect.bl, rect.tr, rect.br, rect.tl]) + assert annot.type == (6, "Polygon") + + +def test_line(): + doc = fitz.open() + page = doc.new_page() + rect = page.rect 
+ (100, 36, -100, -36) + cell = fitz.make_table(rect, rows=10) + for i in range(10): + annot = page.add_line_annot(cell[i][0].bl, cell[i][0].br) + annot.set_line_ends(i, i) + annot.update() + for i, annot in enumerate(page.annots()): + assert annot.line_ends == (i, i) + assert annot.type == (3, "Line") + + +def test_square(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_rect_annot(rect) + assert annot.type == (4, "Square") + + +def test_circle(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_circle_annot(rect) + assert annot.type == (5, "Circle") + + +def test_fileattachment(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_file_annot(rect.tl, b"just anything for testing", "testdata.txt") + assert annot.type == (17, "FileAttachment") + + +def test_stamp(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_stamp_annot(r, stamp=10) + assert annot.type == (13, "Stamp") + annot_id = annot.info["id"] + annot_xref = annot.xref + a1 = page.load_annot(annot_id) + a2 = page.load_annot(annot_xref) + page = doc.reload_page(page) + + +def test_redact(): + doc = fitz.open() + page = doc.new_page() + annot = page.add_redact_annot(r, text="Hello") + annot.update( + cross_out=True, + rotate=-1, + ) + assert annot.type == (12, "Redact") + x = annot._get_redact_values() + pix = annot.get_pixmap() + info = annot.info + annot.set_info(info) + assert not annot.has_popup + annot.set_popup(r) + s = annot.popup_rect + assert s == r + page.apply_redactions() diff --git a/tests/test_drawings.py b/tests/test_drawings.py index acd60ca52..738a6cd00 100644 --- a/tests/test_drawings.py +++ b/tests/test_drawings.py @@ -12,7 +12,7 @@ symbols = os.path.join(scriptdir, "resources", "symbols.txt") -def test_drawings(): +def test_drawings1(): symbols_text = open(symbols).read() # expected result doc = fitz.open(filename) page = doc[0] @@ -20,3 +20,9 @@ def test_drawings(): out = io.StringIO() # pprint output goes here pprint(paths, 
stream=out) assert symbols_text == out.getvalue() + + +def test_drawings2(): + doc = fitz.open() + page = doc.new_page() + page.draw_circle((100, 100), 2) diff --git a/tests/test_embeddedfiles.py b/tests/test_embeddedfiles.py new file mode 100644 index 000000000..613c9865d --- /dev/null +++ b/tests/test_embeddedfiles.py @@ -0,0 +1,24 @@ +""" +Tests for PDF EmbeddedFiles functions. +""" +import fitz + + +def test_embedded1(): + doc = fitz.open() + buffer = b"123456678790qwexcvnmhofbnmfsdg4589754uiofjkb-" + doc.embfile_add( + "file1", + buffer, + filename="testfile.txt", + ufilename="testfile-u.txt", + desc="Description of some sort", + ) + assert doc.embfile_count() == 1 + assert doc.embfile_names() == ["file1"] + assert doc.embfile_info(0)["name"] == "file1" + doc.embfile_upd(0, filename="new-filename.txt") + assert doc.embfile_info(0)["filename"] == "new-filename.txt" + assert doc.embfile_get(0) == buffer + doc.embfile_del(0) + assert doc.embfile_count() == 0 \ No newline at end of file diff --git a/tests/test_font.py b/tests/test_font.py new file mode 100644 index 000000000..e37af421b --- /dev/null +++ b/tests/test_font.py @@ -0,0 +1,26 @@ +""" +Tests for the Font class. 
+""" +import fitz + + +def test_font1(): + font = fitz.Font("cjk") + assert font.name == "Droid Sans Fallback Regular" + text = "PyMuPDF" + tl = font.text_length(text, fontsize=20) + cl = font.char_lengths(text, fontsize=20) + assert len(text) == len(cl) + assert abs(sum(cl) - tl) < fitz.EPSILON + for i in range(len(cl)): + assert cl[i] == font.glyph_advance(ord(text[i])) * 20 + font2 = fitz.Font(fontbuffer=font.buffer) + assert font2.name == font.name + assert len(font.valid_codepoints()) > 30000 + + +def test_font2(): + """Old and new length computation must be the same.""" + font = fitz.Font("helv") + text = "PyMuPDF" + assert font.text_length(text) == fitz.get_text_length(text) \ No newline at end of file diff --git a/tests/test_general.py b/tests/test_general.py new file mode 100644 index 000000000..ce2b3cd5e --- /dev/null +++ b/tests/test_general.py @@ -0,0 +1,75 @@ +# encoding utf-8 +""" +Confirm sample doc has no links and no annots. +""" +import os + +import fitz + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +filename = os.path.join(scriptdir, "resources", "001003ED.pdf") +doc = fitz.open(filename) + + +def test_haslinks(): + assert doc.has_links() == False + + +def test_hasannots(): + assert doc.has_annots() == False + + +def test_haswidgets(): + assert doc.is_form_pdf == False + + +def test_isrepaired(): + assert doc.is_repaired == False + fitz.TOOLS.mupdf_warnings() + + +def test_isdirty(): + assert doc.is_dirty == False + + +def test_cansaveincrementally(): + assert doc.can_save_incrementally() == True + + +def test_iswrapped(): + page = doc[0] + assert page.is_wrapped + + +def test_wrapcontents(): + page = doc[0] + page.wrap_contents() + xref = page.get_contents()[0] + cont = page.read_contents() + doc.update_stream(xref, cont) + page.set_contents(xref) + assert len(page.get_contents()) == 1 + page.clean_contents() + + +def test_config(): + assert fitz.TOOLS.fitz_config["py-memory"] == True + + +def test_glyphnames(): + name = "infinity" + 
infinity = fitz.glyph_name_to_unicode(name) + assert fitz.unicode_to_glyph_name(infinity) == name + + +def test_rgbcodes(): + sRGB = 0xFFFFFF + assert fitz.sRGB_to_pdf(sRGB) == (1, 1, 1) + assert fitz.sRGB_to_rgb(sRGB) == (255, 255, 255) + + +def test_pdfstring(): + fitz.get_pdf_now() + fitz.get_pdf_str("Beijing, chinesisch 北京") + fitz.get_text_length("Beijing, chinesisch 北京", fontname="china-s") + fitz.get_pdf_str("Latin characters êßöäü") diff --git a/tests/test_geometry.py b/tests/test_geometry.py index e4e7940ab..a168fb9cd 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -1,11 +1,13 @@ """ -* Check various construction methods of rectangles +* Check various construction methods of rects, points, matrices * Check matrix inversions in variations +* Check algebra constructs """ import fitz -def test_rectangles(): +def test_rect(): + assert tuple(fitz.Rect()) == (0, 0, 0, 0) p1 = fitz.Point(10, 20) p2 = fitz.Point(100, 200) p3 = fitz.Point(150, 250) @@ -14,14 +16,104 @@ def test_rectangles(): assert tuple(fitz.Rect(p1, p2)) == r_tuple assert tuple(fitz.Rect(p1, 100, 200)) == r_tuple assert tuple(fitz.Rect(10, 20, p2)) == r_tuple - assert tuple(r.includePoint(p3)) == (10, 20, 150, 250) + assert tuple(r.include_point(p3)) == (10, 20, 150, 250) r = fitz.Rect(10, 20, 100, 200) - assert tuple(r.includeRect((100, 200, 110, 220))) == (10, 20, 110, 220) + assert tuple(r.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220) r = fitz.Rect(10, 20, 100, 200) # include empty rect makes no change - assert tuple(r.includeRect((0, 0, 0, 0))) == r_tuple + assert tuple(r.include_rect((0, 0, 0, 0))) == r_tuple # include infinite rect delivers infinite rect - assert tuple(r.includeRect((1, 1, -1, -1))) == (1, 1, -1, -1) + assert tuple(r.include_rect((1, 1, -1, -1))) == (1, 1, -1, -1) + r = fitz.Rect() + for i in range(4): + r[i] = i + 1 + assert r == fitz.Rect(1, 2, 3, 4) + assert fitz.Rect() / 5 == fitz.Rect() + assert fitz.Rect(1, 1, 2, 2) / fitz.Identity == 
fitz.Rect(1, 1, 2, 2) + failed = False + try: + r = fitz.Rect(1) + except: + failed = True + assert failed + failed = False + try: + r = fitz.Rect(1, 2, 3, 4, 5) + except: + failed = True + assert failed + failed = False + try: + r = fitz.Rect((1, 2, 3, 4, 5)) + except: + failed = True + assert failed + failed = False + try: + r = fitz.Rect(1, 2, 3, "x") + except: + failed = True + assert failed + failed = False + try: + r = fitz.Rect() + r[5] = 1 + except: + failed = True + assert failed + + +def test_irect(): + p1 = fitz.Point(10, 20) + p2 = fitz.Point(100, 200) + p3 = fitz.Point(150, 250) + r = fitz.IRect(10, 20, 100, 200) + r_tuple = tuple(r) + assert tuple(fitz.IRect(p1, p2)) == r_tuple + assert tuple(fitz.IRect(p1, 100, 200)) == r_tuple + assert tuple(fitz.IRect(10, 20, p2)) == r_tuple + assert tuple(r.include_point(p3)) == (10, 20, 150, 250) + r = fitz.IRect(10, 20, 100, 200) + assert tuple(r.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220) + r = fitz.IRect(10, 20, 100, 200) + # include empty rect makes no change + assert tuple(r.include_rect((0, 0, 0, 0))) == r_tuple + r = fitz.IRect() + for i in range(4): + r[i] = i + 1 + assert r == fitz.IRect(1, 2, 3, 4) + + failed = False + try: + r = fitz.IRect(1) + except: + failed = True + assert failed + failed = False + try: + r = fitz.IRect(1, 2, 3, 4, 5) + except: + failed = True + assert failed + failed = False + try: + r = fitz.IRect((1, 2, 3, 4, 5)) + except: + failed = True + assert failed + failed = False + try: + r = fitz.IRect(1, 2, 3, "x") + except: + failed = True + assert failed + failed = False + try: + r = fitz.IRect() + r[5] = 1 + except: + failed = True + assert failed def test_inversion(): @@ -36,9 +128,185 @@ def test_inversion(): def test_matrix(): + assert tuple(fitz.Matrix()) == (0, 0, 0, 0, 0, 0) m45p = fitz.Matrix(45) m45m = fitz.Matrix(-45) m90 = fitz.Matrix(90) assert abs(m90 - m45p * m45p) < fitz.EPSILON assert abs(fitz.Identity - m45p * m45m) < fitz.EPSILON assert abs(m45p - 
~m45m) < fitz.EPSILON + assert fitz.Matrix(2, 3, 1) == fitz.Matrix(1, 3, 2, 1, 0, 0) + m = fitz.Matrix(2, 3, 1) + m.invert() + assert m * fitz.Matrix(2, 3, 1) == fitz.Matrix(1, 0, 0, 1, 0, 0) + assert fitz.Matrix(1, 1).pretranslate(2, 3) == fitz.Matrix(1, 0, 0, 1, 2, 3) + assert fitz.Matrix(1, 1).prescale(2, 3) == fitz.Matrix(2, 0, 0, 3, 0, 0) + assert fitz.Matrix(1, 1).preshear(2, 3) == fitz.Matrix(1, 3, 2, 1, 0, 0) + assert abs(fitz.Matrix(1, 1).prerotate(30) == fitz.Matrix(30)) < fitz.EPSILON + small = 1e-6 + assert fitz.Matrix(1, 1).prerotate(90 + small) == fitz.Matrix(90) + assert fitz.Matrix(1, 1).prerotate(180 + small) == fitz.Matrix(180) + assert fitz.Matrix(1, 1).prerotate(270 + small) == fitz.Matrix(270) + assert fitz.Matrix(1, 1).prerotate(small) == fitz.Matrix(0) + assert fitz.Matrix(1, 1).concat( + fitz.Matrix(1, 2), fitz.Matrix(3, 4) + ) == fitz.Matrix(3, 0, 0, 8, 0, 0) + assert fitz.Matrix(1, 2, 3, 4, 5, 6) / 1 == fitz.Matrix(1, 2, 3, 4, 5, 6) + assert m[0] == m.a + assert m[1] == m.b + assert m[2] == m.c + assert m[3] == m.d + assert m[4] == m.e + assert m[5] == m.f + m = fitz.Matrix() + for i in range(6): + m[i] = i + 1 + assert m == fitz.Matrix(1, 2, 3, 4, 5, 6) + failed = False + try: + m = fitz.Matrix(1, 2, 3) + except: + failed = True + assert failed + failed = False + try: + m = fitz.Matrix(1, 2, 3, 4, 5, 6, 7) + except: + failed = True + assert failed + + failed = False + try: + m = fitz.Matrix((1, 2, 3, 4, 5, 6, 7)) + except: + failed = True + assert failed + + failed = False + try: + m = fitz.Matrix(1, 2, 3, 4, 5, "x") + except: + failed = True + assert failed + + failed = False + try: + m = fitz.Matrix(1, 0, 1, 0, 1, 0) + n = fitz.Matrix(1, 1) / m + except: + failed = True + assert failed + + +def test_point(): + assert tuple(fitz.Point()) == (0, 0) + assert fitz.Point(1, -1).unit == fitz.Point(5, -5).unit + assert fitz.Point(-1, -1).abs_unit == fitz.Point(1, 1).unit + assert fitz.Point(1, 1).distance_to(fitz.Point(1, 1)) == 0 + assert 
fitz.Point(1, 1).distance_to(fitz.Rect(1, 1, 2, 2)) == 0 + assert fitz.Point().distance_to((1, 1, 2, 2)) > 0 + failed = False + try: + p = fitz.Point(1, 2, 3) + except: + failed = True + assert failed + + failed = False + try: + p = fitz.Point((1, 2, 3)) + except: + failed = True + assert failed + + failed = False + try: + p = fitz.Point(1, "x") + except: + failed = True + assert failed + + failed = False + try: + p = fitz.Point() + p[3] = 1 + except: + failed = True + assert failed + + +def test_algebra(): + p = fitz.Point(1, 2) + m = fitz.Matrix(1, 2, 3, 4, 5, 6) + r = fitz.Rect(1, 1, 2, 2) + assert p + p == p * 2 + assert p - p == fitz.Point() + assert m + m == m * 2 + assert m - m == fitz.Matrix() + assert r + r == r * 2 + assert r - r == fitz.Rect() + assert p + 5 == fitz.Point(6, 7) + assert m + 5 == fitz.Matrix(6, 7, 8, 9, 10, 11) + assert p in r + assert p * m == fitz.Point(12, 16) + assert r * m == fitz.Rect(9, 12, 13, 18) + assert (fitz.Rect(1, 1, 2, 2) & fitz.Rect(3, 3, 4, 4)).is_empty + assert not fitz.Rect(1, 1, 2, 2).intersects((3, 3, 4, 4)) + failed = False + try: + x = m + p + except: + failed = True + assert failed + failed = False + try: + x = m + r + except: + failed = True + assert failed + failed = False + try: + x = p + r + except: + failed = True + assert failed + failed = False + try: + x = r + m + except: + failed = True + assert failed + failed = False + try: + m in r + except: + failed = True + assert failed + + +def test_quad(): + r = fitz.Rect(10, 10, 20, 20) + q = r.quad + assert q.is_rectangular + assert not q.is_empty + assert q.is_convex + q *= fitz.Matrix(1, 1).preshear(2, 3) + assert not q.is_rectangular + assert not q.is_empty + assert q.is_convex + assert r.tl not in q + assert r not in q + assert r.quad not in q + failed = False + try: + q[5] = fitz.Point() + except: + failed = True + assert failed + + failed = False + try: + q /= (1, 0, 1, 0, 1, 0) + except: + failed = True + assert failed diff --git a/tests/test_linequad.py 
b/tests/test_linequad.py index 1306225be..9f15116d1 100644 --- a/tests/test_linequad.py +++ b/tests/test_linequad.py @@ -24,8 +24,7 @@ def test_quadcalc(): # let text search find the text returning quad coordinates rl = page.search_for(text, quads=True) searchq = rl[0] - for i in range(4): - assert abs(searchq.ul - lineq.ul) <= 1e-4 - assert abs(searchq.ur - lineq.ur) <= 1e-4 - assert abs(searchq.ll - lineq.ll) <= 1e-4 - assert abs(searchq.lr - lineq.lr) <= 1e-4 + assert abs(searchq.ul - lineq.ul) <= 1e-4 + assert abs(searchq.ur - lineq.ur) <= 1e-4 + assert abs(searchq.ll - lineq.ll) <= 1e-4 + assert abs(searchq.lr - lineq.lr) <= 1e-4 diff --git a/tests/test_nonpdf.py b/tests/test_nonpdf.py index 530419e44..276b77d85 100644 --- a/tests/test_nonpdf.py +++ b/tests/test_nonpdf.py @@ -28,5 +28,5 @@ def test_pageids(): def test_layout(): """Memorize a page location, re-layout with ISO-A4, assert pre-determined location.""" loc = doc.make_bookmark((5, 11)) - doc.layout(fitz.Rect(fitz.PaperRect("a4"))) + doc.layout(fitz.Rect(fitz.paper_rect("a4"))) assert doc.find_bookmark(loc) == (5, 6) diff --git a/tests/test_optional_content.py b/tests/test_optional_content.py new file mode 100644 index 000000000..5f9cfab16 --- /dev/null +++ b/tests/test_optional_content.py @@ -0,0 +1,62 @@ +""" +Test of Optional Content code. 
+""" +import os + +import fitz + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +filename = os.path.join(scriptdir, "resources", "joined.pdf") + + +def test_oc1(): + """Arbitrary calls to OC code to get coverage.""" + doc = fitz.open() + ocg1 = doc.add_ocg("ocg1") + ocg2 = doc.add_ocg("ocg2") + ocg3 = doc.add_ocg("ocg3") + ocmd1 = doc.set_ocmd(xref=0, ocgs=(ocg1, ocg2)) + doc.set_layer(-1) + doc.add_layer("layer1") + test = doc.get_layer() + test = doc.get_layers() + test = doc.get_ocgs() + test = doc.layer_ui_configs() + doc.switch_layer(0) + + +def test_oc2(): + # source file with at least 4 pages + src = fitz.open(filename) + + # new PDF with one page + doc = fitz.open() + page = doc.newPage() + + # define the 4 rectangle quadrants to receive the source pages + r0 = page.rect / 2 + r1 = r0 + (r0.width, 0, r0.width, 0) + r2 = r0 + (0, r0.height, 0, r0.height) + r3 = r2 + (r2.width, 0, r2.width, 0) + + # make 4 OCGs - one for each source page image. + # only first is ON initially + ocg0 = doc.add_ocg("ocg0", on=True) + ocg1 = doc.add_ocg("ocg1", on=False) + ocg2 = doc.add_ocg("ocg2", on=False) + ocg3 = doc.add_ocg("ocg3", on=False) + + ocmd0 = doc.set_ocmd(ve=["and", ocg0, ["not", ["or", ocg1, ocg2, ocg3]]]) + ocmd1 = doc.set_ocmd(ve=["and", ocg1, ["not", ["or", ocg0, ocg2, ocg3]]]) + ocmd2 = doc.set_ocmd(ve=["and", ocg2, ["not", ["or", ocg1, ocg0, ocg3]]]) + ocmd3 = doc.set_ocmd(ve=["and", ocg3, ["not", ["or", ocg1, ocg2, ocg0]]]) + ocmds = (ocmd0, ocmd1, ocmd2, ocmd3) + # insert the 4 source page images, each connected to one OCG + page.showPDFpage(r0, src, 0, oc=ocmd0) + page.showPDFpage(r1, src, 1, oc=ocmd1) + page.showPDFpage(r2, src, 2, oc=ocmd2) + page.showPDFpage(r3, src, 3, oc=ocmd3) + xobj_ocmds = [doc.get_oc(item[0]) for item in page.get_xobjects() if item[1] != 0] + assert set(ocmds) <= set(xobj_ocmds) + assert set((ocg0, ocg1, ocg2, ocg3)) == set(tuple(doc.get_ocgs().keys())) + doc.get_ocmd(ocmd0) diff --git a/tests/test_pagedelete.py 
b/tests/test_pagedelete.py index dba09bb6f..42be88fe8 100644 --- a/tests/test_pagedelete.py +++ b/tests/test_pagedelete.py @@ -42,7 +42,7 @@ def test_deletion(): # Test page deletion. # Delete pages in range and verify result - doc.delete_pages(r) + del doc[r] assert not doc.has_links() # verify all links have gone assert doc.page_count == page_count - len(r) # correct number deleted? toc_new = doc.get_toc() # this is the modified TOC @@ -60,3 +60,10 @@ def test_deletion(): text = doc[pno].get_text().replace("\n", "") # toc text must equal text on page assert text == item[1] + + doc.delete_page(0) # just for the coverage stats + del doc[5:10] + doc.select(range(doc.page_count)) + doc.copy_page(0) + doc.move_page(0) + doc.fullcopy_page(0) diff --git a/tests/test_pixmap.py b/tests/test_pixmap.py index 99524ebeb..f613ee852 100644 --- a/tests/test_pixmap.py +++ b/tests/test_pixmap.py @@ -5,6 +5,7 @@ * pixmap from file and from binary image and compare """ import os +import tempfile import fitz @@ -48,4 +49,41 @@ def test_filepixmap(): stream = open(imgfile, "rb").read() pix2 = fitz.Pixmap(stream) assert repr(pix1) == repr(pix2) - assert pix1.samples == pix2.samples + assert pix1.digest == pix2.digest + + +def test_pilsave(): + # pixmaps from file then save to pillow image + # make pixmap from this and confirm equality + pix1 = fitz.Pixmap(imgfile) + stream = pix1.pil_tobytes("JPEG") + pix2 = fitz.Pixmap(stream) + assert repr(pix1) == repr(pix2) + + +def test_save(): + # pixmaps from file then save to image + # make pixmap from this and confirm equality + pix1 = fitz.Pixmap(imgfile) + stream2 = pix1.tobytes("png") + fp = tempfile.TemporaryFile() + pix1.save(str(fp.name), output="png") + try: + os.remove(fp.name) + except: + pass + + +def test_setalpha(): + # pixmap from JPEG file, then add an alpha channel + # with 30% transparency + pix1 = fitz.Pixmap(imgfile) + opa = int(255 * 0.3) # corresponding to 30% transparency + alphas = [opa] * (pix1.width * pix1.height) + 
alphas = bytearray(alphas) + pix2 = fitz.Pixmap(pix1, 1) # add alpha channel + pix2.set_alpha(alphas) # make image 30% transparent + samples = pix2.samples # copy of samples + # confirm the alpha bytes are correct + t = bytearray([samples[i] for i in range(3, len(samples), 4)]) + assert t == alphas diff --git a/tests/test_textbox.py b/tests/test_textbox.py index e18784c12..ddb55c5b0 100644 --- a/tests/test_textbox.py +++ b/tests/test_textbox.py @@ -29,14 +29,15 @@ def test_textbox1(): align=fitz.TEXT_ALIGN_LEFT, fontsize=12, ) - tw.write_text(page) + tw.write_text(page, morph=(rect.tl, fitz.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) def test_textbox2(): - """Basic text insertion.""" + """Use basic text insertion.""" doc = fitz.open() + ocg = doc.add_ocg("ocg1") page = doc.new_page() rect = fitz.Rect(50, 50, 400, 400) blue = (0, 0, 1) @@ -46,6 +47,49 @@ def test_textbox2(): align=fitz.TEXT_ALIGN_LEFT, fontsize=12, color=blue, + oc=ocg, ) # check text containment assert page.get_text() == page.get_text(clip=rect) + + +def test_textbox3(): + """Use TextWriter for text insertion.""" + doc = fitz.open() + page = doc.new_page() + rect = fitz.Rect(50, 50, 400, 400) + blue = (0, 0, 1) + tw = fitz.TextWriter(page.rect, color=blue) + tw.fill_textbox( + rect, + text, + align=fitz.TEXT_ALIGN_LEFT, + fontsize=12, + right_to_left=True, + ) + tw.write_text(page, morph=(rect.tl, fitz.Matrix(1, 1))) + # check text containment + assert page.get_text() == page.get_text(clip=rect) + doc.scrub() + doc.subset_fonts() + + +def test_textbox4(): + """Use TextWriter for text insertion.""" + doc = fitz.open() + ocg = doc.add_ocg("ocg1") + page = doc.new_page() + rect = fitz.Rect(50, 50, 400, 600) + blue = (0, 0, 1) + tw = fitz.TextWriter(page.rect, color=blue) + tw.fill_textbox( + rect, + text, + align=fitz.TEXT_ALIGN_LEFT, + fontsize=12, + font=fitz.Font("cour"), + right_to_left=True, + ) + tw.write_text(page, oc=ocg, morph=(rect.tl, 
fitz.Matrix(1, 1))) + # check text containment + assert page.get_text() == page.get_text(clip=rect) diff --git a/tests/test_textextract.py b/tests/test_textextract.py new file mode 100644 index 000000000..3f5d6ec46 --- /dev/null +++ b/tests/test_textextract.py @@ -0,0 +1,28 @@ +""" +Extract page text in various formats. +No checks performed - just contribute to code coverage. +""" +import os + +import fitz + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +filename = os.path.join(scriptdir, "resources", "symbol-list.pdf") + + +def test_extract1(): + doc = fitz.open(filename) + page = doc[0] + text = page.get_text("text") + blocks = page.get_text("blocks") + words = page.get_text("words") + d1 = page.get_text("dict") + d2 = page.get_text("json") + d3 = page.get_text("rawdict") + d3 = page.get_text("rawjson") + text = page.get_text("html") + text = page.get_text("xhtml") + text = page.get_text("xml") + rects = fitz.get_highlight_selection(page, start=page.rect.tl, stop=page.rect.br) + text = fitz.ConversionHeader("xml") + text = fitz.ConversionTrailer("xml") diff --git a/tests/test_textsearch.py b/tests/test_textsearch.py index dae18794b..602870966 100644 --- a/tests/test_textsearch.py +++ b/tests/test_textsearch.py @@ -15,5 +15,6 @@ def test_search(): page = doc[0] needle = "mupdf" rlist = page.search_for(needle) + assert rlist != [] for rect in rlist: assert needle in page.get_textbox(rect).lower() diff --git a/tests/test_toc.py b/tests/test_toc.py index fe6456b13..d23dd38e1 100644 --- a/tests/test_toc.py +++ b/tests/test_toc.py @@ -4,11 +4,11 @@ * Verify manipulation of single TOC item works """ import os - import fitz scriptdir = os.path.abspath(os.path.dirname(__file__)) filename = os.path.join(scriptdir, "resources", "001003ED.pdf") +filename2 = os.path.join(scriptdir, "resources", "2.pdf") full_toc = os.path.join(scriptdir, "resources", "full_toc.txt") simple_toc = os.path.join(scriptdir, "resources", "simple_toc.txt") doc = fitz.open(filename) @@ -32,6 
+32,7 @@ def test_erase_toc(): def test_setcolors(): + doc = fitz.open(filename2) toc = doc.get_toc(False) for i in range(len(toc)): d = toc[i][3] @@ -40,9 +41,11 @@ def test_setcolors(): d["italic"] = True doc.set_toc_item(i, dest_dict=d) - toc = doc.get_toc(False) - for t in toc: + toc2 = doc.get_toc(False) + assert len(toc2) == len(toc) + + for t in toc2: d = t[3] assert d["bold"] - assert d["iatlic"] - assrtd["color"] == (1, 0, 0) + assert d["italic"] + assert d["color"] == (1, 0, 0) diff --git a/tests/test_widgets.py b/tests/test_widgets.py new file mode 100644 index 000000000..31d6dcff2 --- /dev/null +++ b/tests/test_widgets.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +""" +Test PDF field (widget) insertion. +""" +import fitz + +doc = fitz.open() +page = doc.new_page() +gold = (1, 1, 0) # define some colors +blue = (0, 0, 1) +gray = (0.9, 0.9, 0.9) +fontsize = 11.0 # define a fontsize +lineheight = fontsize + 4.0 +rect = fitz.Rect(50, 72, 400, 200) + + +def test_text(): + doc = fitz.open() + page = doc.new_page() + widget = fitz.Widget() # create a widget object + widget.border_color = blue # border color + widget.border_width = 0.3 # border width + widget.border_style = "d" + widget.border_dashes = (2, 3) + widget.field_name = "Textfield-1" # field name + widget.field_label = "arbitrary text - e.g. 
to help filling the field" + widget.field_type = fitz.PDF_WIDGET_TYPE_TEXT # field type + widget.fill_color = gold # field background + widget.rect = rect # set field rectangle + widget.text_color = blue # text color + widget.text_font = "TiRo" # use font Times-Roman + widget.text_fontsize = fontsize # set fontsize + widget.text_maxlen = 50 # restrict number of characters + widget.field_value = "Times-Roman" + page.add_widget(widget) # create the field + field = page.first_widget + assert field.field_type_string == "Text" + + +def test_checkbox(): + doc = fitz.open() + page = doc.new_page() + widget = fitz.Widget() + widget.border_style = "b" + widget.field_name = "Button-1" + widget.field_label = "a simple check box button" + widget.field_type = fitz.PDF_WIDGET_TYPE_CHECKBOX + # widget.field_flags = + widget.fill_color = gold + widget.rect = rect + widget.text_color = blue + widget.text_font = "ZaDb" + widget.field_value = True + page.add_widget(widget) # create the field + field = page.first_widget + assert field.field_type_string == "CheckBox" + + +def test_listbox(): + doc = fitz.open() + page = doc.new_page() + widget = fitz.Widget() + widget.field_name = "ListBox-1" + widget.field_label = "is not a drop down: scroll with cursor in field" + widget.field_type = fitz.PDF_WIDGET_TYPE_LISTBOX + widget.fill_color = gold + widget.choice_values = ( + "Frankfurt", + "Hamburg", + "Stuttgart", + "Hannover", + "Berlin", + "München", + "Köln", + "Potsdam", + ) + widget.rect = rect + widget.text_color = blue + widget.text_fontsize = fontsize + widget.field_flags = fitz.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE + widget.field_value = widget.choice_values[-1] + print("About to add '%s'" % widget.field_name) + page.add_widget(widget) # create the field + field = page.first_widget + assert field.field_type_string == "ListBox" + + +def test_combobox(): + doc = fitz.open() + page = doc.new_page() + widget = fitz.Widget() + widget.field_flags = fitz.PDF_CH_FIELD_IS_EDIT # make field 
editable + widget.field_name = "ComboBox-1" + widget.field_label = "an editable combo box ..." + widget.field_type = fitz.PDF_WIDGET_TYPE_COMBOBOX + widget.fill_color = gold + widget.choice_values = ( + "Spanien", + "Frankreich", + "Holland", + "Dänemark", + "Schweden", + "Norwegen", + "England", + "Polen", + "Russland", + "Italien", + "Portugal", + "Griechenland", + ) + widget.rect = rect + widget.text_color = blue + widget.text_fontsize = fontsize + widget.field_flags = fitz.PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE + widget.field_value = widget.choice_values[-1] + page.add_widget(widget) # create the field + field = page.first_widget + assert field.field_type_string == "ComboBox" + + +def test_text2(): + doc = fitz.open() + doc.new_page() + page = [p for p in doc.pages()][0] + widget = fitz.Widget() + widget.field_name = "textfield-2" + widget.field_label = "multi-line text with tabs is also possible!" + widget.field_flags = fitz.PDF_TX_FIELD_IS_MULTILINE + widget.field_type = fitz.PDF_WIDGET_TYPE_TEXT + widget.fill_color = gray + widget.rect = rect + widget.text_color = blue + widget.text_font = "TiRo" + widget.text_fontsize = fontsize + widget.field_value = "This\n\tis\n\t\ta\n\t\t\tmulti-\n\t\tline\n\ttext." + page.add_widget(widget) # create the field + widgets = [w for w in page.widgets()] + field = widgets[0] + assert field.field_type_string == "Text" \ No newline at end of file