From f22cfd6aeaab44cbc5feda5ef76affe7e98009e0 Mon Sep 17 00:00:00 2001 From: Abel Salgado Romero Date: Sat, 2 Sep 2023 18:41:35 +0200 Subject: [PATCH] Bump org.apache.pdfbox:pdfbox to v3.0.0 Only updates test utilities --- .../org/asciidoctor/WhenBackendIsPdf.java | 5 +- .../org/asciidoctor/util/RougeColors.java | 2 +- .../asciidoctor/util/pdf/ColorsProcessor.java | 122 ++++++-------- .../java/org/asciidoctor/util/pdf/Image.java | 64 ++------ .../asciidoctor/util/pdf/ImageProcessor.java | 150 +++++------------- .../resources/ColorsProcessor.properties | 36 ----- build.gradle | 2 +- 7 files changed, 106 insertions(+), 275 deletions(-) delete mode 100644 asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java index 9e6349d..685a097 100644 --- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java +++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/WhenBackendIsPdf.java @@ -1,5 +1,6 @@ package org.asciidoctor; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.asciidoctor.util.RougeColors; import org.asciidoctor.util.pdf.ColorsProcessor; import org.asciidoctor.util.pdf.ImageProcessor; @@ -55,6 +56,7 @@ public void pdf_source_code_should_be_highlighted() throws IOException { ColorsProcessor colorsProcessor = new ColorsProcessor("program", "System.out.println", "printHello", "HelloWorld", "", "else", "Math.sqrt"); colorsProcessor.parse(outputFile1.getAbsolutePath()); Map> colors = colorsProcessor.getColors(); + assertThat(colors.get("program").get(0), equalTo(RougeColors.GREY)); assertThat(colors.get("System.out.println").get(0), equalTo(RougeColors.LIGHT_BLUE)); assertThat(colors.get("printHello").get(0), equalTo(RougeColors.DARK_BLUE)); @@ -101,9 +103,8 @@ public void pdf_text_should_be_hyphenated_english() throws IOException { private void removeFileIfItExists(File file) throws IOException { if (file.exists()) { if (!file.delete()) { - throw new IOException("can't delete file"); + throw new IOException("Can't delete file"); } } } - } diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java index 3855411..236c941 100644 --- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java +++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/RougeColors.java @@ -7,7 +7,7 @@ * * @author abelsromero */ -public class RougeColors { +public final class RougeColors { public static final Color GREEN = new Color(0,136,0); diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java index d382bde..76bacd0 100644 --- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java +++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ColorsProcessor.java @@ -1,22 +1,23 @@ package org.asciidoctor.util.pdf; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.contentstream.operator.color.*; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.util.PDFTextStripper; -import org.apache.pdfbox.util.ResourceLoader; -import org.apache.pdfbox.util.TextPosition; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; import java.awt.*; -import java.io.IOException; -import java.util.*; +import java.io.*; import java.util.List; +import java.util.*; /** * Parses a PDF document looking for certain words, if found it stores the * associated colors. - * + *

* Note: currently stores the color of the last character in fact. + * Based on https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextColors.java?revision=1904918&view=markup * * @author abelsromero */ @@ -29,46 +30,29 @@ public class ColorsProcessor extends PDFTextStripper { /** * List of words mapped to the different colors in which they appear */ - private Map> colors = new HashMap>(); + private Map> colors = new HashMap<>(); - /** - * Constructor - * - * @param words Words to look for into the document - * - * @throws java.io.IOException If there is an error loading text stripper properties. - */ - public ColorsProcessor(String... words) throws IOException { - super(ResourceLoader.loadProperties( - "pdfbox/resources/ColorsProcessor.properties", true)); - super.setSortByPosition(true); + public ColorsProcessor(String... words) { + addOperator(new SetStrokingColorSpace(this)); + addOperator(new SetNonStrokingColorSpace(this)); + addOperator(new SetStrokingDeviceCMYKColor(this)); + addOperator(new SetNonStrokingDeviceCMYKColor(this)); + addOperator(new SetNonStrokingDeviceRGBColor(this)); + addOperator(new SetStrokingDeviceRGBColor(this)); + addOperator(new SetNonStrokingDeviceGrayColor(this)); + addOperator(new SetStrokingDeviceGrayColor(this)); + addOperator(new SetStrokingColor(this)); + addOperator(new SetStrokingColorN(this)); + addOperator(new SetNonStrokingColor(this)); + addOperator(new SetNonStrokingColorN(this)); + setSortByPosition(true); this.words = Arrays.asList(words); } - /** - * Parses a document extracting the colors for the specified words in - * the constructor - * - * @param filename PDF document path - */ - public void parse (String filename) throws IOException { - PDDocument document = null; - try { - document = PDDocument.load(filename, false); - List allPages = document.getDocumentCatalog().getAllPages(); - for( int i=0; i * Note: \00A0: non break space */ private static final List TERMINALS = Arrays.asList(" ", "\n", "\t", "(", ")", "\u00A0"); /** * Processes text events. - * + *

* Stores characters in a buffer until a terminal symbol is found * (e.g. space), then treats the characters stored as a single word. * * @param text The text to be processed */ @Override - protected void processTextPosition( TextPosition text ) { - String chars = text.getCharacter(); + protected void processTextPosition(TextPosition text) { +// super.processTextPosition(text); + + String chars = text.toString(); // Some line breaks do not enter here, I ignore why if (TERMINALS.contains(chars)) { String word = charsBuffer.toString(); if (words.contains(word)) { - addColor(charsBuffer.toString(), previousColor); + registerColor(charsBuffer.toString(), previousColor); } charsBuffer = new StringBuffer(); } else { charsBuffer.append(chars); previousText = text; - try { - previousColor = getGraphicsState().getNonStrokingColor().getJavaColor(); - } catch (IOException e) { - e.printStackTrace(); - } + previousColor = getGraphicsState().getNonStrokingColor(); } - } - /** - * Adds a color mapping to the colors attribute - * - * @param word Word to add - * @param color Color of the word - */ - private void addColor(String word, Color color) { + private void registerColor(String word, PDColor color) { List values = colors.get(word); if (values == null) { - List aux = new ArrayList(); - aux.add(color); + List aux = new ArrayList<>(); + aux.add(toRGB(color)); colors.put(word, aux); } else { - values.add(color); + values.add(toRGB(color)); } } - /** - * Returns the words and their colors after parsing a file - * - * @return List of found images - */ + private Color toRGB(PDColor pdColor) { + float[] components = pdColor.getComponents(); + // Rough conversion, but enough for out tests + int r = Float.valueOf(256 * components[0]).intValue(); + int g = Float.valueOf(256 * components[1]).intValue(); + int b = Float.valueOf(256 * components[2]).intValue(); + return new Color(r, g, b); + } + public Map> getColors() { return colors; } - } diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java index e8a5d55..d7fdcf7 100644 --- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java +++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/Image.java @@ -5,76 +5,42 @@ * * @author abelsromero */ -public class Image { +final class Image { // Page where the image is localed - private int page; - + private final int page; // Position inside the page - private float xPosition; - private float yPosition; - + private final float xPosition; + private final float yPosition; // size in pixels - private int originalWidth; - private int originalHeight; + private final int originalWidth; + private final int originalHeight; - // size in pixels - private int renderedWidth; - private int renderedHeight; + Image(int page, float xPosition, float yPosition, int originalWidth, int originalHeight) { + this.page = page; + this.xPosition = xPosition; + this.yPosition = yPosition; + this.originalWidth = originalWidth; + this.originalHeight = originalHeight; + } public int getPage() { return page; } - public void setPage(int page) { - this.page = page; - } - - public float getXPosition() { + public float getxPosition() { return xPosition; } - public void setXPosition(float xPosition) { - this.xPosition = xPosition; - } - - public float getYPosition() { + public float getyPosition() { return yPosition; } - public void setYPosition(float yPosition) { - this.yPosition = yPosition; - } - public int getOriginalWidth() { return originalWidth; } - public void setOriginalWidth(int originalWidth) { - this.originalWidth = originalWidth; - } - public int getOriginalHeight() { return originalHeight; } - - public void setOriginalHeight(int originalHeight) { - this.originalHeight = originalHeight; - } - - public int getRenderedWidth() { - return renderedWidth; - } - - public void setRenderedWidth(int renderedWidth) { - this.renderedWidth = renderedWidth; - } - - public int getRenderedHeight() { - return renderedHeight; - } - - public void setRenderedHeight(int renderedHeight) { - this.renderedHeight = renderedHeight; - } } diff --git a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java index 42a052a..9ac8d61 100644 --- a/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java +++ b/asciidoctorj-pdf/src/test/java/org/asciidoctor/util/pdf/ImageProcessor.java @@ -1,150 +1,72 @@ package org.asciidoctor.util.pdf; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.contentstream.PDFStreamEngine; +import org.apache.pdfbox.contentstream.operator.DrawObject; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; +import org.apache.pdfbox.contentstream.operator.state.*; +import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.graphics.PDGraphicsState; -import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject; -import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectForm; -import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.util.Matrix; -import org.apache.pdfbox.util.PDFOperator; -import org.apache.pdfbox.util.PDFStreamEngine; -import org.apache.pdfbox.util.ResourceLoader; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; /** * Processes a PDF document to extract the metadata of the contained images. - * - * Based on https://svn.apache.org/viewvc/pdfbox/tags/1.8.9/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java - * - * @author abelsromero + *

+ * Based on https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java?revision=1904918&view=markup */ public class ImageProcessor extends PDFStreamEngine { - - private static final String INVOKE_OPERATOR = "Do"; private int currentPage = 0; - private List images = new ArrayList<>(); + private final List images = new ArrayList<>(); - /** - * Default constructor - * - * @throws IOException If there is an error loading text stripper properties. - */ - public ImageProcessor() throws IOException { - super(ResourceLoader.loadProperties( - "org/apache/pdfbox/resources/PDFTextStripper.properties", true)); + public ImageProcessor() { + addOperator(new Concatenate(this)); + addOperator(new DrawObject(this)); + addOperator(new SetGraphicsStateParameters(this)); + addOperator(new Save(this)); + addOperator(new Restore(this)); + addOperator(new SetMatrix(this)); } - /** - * Parses a document extracting the images - * - * @param filename PDF document path - */ public void parse(String filename) throws IOException { - PDDocument document = null; - try { - document = PDDocument.load(filename, false); - List allPages = document.getDocumentCatalog().getAllPages(); - for( int i=0; i xobjects = getResources().getXObjects(); - PDXObject xobject = xobjects.get( objectName.getName() ); - - if (xobject instanceof PDXObjectImage) { - PDXObjectImage image = (PDXObjectImage)xobject; - int imageWidth = image.getWidth(); - int imageHeight = image.getHeight(); - PDPage page = getCurrentPage(); - double pageHeight = page.getMediaBox().getHeight(); + @Override + protected void processOperator(Operator operator, List operands) throws IOException { - Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix(); - float yScaling = ctmNew.getYScale(); - float angle = (float)Math.acos(ctmNew.getValue(0, 0)/ctmNew.getXScale()); - if (ctmNew.getValue(0, 1) < 0 && ctmNew.getValue(1, 0) > 0) { - angle = (-1)*angle; - } - ctmNew.setValue(2, 1, (float)(pageHeight - ctmNew.getYPosition() - Math.cos(angle)*yScaling)); - ctmNew.setValue(2, 0, (float)(ctmNew.getXPosition() - Math.sin(angle)*yScaling)); - // because of the moved 0,0-reference, we have to shear in the opposite direction - ctmNew.setValue(0, 1, (-1)*ctmNew.getValue(0, 1)); - ctmNew.setValue(1, 0, (-1)*ctmNew.getValue(1, 0)); + if (OperatorName.DRAW_OBJECT.equals(operator.getName())) { + COSName objectName = (COSName) operands.get(0); + PDXObject xobject = getResources().getXObject(objectName); - Image im = new Image(); - im.setPage(currentPage); - im.setXPosition(ctmNew.getXPosition()); - im.setYPosition(ctmNew.getYPosition()); - im.setOriginalWidth(imageWidth); - im.setOriginalHeight(imageHeight); - im.setRenderedWidth(Math.round(ctmNew.getXScale())); - im.setRenderedHeight(Math.round(ctmNew.getYScale())); + if (xobject instanceof PDImageXObject) { + final PDImageXObject pdImage = (PDImageXObject) xobject; + final Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix(); - images.add(im); - } else if (xobject instanceof PDXObjectForm) { - // save the graphics state - getGraphicsStack().push( (PDGraphicsState)getGraphicsState().clone() ); - PDPage page = getCurrentPage(); - - PDXObjectForm form = (PDXObjectForm)xobject; - COSStream invoke = (COSStream)form.getCOSObject(); - PDResources pdResources = form.getResources(); - if(pdResources == null) - { - pdResources = page.findResources(); - } - // if there is an optional form matrix, we have to - // map the form space to the user space - Matrix matrix = form.getMatrix(); - if (matrix != null) - { - Matrix xobjectCTM = matrix.multiply( getGraphicsState().getCurrentTransformationMatrix()); - getGraphicsState().setCurrentTransformationMatrix(xobjectCTM); - } - processSubStream( page, pdResources, invoke ); - - // restore the graphics state - setGraphicsState( getGraphicsStack().pop() ); + final Image image = new Image(currentPage, ctmNew.getTranslateX(), ctmNew.getTranslateY(), pdImage.getWidth(), pdImage.getHeight()); + images.add(image); } } else { - super.processOperator( operator, arguments ); + super.processOperator(operator, operands); } } - /** - * Returns the list of found images after parsing a file. - * - * @return List of found images - */ public List getImages() { return images; } diff --git a/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties b/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties deleted file mode 100644 index 664ce9a..0000000 --- a/asciidoctorj-pdf/src/test/resources/pdfbox/resources/ColorsProcessor.properties +++ /dev/null @@ -1,36 +0,0 @@ -b=org.apache.pdfbox.util.operator.pagedrawer.CloseFillNonZeroAndStrokePath -b*=org.apache.pdfbox.util.operator.pagedrawer.CloseFillEvenOddAndStrokePath -BT=org.apache.pdfbox.util.operator.BeginText -cm=org.apache.pdfbox.util.operator.Concatenate -CS=org.apache.pdfbox.util.operator.SetStrokingColorSpace -cs=org.apache.pdfbox.util.operator.SetNonStrokingColorSpace -ET=org.apache.pdfbox.util.operator.EndText -G=org.apache.pdfbox.util.operator.SetStrokingGrayColor -g=org.apache.pdfbox.util.operator.SetNonStrokingGrayColor -gs=org.apache.pdfbox.util.operator.SetGraphicsStateParameters -K=org.apache.pdfbox.util.operator.SetStrokingCMYKColor -k=org.apache.pdfbox.util.operator.SetNonStrokingCMYKColor -q=org.apache.pdfbox.util.operator.GSave -Q=org.apache.pdfbox.util.operator.GRestore -RG=org.apache.pdfbox.util.operator.SetStrokingRGBColor -rg=org.apache.pdfbox.util.operator.SetNonStrokingRGBColor -s=org.apache.pdfbox.util.operator.CloseAndStrokePath -SC=org.apache.pdfbox.util.operator.SetStrokingColor -sc=org.apache.pdfbox.util.operator.SetNonStrokingColor -SCN=org.apache.pdfbox.util.operator.SetStrokingColor -scn=org.apache.pdfbox.util.operator.SetNonStrokingColor -T*=org.apache.pdfbox.util.operator.NextLine -Tc=org.apache.pdfbox.util.operator.SetCharSpacing -Td=org.apache.pdfbox.util.operator.MoveText -TD=org.apache.pdfbox.util.operator.MoveTextSetLeading -Tf=org.apache.pdfbox.util.operator.SetTextFont -Tj=org.apache.pdfbox.util.operator.ShowText -TJ=org.apache.pdfbox.util.operator.ShowTextGlyph -TL=org.apache.pdfbox.util.operator.SetTextLeading -Tm=org.apache.pdfbox.util.operator.SetMatrix -Tr=org.apache.pdfbox.util.operator.SetTextRenderingMode -Ts=org.apache.pdfbox.util.operator.SetTextRise -Tw=org.apache.pdfbox.util.operator.SetWordSpacing -Tz=org.apache.pdfbox.util.operator.SetHorizontalTextScaling -\'=org.apache.pdfbox.util.operator.MoveAndShow -\"=org.apache.pdfbox.util.operator.SetMoveAndShow diff --git a/build.gradle b/build.gradle index ee56857..1107f3c 100644 --- a/build.gradle +++ b/build.gradle @@ -34,7 +34,7 @@ ext { arquillianVersion = '1.1.10.Final' arquillianSpockVersion = '1.0.0.Beta3' jrubyVersion = '9.4.0.0' - pdfboxVersion = '1.8.17' + pdfboxVersion = '3.0.0' junitVersion = '4.13.2' hamcrestVersion = '2.2'