Skip to content

Commit

Permalink
NXP-8918: allow PDF/A conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
Florent Guillaume committed Feb 24, 2012
1 parent 09cc14f commit e8475de
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 48 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
* (C) Copyright 2006-2012 Nuxeo SAS (http://nuxeo.com/) and contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License
Expand All @@ -12,17 +12,16 @@
* Lesser General Public License for more details.
*
* Contributors:
* Nuxeo - initial API and implementation
*
* $Id$
* Nuxeo
* Florent Guillaume
*/

package org.nuxeo.ecm.platform.convert.plugins;

import java.io.File;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -50,6 +49,10 @@

import com.sun.star.uno.RuntimeException;

/**
* Converter based on JOD which uses an external OpenOffice process to do actual
* conversions.
*/
public class JODBasedConverter implements ExternalConverter {

protected static final String TMP_PATH_PARAMETER = "TmpDirectory";
Expand All @@ -58,6 +61,21 @@ public class JODBasedConverter implements ExternalConverter {

private static final DocumentFormatRegistry formatRegistry = new DefaultDocumentFormatRegistry();

/**
* Boolean conversion parameter for PDF/A-1.
*
* @since 5.6
*/
public static final String PDFA1_PARAM = "PDF/A-1";

/**
 * PDF export filter names, keyed by the family of the source document.
 */
protected static final Map<DocumentFamily, String> PDF_FILTER_NAMES = new HashMap<DocumentFamily, String>();

// Must be a *static* initializer: an instance initializer would leave the
// map empty until the first converter is instantiated, and would write to
// this shared, unsynchronized map again on every later instantiation.
static {
    PDF_FILTER_NAMES.put(DocumentFamily.TEXT, "writer_pdf_Export");
    PDF_FILTER_NAMES.put(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
    PDF_FILTER_NAMES.put(DocumentFamily.PRESENTATION, "impress_pdf_Export");
    PDF_FILTER_NAMES.put(DocumentFamily.DRAWING, "draw_pdf_Export");
}

protected ConverterDescriptor descriptor;

protected String getDestinationMimeType() {
Expand All @@ -69,11 +87,42 @@ protected String getDestinationMimeType() {
* <p>
* It takes the actual destination mimetype from the plugin configuration.
*
* @return the DestinationFormat for this given plugin. {@see
* org.nuxeo.ecm.platform.transform.interfaces.Plugin}
* @param sourceFormat the source format
* @param pdfa1 true if PDF/A-1 is required
*/
private DocumentFormat getDestinationFormat() {
return formatRegistry.getFormatByMediaType(getDestinationMimeType());
protected DocumentFormat getDestinationFormat(DocumentFormat sourceFormat,
        boolean pdfa1) {
    String targetMimeType = getDestinationMimeType();
    String sourceMediaType = sourceFormat.getMediaType();

    if (!"application/pdf".equals(targetMimeType)) {
        // not a PDF target: rely on the default JODConverter registry
        return formatRegistry.getFormatByMediaType(targetMimeType);
    }

    // PDF target: build a dedicated format so that the export filter (and
    // optionally the PDF/A-1 filter data) can be set explicitly
    DocumentFormat pdfFormat = new DocumentFormat(pdfa1 ? "PDF/A-1" : "PDF",
            "pdf", "application/pdf");
    DocumentFamily family = sourceFormat.getInputFamily();

    // HTML sources need the strict "writer_web_pdf_Export" filter name
    // instead of "writer_pdf_Export"; other sources use the per-family
    // filter name
    String exportFilter = "text/html".equals(sourceMediaType)
            ? "writer_web_pdf_Export"
            : PDF_FILTER_NAMES.get(family);

    Map<String, Object> props = new HashMap<String, Object>();
    props.put("FilterName", exportFilter);
    if (pdfa1) {
        Map<String, Object> pdfaOptions = new HashMap<String, Object>();
        pdfaOptions.put("SelectPdfVersion", Integer.valueOf(1)); // PDF/A-1
        pdfaOptions.put("UseTaggedPDF", Boolean.TRUE); // per spec
        props.put("FilterData", pdfaOptions);
    }
    pdfFormat.setStoreProperties(family, props);
    return pdfFormat;
}

/**
Expand Down Expand Up @@ -104,26 +153,7 @@ protected void finalize() throws Throwable {
super.finalize();
}

/**
 * Forces the "writer_web_pdf_Export" filter name on the destination format
 * when converting HTML to PDF.
 *
 * @param sourceFormat the format of the document being converted
 * @param destinationFormat the target format, updated in place when the
 *            conversion is HTML to PDF
 * @return true if the destination format was patched, false otherwise
 */
private static boolean adaptFilterNameForHTML2PDF(DocumentFormat sourceFormat,
        DocumentFormat destinationFormat) {
    // TODO: solve this
    // Due to a random bug, we have to be strict regarding the output
    // FilterName: HTML files have to use "writer_web_pdf_Export" instead of
    // the JODConverter simplification "writer_pdf_Export", so the
    // destination format is patched dynamically.
    boolean fromHtml = "text/html".equals(sourceFormat.getMediaType());
    if (!fromHtml
            || !"application/pdf".equals(destinationFormat.getMediaType())) {
        return false;
    }

    Map<String, String> props = new HashMap<String, String>();
    props.put("FilterName", "writer_web_pdf_Export");
    destinationFormat.setStoreProperties(sourceFormat.getInputFamily(), props);
    return true;
}

@Override
public BlobHolder convert(BlobHolder blobHolder,
Map<String, Serializable> parameters) throws ConversionException {
Blob inputBlob;
Expand All @@ -142,6 +172,9 @@ public BlobHolder convert(BlobHolder blobHolder,
// This plugin deals with only one input source.
String sourceMimetype = inputBlob.getMimeType();

boolean pdfa1 = parameters != null
&& Boolean.TRUE.equals(parameters.get(PDFA1_PARAM));

if (documentConverter != null) {
File sourceFile = null;
File outFile = null;
Expand All @@ -164,13 +197,12 @@ public BlobHolder convert(BlobHolder blobHolder,
// stream.reset(); // works on a JCRBlobInputStream
// }
FileUtils.copyToFile(stream, sourceFile);
DocumentFormat sourceFormat = null;

DocumentFormat sourceFormat = null;
if (sourceMimetype != null) {
// Try to fetch it from the registry.
sourceFormat = getSourceFormat(sourceMimetype);
}

// If not found in the registry or not given as a parameter.
// Try to sniff ! What does that smell ? :)
if (sourceFormat == null) {
Expand All @@ -179,7 +211,8 @@ public BlobHolder convert(BlobHolder blobHolder,

// From plugin settings because we know the destination
// mimetype.
DocumentFormat destinationFormat = getDestinationFormat();
DocumentFormat destinationFormat = getDestinationFormat(
sourceFormat, pdfa1);

// allow HTML2PDF filtering

Expand Down Expand Up @@ -225,8 +258,6 @@ public BlobHolder convert(BlobHolder blobHolder,
}

} else {
adaptFilterNameForHTML2PDF(sourceFormat, destinationFormat);

outFile = File.createTempFile("NXJOOoConverterDocumentOut",
'.' + destinationFormat.getExtension());

Expand Down Expand Up @@ -273,10 +304,12 @@ sourceMimetype, getDestinationMimeType(),

}

/**
 * Stores the given descriptor in this converter's {@code descriptor} field.
 *
 * @param descriptor the converter descriptor to keep
 */
@Override
public void init(ConverterDescriptor descriptor) {
this.descriptor = descriptor;
}

@Override
public ConverterCheckResult isConverterAvailable() {
ConverterCheckResult result = new ConverterCheckResult();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand All @@ -35,8 +38,8 @@
import org.nuxeo.ecm.platform.convert.ooomanager.OOoManagerService;
import org.nuxeo.runtime.api.Framework;
import org.nuxeo.runtime.test.NXRuntimeTestCase;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

public abstract class BaseConverterTest extends Assert {

Expand Down Expand Up @@ -97,4 +100,17 @@ public static String readPdfText(File pdfFile) throws IOException {
return text.trim();
}

/**
 * Checks whether a PDF file declares conformance level "A" through a
 * {@code pdfaid:conformance} element in its XMP metadata.
 * <p>
 * A document without XMP metadata, or without a {@code pdfaid:conformance}
 * element, is reported as not PDF/A instead of failing with a
 * NullPointerException.
 *
 * @param pdfFile the PDF file to inspect
 * @return true if the first {@code pdfaid:conformance} element has the
 *         value "A", false otherwise
 * @throws Exception if the file cannot be loaded or its metadata cannot be
 *             parsed
 */
public static boolean isPDFA(File pdfFile) throws Exception {
    PDDocument pddoc = PDDocument.load(pdfFile);
    try {
        if (pddoc.getDocumentCatalog().getMetadata() == null) {
            // no XMP metadata at all: cannot be PDF/A
            return false;
        }
        XMPMetadata xmp = pddoc.getDocumentCatalog().getMetadata().exportXMPMetadata();
        Document doc = xmp.getXMPDocument();
        // <rdf:Description xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/"
        // rdf:about="">
        // <pdfaid:part>1</pdfaid:part>
        // <pdfaid:conformance>A</pdfaid:conformance>
        // </rdf:Description>
        NodeList list = doc.getElementsByTagName("pdfaid:conformance");
        // item(0) is null on an empty list, so check the length first
        return list != null && list.getLength() > 0
                && "A".equals(list.item(0).getTextContent());
    } finally {
        // always release the loaded document
        pddoc.close();
    }
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
* (C) Copyright 2006-2012 Nuxeo SAS (http://nuxeo.com/) and contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License
Expand All @@ -12,30 +12,37 @@
* Lesser General Public License for more details.
*
* Contributors:
* Nuxeo - initial API and implementation
*
* $Id$
* Nuxeo
* Florent Guillaume
*/

package org.nuxeo.ecm.platform.convert.tests;

import java.io.File;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Test;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.api.ConverterCheckResult;
import org.nuxeo.ecm.platform.convert.plugins.JODBasedConverter;
import org.nuxeo.runtime.api.Framework;


public class TestAnyToPDFConverters extends BaseConverterTest {

private static final Log log = LogFactory.getLog(TestAnyToPDFConverters.class);

/**
 * Runs the PDF conversion test twice for the given source document: first
 * producing a normal PDF, then producing PDF/A-1.
 *
 * @param srcMT the mime type of the source document
 * @param fileName the name of the source test document
 */
protected void doTestPDFConverter(String srcMT, String fileName)
        throws Exception {
    // false: normal PDF, true: PDF/A-1 (in that order)
    for (boolean pdfa : new boolean[] { false, true }) {
        doTestPDFConverter(srcMT, fileName, pdfa);
    }
}

protected void doTestPDFConverter(String srcMT, String fileName,
boolean pdfa) throws Exception {

ConversionService cs = Framework.getLocalService(ConversionService.class);

Expand All @@ -53,14 +60,24 @@ protected void doTestPDFConverter(String srcMT, String fileName)

BlobHolder hg = getBlobFromPath("test-docs/" + fileName, srcMT);

BlobHolder result = cs.convert(converterName, hg, null);
Map<String,Serializable> parameters = new HashMap<String, Serializable>();
if (pdfa) {
parameters.put(JODBasedConverter.PDFA1_PARAM, Boolean.TRUE);
}
BlobHolder result = cs.convert(converterName, hg, parameters);
assertNotNull(result);

File pdfFile = File.createTempFile("testingPDFConverter", ".pdf");
result.getBlob().transferTo(pdfFile);
String text = readPdfText(pdfFile);
assertTrue(text.contains("Hello"));
log.info(srcMT + " to PDF conversion : OK");
try {
result.getBlob().transferTo(pdfFile);
String text = readPdfText(pdfFile);
assertTrue(text.contains("Hello"));
if (pdfa) {
assertTrue("Output is not PDF/A", isPDFA(pdfFile));
}
} finally {
pdfFile.delete();
}
}

@Test
Expand All @@ -75,7 +92,7 @@ public void testAnyToTextConverter() throws Exception {
return;
}

// doTestPDFConverter("text/html", "hello.html");
doTestPDFConverter("text/html", "hello.html");
// doTestPDFConverter("text/xml", "hello.xml");
doTestPDFConverter("application/vnd.ms-excel", "hello.xls");
doTestPDFConverter("application/vnd.sun.xml.writer", "hello.sxw");
Expand Down

0 comments on commit e8475de

Please sign in to comment.