Skip to content

Commit

Permalink
show javascript, bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Frooodle committed Aug 6, 2023
1 parent 379791a commit 54f53be
Show file tree
Hide file tree
Showing 14 changed files with 386 additions and 58 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {
}

group = 'stirling.software'
version = '0.12.0'
version = '0.12.1'
sourceCompatibility = '17'

repositories {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package stirling.software.SPDF.controller.api.other;

import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.PDPageContentStream.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.http.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.*;
import io.swagger.v3.oas.annotations.parameters.*;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.text.TextPosition;
import org.apache.tomcat.util.http.ResponseUtil;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.ArrayList;

import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;

import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.itextpdf.io.font.constants.StandardFonts;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.properties.TextAlignment;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.Schema;

import java.io.*;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.text.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.Schema;
import org.springframework.http.ResponseEntity;
@RestController
@Tag(name = "Other", description = "Other APIs")
public class ShowJavascript {

private static final Logger logger = LoggerFactory.getLogger(ShowJavascript.class);
@PostMapping(consumes = "multipart/form-data", value = "/show-javascript")
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> extractHeader(
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file from which the javascript is to be extracted.", required = true) MultipartFile inputFile)
throws Exception {

try (
PdfDocument itextDoc = new PdfDocument(new PdfReader(inputFile.getInputStream()))
) {

String name = "";
String script = "";
String entryName = "File: "+inputFile.getOriginalFilename() + ", Script: ";
//Javascript
PdfDictionary namesDict = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names);
if (namesDict != null) {
PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
if (javascriptDict != null) {

PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
if(namesArray.getAsString(i) != null)
name = namesArray.getAsString(i).toString();

PdfObject jsCode = namesArray.get(i+1);
if (jsCode instanceof PdfStream) {
byte[] jsCodeBytes = ((PdfStream)jsCode).getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
script = "//" + entryName + name + "\n" +jsCodeStr;

} else if (jsCode instanceof PdfDictionary) {
// If the JS code is in a dictionary, you'll need to know the key to use.
// Assuming the key is PdfName.JS:
PdfStream jsCodeStream = ((PdfDictionary)jsCode).getAsStream(PdfName.JS);
if (jsCodeStream != null) {
byte[] jsCodeBytes = jsCodeStream.getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
script = "//" + entryName + name + "\n" +jsCodeStr;
}
}
}

}
}
if(script.equals("")) {
script = "PDF '" +inputFile.getOriginalFilename() + "' does not contain Javascript";
}
return WebResponseUtils.bytesToWebResponse(script.getBytes(), name + ".js");
}

}




}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.options.SerializeOptions;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
Expand Down Expand Up @@ -193,11 +194,13 @@ public ResponseEntity<byte[]> getPdfInfo(
if (embeddedFiles != null) {

PdfArray namesArray = embeddedFiles.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode embeddedFileNode = objectMapper.createObjectNode();
embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
// Add other details if required
embeddedFilesArray.add(embeddedFileNode);
if(namesArray != null) {
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode embeddedFileNode = objectMapper.createObjectNode();
embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
// Add other details if required
embeddedFilesArray.add(embeddedFileNode);
}
}

}
Expand All @@ -224,15 +227,33 @@ public ResponseEntity<byte[]> getPdfInfo(
if (namesDict != null) {
PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
if (javascriptDict != null) {

PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode jsNode = objectMapper.createObjectNode();
jsNode.put("JS Name", namesArray.getAsString(i).toString());
jsNode.put("JS Code", namesArray.getAsString(i + 1).toString());
if(namesArray.getAsString(i) != null)
jsNode.put("JS Name", namesArray.getAsString(i).toString());

// Here we check for a PdfStream object and retrieve the JS code from it
PdfObject jsCode = namesArray.get(i+1);
if (jsCode instanceof PdfStream) {
byte[] jsCodeBytes = ((PdfStream)jsCode).getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
jsNode.put("JS Script Length", jsCodeStr.length());
} else if (jsCode instanceof PdfDictionary) {
// If the JS code is in a dictionary, you'll need to know the key to use.
// Assuming the key is PdfName.JS:
PdfStream jsCodeStream = ((PdfDictionary)jsCode).getAsStream(PdfName.JS);
if (jsCodeStream != null) {
byte[] jsCodeBytes = jsCodeStream.getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
jsNode.put("JS Script Character Length", jsCodeStr.length());
}
}

javascriptArray.add(jsNode);
}

}
}
other.set("JavaScript", javascriptArray);
Expand Down Expand Up @@ -305,16 +326,15 @@ public ResponseEntity<byte[]> getPdfInfo(
}
other.set("Bookmarks/Outline/TOC", bookmarksArray);

byte[] xmpBytes = itextDoc.getXmpMetadata();
String xmpString = null;
try {
byte[] xmpBytes = itextDoc.getXmpMetadata();
if (xmpBytes != null) {
if (xmpBytes != null) {
try {
XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes);
xmpString = xmpMeta.dumpObject();

xmpString = new String(XMPMetaFactory.serializeToBuffer(xmpMeta, new SerializeOptions()));
} catch (XMPException e) {
e.printStackTrace();
}
} catch (XMPException e) {
e.printStackTrace();
}
other.put("XMPMetadata", xmpString);

Expand Down Expand Up @@ -416,8 +436,10 @@ public ResponseEntity<byte[]> getPdfInfo(
for (PdfAnnotation annotation : annotations) {
if (annotation instanceof PdfLinkAnnotation) {
PdfLinkAnnotation linkAnnotation = (PdfLinkAnnotation) annotation;
String uri = linkAnnotation.getAction().toString();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
if(linkAnnotation != null && linkAnnotation.getAction() != null) {
String uri = linkAnnotation.getAction().toString();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package stirling.software.SPDF.controller.api.security;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.PDPageTree;
Expand All @@ -21,7 +22,9 @@
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
import stirling.software.SPDF.utils.WebResponseUtils;

import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import java.io.IOException;
import java.io.InputStream;

Expand Down Expand Up @@ -75,8 +78,24 @@ public ResponseEntity<byte[]> sanitizePDF(
return WebResponseUtils.pdfDocToWebResponse(document, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_sanitized.pdf");
}
}
private void sanitizeJavaScript(PDDocument document) throws IOException {
for (PDPage page : document.getPages()) {
private void sanitizeJavaScript(PDDocument document) throws IOException {
// Get the root dictionary (catalog) of the PDF
PDDocumentCatalog catalog = document.getDocumentCatalog();

// Get the Names dictionary
COSDictionary namesDict = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.NAMES);

if (namesDict != null) {
// Get the JavaScript dictionary
COSDictionary javaScriptDict = (COSDictionary) namesDict.getDictionaryObject(COSName.getPDFName("JavaScript"));

if (javaScriptDict != null) {
// Remove the JavaScript dictionary
namesDict.removeItem(COSName.getPDFName("JavaScript"));
}
}

for (PDPage page : document.getPages()) {
for (PDAnnotation annotation : page.getAnnotations()) {
if (annotation instanceof PDAnnotationWidget) {
PDAnnotationWidget widget = (PDAnnotationWidget) annotation;
Expand All @@ -89,13 +108,28 @@ private void sanitizeJavaScript(PDDocument document) throws IOException {
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
if (acroForm != null) {
for (PDField field : acroForm.getFields()) {
if (field.getActions().getF() instanceof PDActionJavaScript) {
field.getActions().setF(null);
}
PDFormFieldAdditionalActions actions = field.getActions();
if(actions != null) {
if (actions.getC() instanceof PDActionJavaScript) {
actions.setC(null);
}
if (actions.getF() instanceof PDActionJavaScript) {
actions.setF(null);
}
if (actions.getK() instanceof PDActionJavaScript) {
actions.setK(null);
}
if (actions.getV() instanceof PDActionJavaScript) {
actions.setV(null);
}
}
}
}
}
}
}




private void sanitizeEmbeddedFiles(PDDocument document) {
PDPageTree allPages = document.getPages();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,15 @@ public ModelAndView extractImageScansForm() {
modelAndView.addObject("currentPage", "extract-image-scans");
return modelAndView;
}


@GetMapping("/show-javascript")
@Hidden
public String extractJavascriptForm(Model model) {
model.addAttribute("currentPage", "show-javascript");
return "other/show-javascript";
}


@GetMapping("/add-page-numbers")
@Hidden
public String addPageNumbersForm(Model model) {
Expand Down
11 changes: 10 additions & 1 deletion src/main/resources/messages_en_GB.properties
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,21 @@ home.PdfToSinglePage.title=PDF to Single Large Page
home.PdfToSinglePage.desc=Merges all PDF pages into one large single page
PdfToSinglePage.tags=single page


home.showJS.title=Show Javascript
home.showJS.desc=Searches and displays any JS injected into a PDF
showJS.tags=JS

###########################
# #
# WEB PAGES #
# #
###########################

#showJS
showJS.title=Show Javascript
showJS.header=Show Javascript
showJS.downloadJS=Download Javascript
showJS.submit=Show


#pdfToSinglePage
Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/static/css/prism.css

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 54f53be

Please sign in to comment.