More fixes for RequestPart mixing

xdsnet · Jul 30, 2023 · f98f089 · f98f089
2 parents 6b618f3 + 0732ffa
commit f98f089
Show file tree

Hide file tree

Showing 55 changed files with 3,787 additions and 651 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,5 @@
 # Build jbig2enc in a separate stage
-FROM frooodle/stirling-pdf-base:latest
+FROM frooodle/stirling-pdf-base:beta4
 
 # Create scripts folder and copy local scripts
 RUN mkdir /scripts

diff --git a/README.md b/README.md
@@ -8,6 +8,8 @@
 [![Paypal Donate](https://img.shields.io/badge/Paypal%20Donate-yellow?style=flat&logo=paypal)](https://www.paypal.com/paypalme/froodleplex)
 [![Github Sponser](https://img.shields.io/badge/Github%20Sponsor-yellow?style=flat&logo=github)](https://github.com/sponsors/Frooodle)
 
+[![Deploy to DO](https://www.deploytodo.com/do-btn-blue.svg)](https://cloud.digitalocean.com/apps/new?repo=https://github.com/Frooodle/Stirling-PDF/tree/digitalOcean&refcode=c3210994b1af)
+
 This is a powerful locally hosted web based PDF manipulation tool using docker that allows you to perform various operations on PDF files, such as splitting merging, converting, reorganizing, adding images, rotating, compressing, and more. This locally hosted web application started as a 100% ChatGPT-made application and has evolved to include a wide range of features to handle all your PDF needs.
 
 Stirling PDF makes no outbound calls for any record keeping or tracking.
@@ -27,6 +29,11 @@ Feel free to request any features or bug fixes either in github issues or our [D
 - Convert PDFs to and from images
 - Reorganize PDF pages into different orders.
 - Add/Generate signatures
+- Format PDFs into a multi-paged page
+- Scale page content size by a set percentage
+- Adjust Contrast
+- Crop PDF
+- Auto Split PDF (With physically scanned page dividers)
 - Flatten PDFs
 - Repair PDFs
 - Detect and remove blank pages
@@ -39,8 +46,14 @@ Feel free to request any features or bug fixes either in github issues or our [D
 - Add watermark(s)
 - Convert Any common file to PDF (using LibreOffice)
 - Convert PDF to Word/Powerpoint/Others (using LibreOffice)
+- Convert HTML to PDF
+- URL to PDF
 - Extract images from PDF
+- Extract images from Scans
+- Add page numbers
+- Auto rename file by detecting PDF header text
 - OCR on PDF (Using OCRMyPDF)
+- PDF/A conversion (Using OCRMyPDF)
 - Edit metadata
 - Dark mode support.
 - Custom download options (see [here](https://github.com/Frooodle/Stirling-PDF/blob/main/images/settings.png) for example)

diff --git a/build.gradle b/build.gradle
@@ -8,7 +8,7 @@ plugins {
 }
 
 group = 'stirling.software'
-version = '0.11.0'
+version = '0.11.2'
 sourceCompatibility = '17'
 
 repositories {

diff --git a/images/stirling-home.png b/images/stirling-home.png
diff --git a/scripts/PropSync.java b/scripts/PropSync.java
@@ -0,0 +1,97 @@
+package stirling.software.Stirling.Stats;
+
+import java.nio.file.*;
+import java.nio.charset.MalformedInputException;
+import java.nio.charset.StandardCharsets;
+import java.io.*;
+import java.util.*;
+
+/**
+ * Developer script that brings every {@code messages_*.properties} file in line with {@code messages_en_GB.properties}.
+ * Each target file is rewritten from the English file's lines, in order: keys the target already has keep their value,
+ * and keys that have no translation are copied in English beneath a "TODO: Translate" banner.
+ */
+public class PropSync {
+
+    private static final String DEFAULT_FOLDER = "C:\\Users\\systo\\git\\Stirling-PDF\\src\\main\\resources";
+    private static final String REFERENCE_FILE = "messages_en_GB.properties";
+
+    /**
+     * @param args optional; {@code args[0]} is the resources folder, otherwise the original hard-coded path is used
+     * @throws IOException if the folder cannot be listed, or a properties file cannot be read or written
+     */
+    public static void main(String[] args) throws IOException {
+        File folder = new File(args.length > 0 ? args[0] : DEFAULT_FOLDER);
+        File[] files = folder.listFiles((dir, name) -> name.matches("messages_.*\\.properties"));
+        if (files == null) { // listFiles returns null when the path is not a readable directory
+            throw new IOException("Cannot list properties folder: " + folder);
+        }
+
+        // Resolve through File so the path separator is correct on every platform, not only Windows
+        List<String> enLines = Files.readAllLines(new File(folder, REFERENCE_FILE).toPath(), StandardCharsets.UTF_8);
+        Map<String, String> enProps = linesToProps(enLines);
+
+        for (File file : files) {
+            if (file.getName().equals(REFERENCE_FILE)) {
+                continue;
+            }
+            System.out.println("Processing file: " + file.getName());
+            List<String> lines;
+            try {
+                lines = Files.readAllLines(file.toPath(), StandardCharsets.UTF_8);
+            } catch (MalformedInputException e) {
+                System.out.println("Skipping due to not UTF8 format for file: " + file.getName());
+                continue;
+            }
+
+            List<String> newLines = syncPropsWithLines(enProps, linesToProps(lines), enLines);
+            Files.write(file.toPath(), newLines, StandardCharsets.UTF_8);
+            System.out.println("Finished processing file: " + file.getName());
+        }
+    }
+
+    /** Returns true for a {@code key=value} line; blank lines and '#' or '!' comment lines are not properties. */
+    private static boolean isProperty(String line) {
+        String trimmed = line.trim();
+        return !trimmed.isEmpty() && !trimmed.startsWith("#") && !trimmed.startsWith("!") && trimmed.contains("=");
+    }
+
+    /** Parses the property lines into an insertion-ordered key to value map; a later duplicate key overwrites. */
+    private static Map<String, String> linesToProps(List<String> lines) {
+        Map<String, String> props = new LinkedHashMap<>();
+        for (String line : lines) {
+            if (isProperty(line)) {
+                String[] parts = line.split("=", 2);
+                props.put(parts[0].trim(), parts[1].trim());
+            }
+        }
+        return props;
+    }
+
+    /**
+     * Builds the new content of one translation file by walking the English lines in order.
+     * {@code enProps} is kept in the signature for compatibility but is not consulted.
+     */
+    private static List<String> syncPropsWithLines(Map<String, String> enProps, Map<String, String> currentProps, List<String> enLines) {
+        List<String> newLines = new ArrayList<>();
+        boolean bannerOpen = false; // true while inside a run of untranslated keys that already has its banner
+
+        for (String line : enLines) {
+            String key = isProperty(line) ? line.split("=", 2)[0].trim() : null;
+            if (key != null && !currentProps.containsKey(key)) {
+                if (!bannerOpen) {
+                    newLines.add("##########################");
+                    newLines.add("###  TODO: Translate   ###");
+                    newLines.add("##########################");
+                    bannerOpen = true;
+                }
+                newLines.add(line);
+            } else {
+                // a translated property, a comment or a blank line: each of these ends the untranslated run
+                newLines.add(key != null ? key + "=" + currentProps.get(key) : line);
+                bannerOpen = false;
+            }
+        }
+        return newLines;
+    }
+}
diff --git a/scripts/init.sh b/scripts/init.sh
@@ -5,5 +5,17 @@ echo "Copying original files without overwriting existing files"
 mkdir -p /usr/share/tesseract-ocr
 cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
 
+# Install the extra Tesseract language packs listed (comma-separated) in TESSERACT_LANGS, if it is set
+if [[ -n "$TESSERACT_LANGS" ]]; then
+  # Convert comma-separated values to a space-separated list
+  LANGS=$(echo "$TESSERACT_LANGS" | tr ',' ' ')
+
+  # Install each pack; the loop variable must not be LANG, which is the shell's locale variable
+  for TESS_LANG in $LANGS; do
+    apt-get install -y "tesseract-ocr-$TESS_LANG"
+  done
+fi
+
+
 # Run the main command
 exec "$@"
diff --git a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
@@ -83,7 +83,9 @@ public void init() {
         addEndpointToGroup("Convert", "pdf-to-text");
         addEndpointToGroup("Convert", "pdf-to-html");
         addEndpointToGroup("Convert", "pdf-to-xml");
-
+        addEndpointToGroup("Convert", "html-to-pdf");
+        addEndpointToGroup("Convert", "url-to-pdf");
+
         // Adding endpoints to "Security" group
         addEndpointToGroup("Security", "add-password");
         addEndpointToGroup("Security", "remove-password");
@@ -125,12 +127,15 @@ public void init() {
         addEndpointToGroup("CLI", "pdf-to-html");
         addEndpointToGroup("CLI", "pdf-to-xml");
         addEndpointToGroup("CLI", "ocr-pdf");
+        addEndpointToGroup("CLI", "html-to-pdf");
+        addEndpointToGroup("CLI", "url-to-pdf");
+
 
         //python
         addEndpointToGroup("Python", "extract-image-scans");
         addEndpointToGroup("Python", "remove-blanks");
-
-
+        addEndpointToGroup("Python", "html-to-pdf");
+        addEndpointToGroup("Python", "url-to-pdf");
 
         //openCV
         addEndpointToGroup("OpenCV", "extract-image-scans");

diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java
@@ -4,9 +4,13 @@
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
-import java.util.*;
+
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestPart;
@@ -17,14 +21,15 @@
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.GeneralUtils;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;
 
 @RestController
 @Tag(name = "Convert", description = "Convert APIs")
 public class ConvertHtmlToPDF {
 
 
-	 @PostMapping(consumes = "multipart/form-data", value = "/convert-to-pdf")
+	 @PostMapping(consumes = "multipart/form-data", value = "/html-to-pdf")
 	    @Operation(
 	        summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
 	        description = "This endpoint takes an HTML or ZIP file input and converts it to a PDF format."
@@ -40,61 +45,83 @@ public ResponseEntity<byte[]> HtmlToPdf(
 	        if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
 	            throw new IllegalArgumentException("File must be either .html or .zip format.");
 	        }
-
 	        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-	        Path tempInputFile;
-
-	        if (originalFilename.endsWith(".html")) {
-	            tempInputFile = Files.createTempFile("input_", ".html");
-	            Files.write(tempInputFile, fileInput.getBytes());
-	        } else {
-	            tempInputFile = unzipAndGetMainHtml(fileInput);
+	        Path tempInputFile = null;
+	        byte[] pdfBytes;
+	        try {
+		        if (originalFilename.endsWith(".html")) {
+		            tempInputFile = Files.createTempFile("input_", ".html");
+		            Files.write(tempInputFile, fileInput.getBytes());
+		        } else {
+		            tempInputFile = unzipAndGetMainHtml(fileInput);
+		        }
+
+		        List<String> command = new ArrayList<>();
+		        command.add("weasyprint");
+		        command.add(tempInputFile.toString()); 
+		        command.add(tempOutputFile.toString());
+		        ProcessExecutorResult returnCode;
+		        if (originalFilename.endsWith(".zip")) {	        	
+		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+	                .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
+		        } else {
+
+		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+		                                        .runCommandWithOutputHandling(command);
+		        }
+
+		        pdfBytes = Files.readAllBytes(tempOutputFile);
+	        } finally {
+		        // Clean up temporary files
+		        Files.delete(tempOutputFile);
+		        Files.delete(tempInputFile);
+
+		        if (originalFilename.endsWith(".zip")) {
+		        	GeneralUtils.deleteDirectory(tempInputFile.getParent());
+		        }
 	        }
+	        String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf";  // Remove file extension and append .pdf
+	        return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
+	    }
 
-	        List<String> command = new ArrayList<>();
-	        command.add("weasyprint");
-	        command.add(tempInputFile.toString()); 
-	        command.add(tempOutputFile.toString());
-	        int returnCode = 0;
-	        if (originalFilename.endsWith(".zip")) {	        	
-	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
-                .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
-	        } else {
-
-	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
-	                                        .runCommandWithOutputHandling(command);
-	        }
 
-	        byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
 
-	        // Clean up temporary files
-	        Files.delete(tempOutputFile);
-	        Files.delete(tempInputFile);
-	        if (originalFilename.endsWith(".zip")) {
-	        	GeneralUtils.deleteDirectory(tempInputFile.getParent());
+	    private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
+	        Path tempDirectory = Files.createTempDirectory("unzipped_");
+	        try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
+	            ZipEntry entry = zipIn.getNextEntry();
+	            while (entry != null) {
+	                Path filePath = tempDirectory.resolve(entry.getName());
+	                if (entry.isDirectory()) {
+	                    Files.createDirectories(filePath);  // Explicitly create the directory structure
+	                } else {
+	                    Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
+	                    Files.copy(zipIn, filePath);
+	                }
+	                zipIn.closeEntry();
+	                entry = zipIn.getNextEntry();
+	            }
 	        }
 
-	        String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf";  // Remove file extension and append .pdf
-	        return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
-	    }
+	        //search for the main HTML file.
+	        try (Stream<Path> walk = Files.walk(tempDirectory)) {
+	            List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
+	                                       .collect(Collectors.toList());
 
+	            if (htmlFiles.isEmpty()) {
+	                throw new IOException("No HTML files found in the unzipped directory.");
+	            }
 
+	            // Prioritize 'index.html' if it exists, otherwise use the first .html file
+	            for (Path htmlFile : htmlFiles) {
+	                if (htmlFile.getFileName().toString().equals("index.html")) {
+	                    return htmlFile;
+	                }
+	            }
 
-    private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
-        Path tempDirectory = Files.createTempDirectory("unzipped_");
-        try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
-            ZipEntry entry = zipIn.getNextEntry();
-            while (entry != null) {
-                Path filePath = tempDirectory.resolve(entry.getName());
-                if (!entry.isDirectory()) {
-                    Files.copy(zipIn, filePath);
-                }
-                zipIn.closeEntry();
-                entry = zipIn.getNextEntry();
-            }
-        }
-        return tempDirectory.resolve("index.html");
-    }
+	            return htmlFiles.get(0);
+	        }
+	    }
 
 
 

diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java
@@ -43,7 +43,7 @@ public ResponseEntity<Resource> convertToImage(
             @Parameter(description = "Choose between a single image containing all pages or separate images for each page", schema = @Schema(allowableValues = {"single", "multiple"}))
                     String singleOrMultiple,
             @RequestParam("colorType")
-            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
+            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
                     String colorType,
             @RequestParam("dpi")
             @Parameter(description = "The DPI (dots per inch) for the output image(s)")
@@ -94,7 +94,7 @@ public ResponseEntity<byte[]> convertToPdf(
             @Parameter(description = "Whether to stretch the images to fit the PDF page or maintain the aspect ratio", example = "false")
                     boolean stretchToFit,
             @RequestParam("colorType")
-            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
+            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
                     String colorType,
             @RequestParam(defaultValue = "false", name = "autoRotate")
             @Parameter(description = "Whether to automatically rotate the images to better fit the PDF page", example = "true")

diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java
@@ -19,6 +19,7 @@
 import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;
 
 @RestController
@@ -41,7 +42,7 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup
 
         // Run the LibreOffice command
         List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
-        int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
+        ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
 
         // Read the converted PDF file
         byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -62,10 +63,10 @@ private boolean isValidFileExtension(String fileExtension) {
         summary = "Convert a file to a PDF using LibreOffice",
         description = "This endpoint converts a given file to a PDF using LibreOffice API  Input:Any Output:PDF Type:SISO"
     )
-    public ResponseEntity<byte[]> processPdfWithOCR(
+    public ResponseEntity<byte[]> processFileToPDF(
         @RequestPart(required = true, value = "fileInput")
         @Parameter(
-            description = "The input file to be converted to a PDF file using OCR",
+            description = "The input file to be converted to a PDF file using LibreOffice",
             required = true
         )
             MultipartFile inputFile