Skip to content

Commit

Permalink
More fixes for RequestPart mixing
Browse files Browse the repository at this point in the history
  • Loading branch information
Frooodle committed Jul 30, 2023
2 parents 6b618f3 + 0732ffa commit f98f089
Show file tree
Hide file tree
Showing 55 changed files with 3,787 additions and 651 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build jbig2enc in a separate stage
FROM frooodle/stirling-pdf-base:latest
FROM frooodle/stirling-pdf-base:beta4

# Create scripts folder and copy local scripts
RUN mkdir /scripts
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
[![Paypal Donate](https://img.shields.io/badge/Paypal%20Donate-yellow?style=flat&logo=paypal)](https://www.paypal.com/paypalme/froodleplex)
[![Github Sponser](https://img.shields.io/badge/Github%20Sponsor-yellow?style=flat&logo=github)](https://github.com/sponsors/Frooodle)

[![Deploy to DO](https://www.deploytodo.com/do-btn-blue.svg)](https://cloud.digitalocean.com/apps/new?repo=https://github.com/Frooodle/Stirling-PDF/tree/digitalOcean&refcode=c3210994b1af)

This is a powerful locally hosted web based PDF manipulation tool using docker that allows you to perform various operations on PDF files, such as splitting merging, converting, reorganizing, adding images, rotating, compressing, and more. This locally hosted web application started as a 100% ChatGPT-made application and has evolved to include a wide range of features to handle all your PDF needs.

Stirling PDF makes no outbound calls for any record keeping or tracking.
Expand All @@ -27,6 +29,11 @@ Feel free to request any features or bug fixes either in github issues or our [D
- Convert PDFs to and from images
- Reorganize PDF pages into different orders.
- Add/Generate signatures
- Format PDFs into a multi-paged page
- Scale page contents size by set %
- Adjust Contrast
- Crop PDF
- Auto Split PDF (With physically scanned page dividers)
- Flatten PDFs
- Repair PDFs
- Detect and remove blank pages
Expand All @@ -39,8 +46,14 @@ Feel free to request any features or bug fixes either in github issues or our [D
- Add watermark(s)
- Convert Any common file to PDF (using LibreOffice)
- Convert PDF to Word/Powerpoint/Others (using LibreOffice)
- Convert HTML to PDF
- URL to PDF
- Extract images from PDF
- Extract images from Scans
- Add page numbers
- Auto rename file by detecting PDF header text
- OCR on PDF (Using OCRMyPDF)
- PDF/A conversion (Using OCRMyPDF)
- Edit metadata
- Dark mode support.
- Custom download options (see [here](https://github.com/Frooodle/Stirling-PDF/blob/main/images/settings.png) for example)
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {
}

group = 'stirling.software'
version = '0.11.0'
version = '0.11.2'
sourceCompatibility = '17'

repositories {
Expand Down
Binary file modified images/stirling-home.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
80 changes: 80 additions & 0 deletions scripts/PropSync.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package stirling.software.Stirling.Stats;

import java.nio.file.*;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.io.*;
import java.util.*;

public class PropSync {

public static void main(String[] args) throws IOException {
File folder = new File("C:\\Users\\systo\\git\\Stirling-PDF\\src\\main\\resources");
File[] files = folder.listFiles((dir, name) -> name.matches("messages_.*\\.properties"));

List<String> enLines = Files.readAllLines(Paths.get(folder + "\\messages_en_GB.properties"), StandardCharsets.UTF_8);
Map<String, String> enProps = linesToProps(enLines);

for (File file : files) {
if (!file.getName().equals("messages_en_GB.properties")) {
System.out.println("Processing file: " + file.getName());
List<String> lines;
try {
lines = Files.readAllLines(file.toPath(), StandardCharsets.UTF_8);
} catch (MalformedInputException e) {
System.out.println("Skipping due to not UTF8 format for file: " + file.getName());
continue;
} catch (IOException e) {
throw new UncheckedIOException(e);
}

Map<String, String> currentProps = linesToProps(lines);
List<String> newLines = syncPropsWithLines(enProps, currentProps, enLines);

Files.write(file.toPath(), newLines, StandardCharsets.UTF_8);
System.out.println("Finished processing file: " + file.getName());
}
}
}

private static Map<String, String> linesToProps(List<String> lines) {
Map<String, String> props = new LinkedHashMap<>();
for (String line : lines) {
if (!line.trim().isEmpty() && line.contains("=")) {
String[] parts = line.split("=", 2);
props.put(parts[0].trim(), parts[1].trim());
}
}
return props;
}

private static List<String> syncPropsWithLines(Map<String, String> enProps, Map<String, String> currentProps, List<String> enLines) {
List<String> newLines = new ArrayList<>();
boolean needsTranslateComment = false; // flag to check if we need to add "TODO: Translate"

for (String line : enLines) {
if (line.contains("=")) {
String key = line.split("=", 2)[0].trim();

if (currentProps.containsKey(key)) {
newLines.add(key + "=" + currentProps.get(key));
needsTranslateComment = false;
} else {
if (!needsTranslateComment) {
newLines.add("##########################");
newLines.add("### TODO: Translate ###");
newLines.add("##########################");
needsTranslateComment = true;
}
newLines.add(line);
}
} else {
// handle comments and other non-property lines
newLines.add(line);
needsTranslateComment = false; // reset the flag when we encounter comments or empty lines
}
}

return newLines;
}
}
12 changes: 12 additions & 0 deletions scripts/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,17 @@ echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tesseract-ocr
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr

# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list
LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')

# Install each language pack
for LANG in $LANGS; do
apt-get install -y "tesseract-ocr-$LANG"
done
fi


# Run the main command
exec "$@"
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ public void init() {
addEndpointToGroup("Convert", "pdf-to-text");
addEndpointToGroup("Convert", "pdf-to-html");
addEndpointToGroup("Convert", "pdf-to-xml");

addEndpointToGroup("Convert", "html-to-pdf");
addEndpointToGroup("Convert", "url-to-pdf");

// Adding endpoints to "Security" group
addEndpointToGroup("Security", "add-password");
addEndpointToGroup("Security", "remove-password");
Expand Down Expand Up @@ -125,12 +127,15 @@ public void init() {
addEndpointToGroup("CLI", "pdf-to-html");
addEndpointToGroup("CLI", "pdf-to-xml");
addEndpointToGroup("CLI", "ocr-pdf");
addEndpointToGroup("CLI", "html-to-pdf");
addEndpointToGroup("CLI", "url-to-pdf");


//python
addEndpointToGroup("Python", "extract-image-scans");
addEndpointToGroup("Python", "remove-blanks");


addEndpointToGroup("Python", "html-to-pdf");
addEndpointToGroup("Python", "url-to-pdf");

//openCV
addEndpointToGroup("OpenCV", "extract-image-scans");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.*;

import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
Expand All @@ -17,14 +21,15 @@
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@Tag(name = "Convert", description = "Convert APIs")
public class ConvertHtmlToPDF {


@PostMapping(consumes = "multipart/form-data", value = "/convert-to-pdf")
@PostMapping(consumes = "multipart/form-data", value = "/html-to-pdf")
@Operation(
summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
description = "This endpoint takes an HTML or ZIP file input and converts it to a PDF format."
Expand All @@ -40,61 +45,83 @@ public ResponseEntity<byte[]> HtmlToPdf(
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}

Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile;

if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
}

List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {

returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}

pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);

if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
}
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}

List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = 0;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {

returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}

byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}

String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
//search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());

if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}

// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if (htmlFile.getFileName().toString().equals("index.html")) {
return htmlFile;
}
}

private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (!entry.isDirectory()) {
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
return tempDirectory.resolve("index.html");
}
return htmlFiles.get(0);
}
}



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public ResponseEntity<Resource> convertToImage(
@Parameter(description = "Choose between a single image containing all pages or separate images for each page", schema = @Schema(allowableValues = {"single", "multiple"}))
String singleOrMultiple,
@RequestParam("colorType")
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
String colorType,
@RequestParam("dpi")
@Parameter(description = "The DPI (dots per inch) for the output image(s)")
Expand Down Expand Up @@ -94,7 +94,7 @@ public ResponseEntity<byte[]> convertToPdf(
@Parameter(description = "Whether to stretch the images to fit the PDF page or maintain the aspect ratio", example = "false")
boolean stretchToFit,
@RequestParam("colorType")
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
String colorType,
@RequestParam(defaultValue = "false", name = "autoRotate")
@Parameter(description = "Whether to automatically rotate the images to better fit the PDF page", example = "true")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
Expand All @@ -41,7 +42,7 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup

// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);

// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
Expand All @@ -62,10 +63,10 @@ private boolean isValidFileExtension(String fileExtension) {
summary = "Convert a file to a PDF using LibreOffice",
description = "This endpoint converts a given file to a PDF using LibreOffice API Input:Any Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> processPdfWithOCR(
public ResponseEntity<byte[]> processFileToPDF(
@RequestPart(required = true, value = "fileInput")
@Parameter(
description = "The input file to be converted to a PDF file using OCR",
description = "The input file to be converted to a PDF file using LibreOffice",
required = true
)
MultipartFile inputFile
Expand Down
Loading

0 comments on commit f98f089

Please sign in to comment.