Skip to content

Commit

Permalink
Sanitize attachment filenames and bump dependency versions
Browse files Browse the repository at this point in the history
(cherry picked from commit 1f5cd2d)
  • Loading branch information
nickrussler committed May 25, 2024
1 parent a1221a6 commit daac97f
Show file tree
Hide file tree
Showing 13 changed files with 56,892 additions and 29 deletions.
6 changes: 3 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ repositories {

dependencies {
implementation 'com.google.guava:guava:29.0-jre'
implementation 'com.sun.mail:javax.mail:1.6.2'
implementation 'com.sun.mail:jakarta.mail:2.0.1'
implementation 'com.beust:jcommander:1.78'
implementation 'org.apache.tika:tika-core:1.24.1'
implementation 'com.github.markusbernhardt:proxy-vole:1.0.5'
implementation 'org.simplejavamail:simple-java-mail:6.5.2'
implementation 'org.simplejavamail:outlook-module:6.5.2'
implementation 'org.simplejavamail:simple-java-mail:8.10.1'
implementation 'org.simplejavamail:outlook-module:8.10.1'
implementation 'org.slf4j:slf4j-simple:1.7.30'

testImplementation 'junit:junit:4.+'
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/mimeparser/ContentTypeCleaner.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimePart;
import javax.mail.internet.MimeUtility;
import javax.mail.internet.ParseException;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.ContentType;
import jakarta.mail.internet.MimePart;
import jakarta.mail.internet.MimeUtility;
import jakarta.mail.internet.ParseException;
import util.Logger;

/**
Expand Down
27 changes: 19 additions & 8 deletions src/main/java/mimeparser/MimeMessageConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,14 @@
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeUtility;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.MimeMessage;
import jakarta.mail.internet.MimeUtility;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.mime.MimeTypes;
import org.simplejavamail.api.email.AttachmentResource;
import org.simplejavamail.converter.EmailConverter;
import util.LogLevel;
import util.Logger;
import util.StringReplacer;
import util.StringReplacerCallback;
import util.*;

/**
* Converts email (eml, msg) files into pdf files.
Expand Down Expand Up @@ -154,12 +152,17 @@ public static void convertToPdf(
Date sentDate = message.getSentDate();
sentDateStr = DATE_FORMATTER.format(sentDate);
} catch (Exception e) {
Logger.error("Could not parse the date");
e.printStackTrace();
}

if (sentDateStr == null) {
Logger.error("Could not parse the date, fallback to raw value");
Logger.error("Attempt to fallback to raw date value");
sentDateStr = message.getHeader("date", null);

if (sentDateStr == null) {
Logger.error("No Date value found, proceeding without date value");
}
}

/* ######### Parse the mime structure ######### */
Expand Down Expand Up @@ -367,11 +370,19 @@ public String replace(Matcher m) throws Exception {
String attachmentFilename = null;
try {
attachmentFilename = attachmentResource.getDataSource().getName();

// see simple-java-mail MimeMessageParser.java (https://tinyurl.com/45f98j3x)
if (attachmentFilename.equals("UnknownAttachment")) {
attachmentFilename = null;
}
} catch (Exception e) {
// ignore this error
}

if (!Strings.isNullOrEmpty(attachmentFilename)) {
// sanitize filename
attachmentFilename = FileNameSanitizer.sanitizeFileName(attachmentFilename, '_');

attachFile = new File(attachmentDir, attachmentFilename);
} else {
String extension = "";
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/mimeparser/MimeMessageParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.ContentDisposition;
import javax.mail.internet.ContentType;
import jakarta.mail.MessagingException;
import jakarta.mail.Multipart;
import jakarta.mail.Part;
import jakarta.mail.internet.ContentDisposition;
import jakarta.mail.internet.ContentType;
import util.Logger;

/**
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/mimeparser/MimeObjectEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package mimeparser;

import javax.mail.internet.ContentType;
import jakarta.mail.internet.ContentType;

/**
* Wrapper class that is used to bundle an object with its contentType.
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/mimeparser/WalkMimeCallback.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package mimeparser;

import javax.mail.Part;
import jakarta.mail.Part;

/**
* Interface for the Recursive Callback.
Expand Down
50 changes: 50 additions & 0 deletions src/main/java/util/FileNameSanitizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package util;

import com.google.common.annotations.VisibleForTesting;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Turns an arbitrary (possibly attacker-chosen) attachment name into a string that is safe to use
 * as a single file name on the host file system.
 *
 * <p>The rule set is chosen from the {@code os.name} system property when the class is loaded and
 * can be switched with {@link #setOsName(String)}. The configuration is plain static state and is
 * not synchronized.
 */
public class FileNameSanitizer {

    /** Characters Windows rejects in a file name, including the ASCII control range. */
    private static final Pattern WINDOWS_ILLEGAL_CHARACTERS =
            Pattern.compile("[<>:\"/\\\\|?*\\x00-\\x1F]");

    /** Characters that can never be part of a Unix file name: the path separator and NUL. */
    private static final Pattern UNIX_ILLEGAL_CHARACTERS = Pattern.compile("[/\\x00]");

    /** Windows device names; stored upper-case, compared case-insensitively. */
    private static final Set<String> WINDOWS_RESERVED_NAMES = new HashSet<>(Arrays.asList(
            "CON", "PRN", "AUX", "NUL",
            "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
            "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"));

    /** Pattern of the currently active rule set. */
    private static Pattern illegalCharacters;

    /** Whether the Windows-only reserved-name rule is active. */
    private static boolean windowsRules;

    static {
        setOsName(System.getProperty("os.name"));
    }

    /**
     * Selects the rule set for the given operating system name.
     *
     * <p>Exposed so that tests can simulate other platforms; production code relies on the value
     * picked up at class-load time.
     *
     * @param osName operating system name as reported by {@code os.name}; {@code null} selects
     *     the Unix rules
     */
    public static void setOsName(String osName) {
        // Locale.ROOT keeps the comparison independent of the user's locale (e.g. Turkish "I").
        String normalized = osName == null ? "" : osName.toLowerCase(Locale.ROOT);

        // "darwin" contains "win" but follows the Unix rules.
        windowsRules = normalized.contains("win") && !normalized.contains("darwin");
        illegalCharacters = windowsRules ? WINDOWS_ILLEGAL_CHARACTERS : UNIX_ILLEGAL_CHARACTERS;
    }

    /**
     * Replaces every character that is illegal for the active OS and defuses names that have a
     * special meaning to the file system.
     *
     * <ul>
     *   <li>Illegal characters are replaced one-for-one with {@code replacement}.</li>
     *   <li>The names {@code "."} and {@code ".."} have their dots replaced, so the result can
     *       never point at the containing or parent directory.</li>
     *   <li>Under Windows rules a reserved device name, with or without extension, is rewritten
     *       to {@code _NAME_} followed by the original extension (for example {@code con.txt}
     *       becomes {@code _CON_.txt}).</li>
     * </ul>
     *
     * @param fileName the raw file name; {@code null} and the empty string are returned unchanged
     * @param replacement the character substituted for each illegal character
     * @return the sanitized file name; never empty for a non-empty input
     * @throws IllegalArgumentException if {@code replacement} is itself illegal for the active OS
     */
    public static String sanitizeFileName(String fileName, char replacement) {
        if (fileName == null || fileName.isEmpty()) {
            return fileName;
        }

        String replacementText = String.valueOf(replacement);
        if (illegalCharacters.matcher(replacementText).find()) {
            throw new IllegalArgumentException(
                    "Replacement character is itself illegal in file names: '" + replacement + "'");
        }

        // The substitution is one character for one character, so the result cannot become empty.
        String sanitized = illegalCharacters.matcher(fileName)
                .replaceAll(Matcher.quoteReplacement(replacementText));

        if (sanitized.equals(".") || sanitized.equals("..")) {
            // A dot replacement would leave the name unchanged, so fall back to '_' in that case.
            sanitized = sanitized.replace('.', replacement == '.' ? '_' : replacement);
        }

        return windowsRules ? escapeReservedName(sanitized) : sanitized;
    }

    /** Wraps a Windows device name (the part before the first dot) in underscores. */
    private static String escapeReservedName(String name) {
        int firstDot = name.indexOf('.');
        String stem = firstDot < 0 ? name : name.substring(0, firstDot);
        String upperStem = stem.toUpperCase(Locale.ROOT);

        if (!WINDOWS_RESERVED_NAMES.contains(upperStem)) {
            return name;
        }

        String extension = firstDot < 0 ? "" : name.substring(firstDot);
        return "_" + upperStem + "_" + extension;
    }
}
35 changes: 35 additions & 0 deletions src/test/java/cli/MainTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,41 @@ public void main_attachments() throws IOException, URISyntaxException {
}
}

/**
 * Converts an eml whose attachment name contains a '/' and checks that the attachment is
 * extracted under a sanitized name ('/' replaced with '_').
 *
 * <p>The log level is restored and all temporary files are removed even when the conversion or
 * the assertion fails.
 */
@Test
public void main_attachmentsWithSlashInAttachmentName() throws IOException, URISyntaxException {
    File tmpPdf = File.createTempFile("emailtopdf", ".pdf");
    File attachmentDir = new File(tmpPdf.getParent(), Files.getNameWithoutExtension(tmpPdf.getName()) + "-attachments");
    LogLevel old = Logger.level;

    try {
        String eml = new File(MainTest.class.getClassLoader().getResource("eml/testAttachmentsWithSlashInName.eml").toURI()).getAbsolutePath();
        String[] args = new String[]{"-o", tmpPdf.getAbsolutePath(), "-a", eml};

        Logger.level = LogLevel.Error;
        try {
            Main.main(args);
        } finally {
            // Restore the global log level even if the conversion throws.
            Logger.level = old;
        }

        // File.list() returns null when the directory does not exist; fail with a clear message.
        String[] attachmentNames = attachmentDir.list();
        if (attachmentNames == null) {
            throw new AssertionError("Attachment directory was not created: " + attachmentDir);
        }

        List<String> attachments = Arrays.asList(attachmentNames);
        assertThat(attachments, hasItems("test_IMAG0144.jpg")); // '/' is replaced with '_'
    } finally {
        if (!tmpPdf.delete()) {
            tmpPdf.deleteOnExit();
        }

        String[] leftovers = attachmentDir.list();
        if (leftovers != null) {
            for (String fileName : leftovers) {
                File f = new File(attachmentDir, fileName);
                if (!f.delete()) {
                    f.deleteOnExit();
                }
            }
        }

        if (!attachmentDir.delete()) {
            attachmentDir.deleteOnExit();
        }
    }
}

@Test
public void main_attachmentsSniffFileExtension() throws IOException, URISyntaxException {
File tmpPdf = File.createTempFile("emailtopdf", ".pdf");
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/mimeparser/ContentTypeCleanerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalToIgnoringCase;

import javax.mail.MessagingException;
import javax.mail.internet.ContentType;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.ContentType;
import org.junit.Test;

/**
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/mimeparser/MimeMessageConverterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import org.junit.BeforeClass;
import org.junit.Test;

import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.MimeMessage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/mimeparser/MimeMessageParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.HashMap;
import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.MimeMessage;
import org.junit.BeforeClass;
import org.junit.Test;

Expand Down
56 changes: 56 additions & 0 deletions src/test/java/util/FileNameSanitizerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package util;

import org.junit.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;

/**
 * Unit tests for {@link FileNameSanitizer}.
 *
 * <p>The sanitizer keeps its OS configuration in static state, so every test goes through
 * {@link #sanitizeAs(String, String)}, which switches back to the host OS afterwards. This keeps
 * the tests independent of their execution order and of other test classes.
 */
public class FileNameSanitizerTest {

    /**
     * Sanitizes {@code fileName} with {@code '_'} as replacement under the rules of the given OS,
     * then restores the configuration derived from the real {@code os.name} property.
     */
    private static String sanitizeAs(String osName, String fileName) {
        FileNameSanitizer.setOsName(osName);
        try {
            return FileNameSanitizer.sanitizeFileName(fileName, '_');
        } finally {
            FileNameSanitizer.setOsName(System.getProperty("os.name"));
        }
    }

    @Test
    public void sanitizeFileName_returnsNullForNull_Windows() {
        assertThat(sanitizeAs("Windows 10", null), equalTo(null));
    }

    @Test
    public void sanitizeFileName_shouldReplaceIllegalCharacters_Windows() {
        String originalFileName = "illegal:/\\*?\"<>|.txt";
        assertThat(sanitizeAs("Windows 10", originalFileName), equalTo("illegal_________.txt"));
    }

    @Test
    public void sanitizeFileName_shouldReplaceIllegalCharacters_Unix() {
        String originalFileName = "illegal:/\\*?\"<>|.txt";
        assertThat(sanitizeAs("Linux", originalFileName), equalTo("illegal:_\\*?\"<>|.txt"));
    }

    @Test
    public void sanitizeFileName_shouldHandleReservedNames_Windows() {
        assertThat(sanitizeAs("Windows 10", "CON"), equalTo("_CON_"));
        // Reserved names are matched case-insensitively and reported in upper case.
        assertThat(sanitizeAs("Windows 10", "prn"), equalTo("_PRN_"));
    }

    @Test
    public void sanitizeFileName_shouldNotChangeValidFileName() {
        // Checked explicitly under both rule sets instead of relying on leftover state.
        String originalFileName = "valid_filename.txt";
        assertThat(sanitizeAs("Windows 10", originalFileName), equalTo("valid_filename.txt"));
        assertThat(sanitizeAs("Linux", originalFileName), equalTo("valid_filename.txt"));
    }
}
Loading

0 comments on commit daac97f

Please sign in to comment.