Skip to content

Commit ea5cd06

Browse files
committed
Zip bomb detection
1 parent 4964dfa commit ea5cd06

File tree

2 files changed

+51
-17
lines changed

2 files changed

+51
-17
lines changed

pom.xml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
<ahocorasick.version>0.6.3</ahocorasick.version>
4747
<commons.lang3.version>3.20.0</commons.lang3.version>
4848
<cache2k.version>2.6.1.Final</cache2k.version>
49-
<zt-zip.version>1.17</zt-zip.version>
5049
<seerapi-client.version>5.9</seerapi-client.version>
5150

5251
<!-- Test deps -->
@@ -107,12 +106,6 @@
107106
<scope>runtime</scope>
108107
</dependency>
109108

110-
<dependency>
111-
<groupId>org.zeroturnaround</groupId>
112-
<artifactId>zt-zip</artifactId>
113-
<version>${zt-zip.version}</version>
114-
</dependency>
115-
116109
<dependency>
117110
<groupId>com.imsweb</groupId>
118111
<artifactId>seerapi-client-java</artifactId>

src/main/java/com/imsweb/staging/ExternalStagingFileDataProvider.java

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
*/
44
package com.imsweb.staging;
55

6-
import java.io.BufferedReader;
6+
import java.io.BufferedInputStream;
7+
import java.io.ByteArrayOutputStream;
78
import java.io.IOException;
89
import java.io.InputStream;
9-
import java.io.InputStreamReader;
1010
import java.nio.charset.StandardCharsets;
1111
import java.nio.file.Files;
1212
import java.nio.file.Path;
@@ -15,7 +15,6 @@
1515
import java.util.HashSet;
1616
import java.util.Map;
1717
import java.util.Set;
18-
import java.util.stream.Collectors;
1918
import java.util.zip.ZipEntry;
2019
import java.util.zip.ZipInputStream;
2120

@@ -47,6 +46,11 @@ public class ExternalStagingFileDataProvider extends StagingDataProvider {
4746
private final Map<String, StagingSchema> _schemas = new HashMap<>();
4847
private final Map<String, GlossaryDefinition> _glossaryTerms = new HashMap<>();
4948

49+
private static final int THRESHOLD_ENTRIES = 10000; // maximum number of .json entries processed per archive
50+
private static final long THRESHOLD_ENTRY_SIZE = 10L * 1024 * 1024; // 10 MiB (10,485,760 bytes) of uncompressed data per entry
51+
private static final long THRESHOLD_SIZE = 100_000_000; // 100 MB (decimal) of uncompressed data summed over all processed entries
52+
private static final double THRESHOLD_RATIO = 50; // maximum uncompressed-to-compressed size ratio allowed for a single entry
53+
5054
/**
5155
* Constructor loads all schemas and sets up table cache
5256
* @param zipFilePath full path to algorithm zip file
@@ -80,11 +84,28 @@ public ExternalStagingFileDataProvider(InputStream is) throws IOException {
8084
init(is);
8185
}
8286

87+
private record EntryData(String json, long uncompressedSize) {} // entry content decoded as UTF-8 text, plus the number of bytes read for that entry
88+
8389
/**
84-
* Read a zip entry from an inputstream and return as a byte array
90+
* Read a zip entry from an input stream, enforcing the per-entry size limit, and return its content as a UTF-8 String together with its uncompressed size
8591
*/
86-
private static String extractEntry(InputStream is) {
87-
return new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)).lines().collect(Collectors.joining("\n"));
92+
private static EntryData readEntrySafely(InputStream is) throws IOException {
// Reads the stream to its end in 8192-byte chunks, counting the bytes read. Throws
// IllegalStateException as soon as the count exceeds THRESHOLD_ENTRY_SIZE; otherwise returns the
// bytes decoded as UTF-8 together with the byte count. The stream is not closed here.
93+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
94+
byte[] buffer = new byte[8192];
95+
long total = 0;
96+
int read;
97+
98+
while ((read = is.read(buffer)) != -1) {
99+
total += read;
100+
101+
if (total > THRESHOLD_ENTRY_SIZE) // checked before the chunk is buffered, so baos never holds more than the limit
102+
throw new IllegalStateException("Zip entry is too large; maximum permitted per entry is " + THRESHOLD_ENTRY_SIZE + " bytes");
103+
104+
baos.write(buffer, 0, read);
105+
}
106+
107+
String json = baos.toString(StandardCharsets.UTF_8); // decode once, after all bytes have been collected
108+
return new EntryData(json, total);
88109
}
89110

90111
/**
@@ -93,17 +114,37 @@ private static String extractEntry(InputStream is) {
93114
private void init(InputStream is) throws IOException {
94115
Set<String> algorithms = new HashSet<>();
95116
Set<String> versions = new HashSet<>();
117+
long totalSizeArchive = 0L;
118+
int totalEntries = 0;
96119

97120
TrieBuilder builder = Trie.builder().onlyWholeWords().ignoreCase();
98121

99-
try (ZipInputStream stream = new ZipInputStream(is)) {
122+
try (ZipInputStream stream = new ZipInputStream(new BufferedInputStream(is))) {
100123
ZipEntry entry;
101124
while ((entry = stream.getNextEntry()) != null) {
102125
if (entry.isDirectory() || !entry.getName().endsWith(".json"))
103126
continue;
104127

128+
totalEntries++;
129+
130+
if (totalEntries > THRESHOLD_ENTRIES)
131+
throw new IllegalStateException("Algorithm zip file has too many entries; maximum permitted is " + THRESHOLD_ENTRIES);
132+
133+
EntryData data = readEntrySafely(stream);
134+
135+
totalSizeArchive += data.uncompressedSize;
136+
if (totalSizeArchive > THRESHOLD_SIZE)
137+
throw new IllegalStateException("Algorithm zip file uncompressed size is too large; maximum permitted is " + THRESHOLD_SIZE + " bytes");
138+
139+
long compressedSize = entry.getCompressedSize(); // may be -1 if unknown
140+
if (compressedSize > 0) {
141+
double ratio = (double)data.uncompressedSize / (double)compressedSize;
142+
if (ratio > THRESHOLD_RATIO)
143+
throw new IllegalStateException("Zip entry compression ratio too high (" + ratio + "); potential zip bomb");
144+
}
145+
105146
if (entry.getName().startsWith("tables")) {
106-
StagingTable table = getMapper().reader().readValue(getMapper().getFactory().createParser(extractEntry(stream)), StagingTable.class);
147+
StagingTable table = getMapper().reader().readValue(getMapper().getFactory().createParser(data.json()), StagingTable.class);
107148

108149
initTable(table);
109150

@@ -113,7 +154,7 @@ private void init(InputStream is) throws IOException {
113154
_tables.put(table.getId(), table);
114155
}
115156
else if (entry.getName().startsWith("schemas")) {
116-
StagingSchema schema = getMapper().reader().readValue(getMapper().getFactory().createParser(extractEntry(stream)), StagingSchema.class);
157+
StagingSchema schema = getMapper().reader().readValue(getMapper().getFactory().createParser(data.json()), StagingSchema.class);
117158

118159
initSchema(schema);
119160

@@ -123,7 +164,7 @@ else if (entry.getName().startsWith("schemas")) {
123164
_schemas.put(schema.getId(), schema);
124165
}
125166
else if (entry.getName().startsWith("glossary")) {
126-
GlossaryDefinition glossary = getMapper().reader().readValue(getMapper().getFactory().createParser(extractEntry(stream)), GlossaryDefinition.class);
167+
GlossaryDefinition glossary = getMapper().reader().readValue(getMapper().getFactory().createParser(data.json()), GlossaryDefinition.class);
127168
_glossaryTerms.put(glossary.getName(), glossary);
128169
builder.addKeyword(glossary.getName());
129170
}

0 commit comments

Comments
 (0)