33 */
44package com .imsweb .staging ;
55
6- import java .io .BufferedReader ;
6+ import java .io .BufferedInputStream ;
7+ import java .io .ByteArrayOutputStream ;
78import java .io .IOException ;
89import java .io .InputStream ;
9- import java .io .InputStreamReader ;
1010import java .nio .charset .StandardCharsets ;
1111import java .nio .file .Files ;
1212import java .nio .file .Path ;
1515import java .util .HashSet ;
1616import java .util .Map ;
1717import java .util .Set ;
18- import java .util .stream .Collectors ;
1918import java .util .zip .ZipEntry ;
2019import java .util .zip .ZipInputStream ;
2120
@@ -47,6 +46,11 @@ public class ExternalStagingFileDataProvider extends StagingDataProvider {
4746 private final Map <String , StagingSchema > _schemas = new HashMap <>();
4847 private final Map <String , GlossaryDefinition > _glossaryTerms = new HashMap <>();
4948
49+ private static final int THRESHOLD_ENTRIES = 10000 ;
50+ private static final long THRESHOLD_ENTRY_SIZE = 10L * 1024 * 1024 ;
51+ private static final long THRESHOLD_SIZE = 100_000_000 ; // 100 MB
52+ private static final double THRESHOLD_RATIO = 50 ;
53+
5054 /**
5155 * Constructor loads all schemas and sets up table cache
5256 * @param zipFilePath full path to algorithm zip file
@@ -80,11 +84,28 @@ public ExternalStagingFileDataProvider(InputStream is) throws IOException {
8084 init (is );
8185 }
8286
87+ private record EntryData (String json , long uncompressedSize ) {}
88+
8389 /**
84- * Read a zip entry from an inputstream and return as a byte array
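90+ * Read a zip entry from an input stream and return its content as a UTF-8 String together with the number of bytes read; fails if the entry exceeds the per-entry size limit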
8591 */
86- private static String extractEntry (InputStream is ) {
87- return new BufferedReader (new InputStreamReader (is , StandardCharsets .UTF_8 )).lines ().collect (Collectors .joining ("\n " ));
92+ private static EntryData readEntrySafely (InputStream is ) throws IOException {
93+ ByteArrayOutputStream baos = new ByteArrayOutputStream ();
94+ byte [] buffer = new byte [8192 ];
95+ long total = 0 ;
96+ int read ;
97+
98+ while ((read = is .read (buffer )) != -1 ) {
99+ total += read ;
100+
101+ if (total > THRESHOLD_ENTRY_SIZE )
102+ throw new IllegalStateException ("Zip entry is too large; maximum permitted per entry is " + THRESHOLD_ENTRY_SIZE + " bytes" );
103+
104+ baos .write (buffer , 0 , read );
105+ }
106+
107+ String json = baos .toString (StandardCharsets .UTF_8 );
108+ return new EntryData (json , total );
88109 }
89110
90111 /**
@@ -93,17 +114,37 @@ private static String extractEntry(InputStream is) {
93114 private void init (InputStream is ) throws IOException {
94115 Set <String > algorithms = new HashSet <>();
95116 Set <String > versions = new HashSet <>();
117+ long totalSizeArchive = 0L ;
118+ int totalEntries = 0 ;
96119
97120 TrieBuilder builder = Trie .builder ().onlyWholeWords ().ignoreCase ();
98121
99- try (ZipInputStream stream = new ZipInputStream (is )) {
122+ try (ZipInputStream stream = new ZipInputStream (new BufferedInputStream ( is ) )) {
100123 ZipEntry entry ;
101124 while ((entry = stream .getNextEntry ()) != null ) {
102125 if (entry .isDirectory () || !entry .getName ().endsWith (".json" ))
103126 continue ;
104127
128+ totalEntries ++;
129+
130+ if (totalEntries > THRESHOLD_ENTRIES )
131+ throw new IllegalStateException ("Algorithm zip file has too many entries; maximum permitted is " + THRESHOLD_ENTRIES );
132+
133+ EntryData data = readEntrySafely (stream );
134+
135+ totalSizeArchive += data .uncompressedSize ;
136+ if (totalSizeArchive > THRESHOLD_SIZE )
137+ throw new IllegalStateException ("Algorithm zip file uncompressed size is too large; maximum permitted is " + THRESHOLD_SIZE + " bytes" );
138+
139+ long compressedSize = entry .getCompressedSize (); // may be -1 if unknown
140+ if (compressedSize > 0 ) {
141+ double ratio = (double )data .uncompressedSize / (double )compressedSize ;
142+ if (ratio > THRESHOLD_RATIO )
143+ throw new IllegalStateException ("Zip entry compression ratio too high (" + ratio + "); potential zip bomb" );
144+ }
145+
105146 if (entry .getName ().startsWith ("tables" )) {
106- StagingTable table = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (extractEntry ( stream )), StagingTable .class );
147+ StagingTable table = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (data . json ( )), StagingTable .class );
107148
108149 initTable (table );
109150
@@ -113,7 +154,7 @@ private void init(InputStream is) throws IOException {
113154 _tables .put (table .getId (), table );
114155 }
115156 else if (entry .getName ().startsWith ("schemas" )) {
116- StagingSchema schema = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (extractEntry ( stream )), StagingSchema .class );
157+ StagingSchema schema = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (data . json ( )), StagingSchema .class );
117158
118159 initSchema (schema );
119160
@@ -123,7 +164,7 @@ else if (entry.getName().startsWith("schemas")) {
123164 _schemas .put (schema .getId (), schema );
124165 }
125166 else if (entry .getName ().startsWith ("glossary" )) {
126- GlossaryDefinition glossary = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (extractEntry ( stream )), GlossaryDefinition .class );
167+ GlossaryDefinition glossary = getMapper ().reader ().readValue (getMapper ().getFactory ().createParser (data . json ( )), GlossaryDefinition .class );
127168 _glossaryTerms .put (glossary .getName (), glossary );
128169 builder .addKeyword (glossary .getName ());
129170 }
0 commit comments