57
57
my $limitTime = (defined $query -> {timeLimit }) ? " timeout $query ->{timeLimit}s" : " " ;
58
58
my $ulimitMemory = (defined $query -> {memoryLimit }) ? " ulimit -m $memoryLimitInBytes ; ulimit -v $memoryLimitInBytes ;" : " " ;
59
59
60
- # Run each detector. Can re-use the input file.
60
+ my @patternsToTry = &expandPatternSpaceForDetectors($query -> {pattern });
61
+
62
+ # This will contain N_DETECTORS * scalar(@patternsToTry) opinions.
61
63
my @detectorOpinions ;
62
- &log (" Applying detectors to pattern /$query ->{pattern}/" );
63
- for my $d (@DETECTORS ) {
64
- &log (" Querying detector $d ->{name}" );
65
- my $t0 = [gettimeofday];
66
- my $stderrFile = " /tmp/detect-vuln-$$ -stderr" ;
67
- my ($rc , $out ) = &cmd(" $ulimitMemory $limitTime $d ->{driver} $patternFile 2>$stderrFile " );
68
- my $elapsed = tv_interval($t0 );
69
- chomp $out ;
70
-
71
- # Clean up in case there was a timeout.
72
- my $stderr = &readFile(" file" => $stderrFile );
73
- my @filesToClean = ($stderr =~ m / CLEANUP: (\S +)/ g );
74
- &log (" Cleaning up @filesToClean " );
75
- unlink @filesToClean ;
76
- unlink $stderrFile ;
77
-
78
- my $opinion = { " name" => $d -> {name },
79
- " secToDecide" => sprintf (" %.4f" , $elapsed ),
80
- };
81
-
82
- if ($rc eq 124) {
83
- &log (" Detector $d ->{name} timed out" );
84
- $opinion -> {hasOpinion } = 0;
85
- $opinion -> {opinion } = " TIMEOUT" ;
86
- }
87
- elsif ($rc ) {
88
- &log (" Detector $d ->{name} said rc $rc " );
89
- $opinion -> {hasOpinion } = 0;
90
- $opinion -> {opinion } = " INTERNAL-ERROR" ;
91
- }
92
- else {
93
- &log (" Detector $d ->{name} said: $out " );
94
- my $result = decode_json($out );
95
- # Extract the details needed to make the summary.
96
- # Otherwise we repeat ourselves too much.
97
- $opinion -> {hasOpinion } = 1;
98
- $opinion -> {opinion } = $result -> {opinion };
64
+ # Try each pattern.
65
+ for my $pattern (@patternsToTry ) {
66
+ &log (" Applying detectors to pattern /$pattern /" );
67
+
68
+ # Craft query file.
69
+ my $newQuery = decode_json(encode_json($query ));
70
+ $newQuery -> {pattern } = $pattern ;
71
+ my $tmpPatternFile = &makeQueryFile($newQuery );
72
+
73
+ # Ask each detector.
74
+ for my $d (@DETECTORS ) {
75
+ &log (" Querying detector $d ->{name}" );
76
+ my $t0 = [gettimeofday];
77
+ my $stderrFile = " /tmp/detect-vuln-$$ -stderr" ;
78
+ my ($rc , $out ) = &cmd(" $ulimitMemory $limitTime $d ->{driver} $tmpPatternFile 2>$stderrFile " );
79
+ my $elapsed = tv_interval($t0 );
80
+ chomp $out ;
81
+
82
+ # Clean up in case there was a timeout.
83
+ my $stderr = &readFile(" file" => $stderrFile );
84
+ my @filesToClean = ($stderr =~ m / CLEANUP: (\S +)/ g );
85
+ &log (" Cleaning up @filesToClean " );
86
+ unlink @filesToClean ;
87
+ unlink $stderrFile ;
88
+
89
+ my $opinion = { " name" => $d -> {name },
90
+ " secToDecide" => sprintf (" %.4f" , $elapsed ),
91
+ };
92
+
93
+ if ($rc eq 124) {
94
+ &log (" Detector $d ->{name} timed out" );
95
+ $opinion -> {hasOpinion } = 0;
96
+ $opinion -> {opinion } = " TIMEOUT" ;
97
+ }
98
+ elsif ($rc ) {
99
+ &log (" Detector $d ->{name} said rc $rc " );
100
+ $opinion -> {hasOpinion } = 0;
101
+ $opinion -> {opinion } = " INTERNAL-ERROR" ;
102
+ }
103
+ else {
104
+ &log (" Detector $d ->{name} said: $out " );
105
+ my $result = decode_json($out );
106
+ # Extract the details needed to make the summary.
107
+ # Otherwise we repeat ourselves too much.
108
+ $opinion -> {hasOpinion } = 1;
109
+ $opinion -> {opinion } = $result -> {opinion };
110
+
111
+ # Note the pattern we queried about, so we can distinguish from the original.
112
+ $opinion -> {patternVariant } = $pattern ;
113
+ }
114
+
115
+ push @detectorOpinions , $opinion ;
99
116
}
100
117
101
- push @detectorOpinions , $opinion ;
118
+ unlink $tmpPatternFile ;
102
119
}
103
120
104
121
$query -> {detectorOpinions } = \@detectorOpinions ;
@@ -149,6 +166,13 @@ sub getDetectors {
149
166
return @detectors ;
150
167
}
151
168
169
+ sub makeQueryFile {
170
+ my ($query ) = @_ ;
171
+ my $tmpFile = " /tmp/detect-vuln-$$ .json" ;
172
+ &writeToFile(" file" => $tmpFile , " contents" => encode_json($query ));
173
+ return $tmpFile ;
174
+ }
175
+
152
176
# input: (\@list, $e)
153
177
# output: true if $e is in @list, else false
154
178
sub listContains {
@@ -173,3 +197,50 @@ sub readFile {
173
197
174
198
return $contents ;
175
199
}
200
+
201
+ # input: %args: keys: file contents
202
+ # output: $file
203
+ sub writeToFile {
204
+ my %args = @_ ;
205
+
206
+ open (my $fh , ' >' , $args {file });
207
+ print $fh $args {contents };
208
+ close $fh ;
209
+
210
+ return $args {file };
211
+ }
212
+
213
+ sub expandPatternSpaceForDetectors {
214
+ my ($pattern ) = @_ ;
215
+
216
+ my @patternsToTry = ($pattern );
217
+
218
+ # If pattern is unanchored, a backtracking regex engine will run the loop:
219
+ # for (1 .. n):
220
+ # _match(regex, substr)
221
+ # This means that if each match is linear-time, the worst-case behavior is quadratic.
222
+ # For example, /a+$/ is quadratic in Node.js.
223
+ # The detectors don't seem to acknowledge this loop.
224
+ # We can simulate it by prefixing un-anchored regexes with '^(.*?)'.
225
+ # This is also how a linear-time engine scans all starting indices in parallel; see Cox's writings.
226
+ if (substr ($query -> {pattern }, 0, 1) ne " ^" ) {
227
+ my $anchoredPattern = " ^(.*?)$query ->{pattern}" ;
228
+ push @patternsToTry , $anchoredPattern ;
229
+ }
230
+
231
+ # If pattern contains curlies "{\d*,\d*}", the detectors may time out due to graph expansion.
232
+ # We can try a more general pattern with "*" and "+" instead.
233
+ # The detectors might give false positives but that's OK, that's what the validate stage is for.
234
+ # I'm not being careful about escaped curly braces, so let's hope there are no meta-regexes here.
235
+ my $genericCurlies = $query -> {pattern };
236
+ # {0, and {, both mean "0 or more"
237
+ $genericCurlies =~ s / {0,\d *}/ \* / g ;
238
+ $genericCurlies =~ s / {,\d *}/ \* / g ;
239
+ # {[1-9] means "1 or more"
240
+ $genericCurlies =~ s / {[1-9]\d *,\d *}/ \+ / g ;
241
+ if ($genericCurlies ne $pattern ) {
242
+ push @patternsToTry , $genericCurlies ;
243
+ }
244
+
245
+ return @patternsToTry ;
246
+ }
0 commit comments