Skip to content

Commit 23f6b2a

Browse files
authored
Merge pull request davisjam#49 from davisjam/DetectorEnhancement_AnchoredRegex
detect-vuln: test on expanded pattern space
2 parents 3a9b580 + 9075951 commit 23f6b2a

File tree

1 file changed

+110
-39
lines changed

1 file changed

+110
-39
lines changed

src/detect/detect-vuln.pl

Lines changed: 110 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -57,48 +57,65 @@
5757
my $limitTime = (defined $query->{timeLimit}) ? "timeout $query->{timeLimit}s" : "";
5858
my $ulimitMemory = (defined $query->{memoryLimit}) ? "ulimit -m $memoryLimitInBytes; ulimit -v $memoryLimitInBytes;" : "";
5959

60-
# Run each detector. Can re-use the input file.
60+
my @patternsToTry = &expandPatternSpaceForDetectors($query->{pattern});
61+
62+
# This will contain N_DETECTORS * scalar(@patternsToTry) opinions.
6163
my @detectorOpinions;
62-
&log("Applying detectors to pattern /$query->{pattern}/");
63-
for my $d (@DETECTORS) {
64-
&log("Querying detector $d->{name}");
65-
my $t0 = [gettimeofday];
66-
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
67-
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $patternFile 2>$stderrFile");
68-
my $elapsed = tv_interval($t0);
69-
chomp $out;
70-
71-
# Clean up in case there was a timeout.
72-
my $stderr = &readFile("file"=>$stderrFile);
73-
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
74-
&log("Cleaning up @filesToClean");
75-
unlink @filesToClean;
76-
unlink $stderrFile;
77-
78-
my $opinion = { "name" => $d->{name},
79-
"secToDecide" => sprintf("%.4f", $elapsed),
80-
};
81-
82-
if ($rc eq 124) {
83-
&log("Detector $d->{name} timed out");
84-
$opinion->{hasOpinion} = 0;
85-
$opinion->{opinion} = "TIMEOUT";
86-
}
87-
elsif ($rc) {
88-
&log("Detector $d->{name} said rc $rc");
89-
$opinion->{hasOpinion} = 0;
90-
$opinion->{opinion} = "INTERNAL-ERROR";
91-
}
92-
else {
93-
&log("Detector $d->{name} said: $out");
94-
my $result = decode_json($out);
95-
# Extract the details needed to make the summary.
96-
# Otherwise we repeat ourselves too much.
97-
$opinion->{hasOpinion} = 1;
98-
$opinion->{opinion} = $result->{opinion};
64+
# Try each pattern.
65+
for my $pattern (@patternsToTry) {
66+
&log("Applying detectors to pattern /$pattern/");
67+
68+
# Craft query file.
69+
my $newQuery = decode_json(encode_json($query));
70+
$newQuery->{pattern} = $pattern;
71+
my $tmpPatternFile = &makeQueryFile($newQuery);
72+
73+
# Ask each detector.
74+
for my $d (@DETECTORS) {
75+
&log("Querying detector $d->{name}");
76+
my $t0 = [gettimeofday];
77+
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
78+
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $tmpPatternFile 2>$stderrFile");
79+
my $elapsed = tv_interval($t0);
80+
chomp $out;
81+
82+
# Clean up in case there was a timeout.
83+
my $stderr = &readFile("file"=>$stderrFile);
84+
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
85+
&log("Cleaning up @filesToClean");
86+
unlink @filesToClean;
87+
unlink $stderrFile;
88+
89+
my $opinion = { "name" => $d->{name},
90+
"secToDecide" => sprintf("%.4f", $elapsed),
91+
};
92+
93+
if ($rc eq 124) {
94+
&log("Detector $d->{name} timed out");
95+
$opinion->{hasOpinion} = 0;
96+
$opinion->{opinion} = "TIMEOUT";
97+
}
98+
elsif ($rc) {
99+
&log("Detector $d->{name} said rc $rc");
100+
$opinion->{hasOpinion} = 0;
101+
$opinion->{opinion} = "INTERNAL-ERROR";
102+
}
103+
else {
104+
&log("Detector $d->{name} said: $out");
105+
my $result = decode_json($out);
106+
# Extract the details needed to make the summary.
107+
# Otherwise we repeat ourselves too much.
108+
$opinion->{hasOpinion} = 1;
109+
$opinion->{opinion} = $result->{opinion};
110+
111+
# Note the pattern we queried about, so we can distinguish from the original.
112+
$opinion->{patternVariant} = $pattern;
113+
}
114+
115+
push @detectorOpinions, $opinion;
99116
}
100117

101-
push @detectorOpinions, $opinion;
118+
unlink $tmpPatternFile;
102119
}
103120

104121
$query->{detectorOpinions} = \@detectorOpinions;
@@ -149,6 +166,13 @@ sub getDetectors {
149166
return @detectors;
150167
}
151168

169+
sub makeQueryFile {
170+
my ($query) = @_;
171+
my $tmpFile = "/tmp/detect-vuln-$$.json";
172+
&writeToFile("file"=>$tmpFile, "contents"=>encode_json($query));
173+
return $tmpFile;
174+
}
175+
152176
# input: (\@list, $e)
153177
# output: true if $e is in @list, else false
154178
sub listContains {
@@ -173,3 +197,50 @@ sub readFile {
173197

174198
return $contents;
175199
}
200+
201+
# input: %args: keys: file contents
202+
# output: $file
203+
sub writeToFile {
204+
my %args = @_;
205+
206+
open(my $fh, '>', $args{file});
207+
print $fh $args{contents};
208+
close $fh;
209+
210+
return $args{file};
211+
}
212+
213+
sub expandPatternSpaceForDetectors {
214+
my ($pattern) = @_;
215+
216+
my @patternsToTry = ($pattern);
217+
218+
# If pattern is unanchored, a backtracking regex engine will run the loop:
219+
# for (1 .. n):
220+
# _match(regex, substr)
221+
# This means that if each match is linear-time, the worst-case behavior is quadratic.
222+
# For example, /a+$/ is quadratic in Node.js.
223+
# The detectors don't seem to acknowledge this loop.
224+
# We can simulate it by prefixing un-anchored regexes with '^(.*?)'.
225+
# This is also how a linear-time engine scans all starting indices in parallel; see Cox's writings.
226+
if (substr($query->{pattern}, 0, 1) ne "^") {
227+
my $anchoredPattern = "^(.*?)$query->{pattern}";
228+
push @patternsToTry, $anchoredPattern;
229+
}
230+
231+
# If pattern contains curlies "{\d*,\d*}", the detectors may time out due to graph expansion.
232+
# We can try a more general pattern with "*" and "+" instead.
233+
# The detectors might give false positives but that's OK, that's what the validate stage is for.
234+
# I'm not being careful about escaped curly braces, so let's hope there are no meta-regexes here.
235+
my $genericCurlies = $query->{pattern};
236+
# {0, and {, both mean "0 or more"
237+
$genericCurlies =~ s/{0,\d*}/\*/g;
238+
$genericCurlies =~ s/{,\d*}/\*/g;
239+
# {[1-9] means "1 or more"
240+
$genericCurlies =~ s/{[1-9]\d*,\d*}/\+/g;
241+
if ($genericCurlies ne $pattern) {
242+
push @patternsToTry, $genericCurlies;
243+
}
244+
245+
return @patternsToTry;
246+
}

0 commit comments

Comments
 (0)