Skip to content

Commit 8486589

Browse files
authored
[analyzer] Limit isTainted() by skipping complicated symbols (llvm#105493)
As discussed in https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570/10 Some `isTainted()` queries can blow up the analysis times, and effectively halt the analysis under specific workloads. We don't really have the time now to do a caching re-implementation of `isTainted()`, so we need to workaround the case. The workaround with the smallest blast radius was to limit what symbols `isTainted()` does the query (by walking the SymExpr). So far, the threshold 10 worked for us, but this value can be overridden using the "max-tainted-symbol-complexity" config value. This new option is "deprecated" from the getgo, as I expect this issue to be fixed within the next few months and I don't want users to override this value anyways. If they do, this message will let them know that they are on their own, and the next release may break them (as we no longer recognize this option if we drop it). Mitigates llvm#89720 CPP-5414
1 parent ad435bc commit 8486589

File tree

4 files changed

+61
-1
lines changed

4 files changed

+61
-1
lines changed

clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def

+5
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,11 @@ ANALYZER_OPTION(
407407
ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
408408
"The maximum complexity of symbolic constraint.", 35)
409409

410+
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
411+
// Ideally, we should get rid of this option soon.
412+
ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
413+
"[DEPRECATED] The maximum complexity of a symbol to carry taint", 9)
414+
410415
ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
411416
"The maximum times a large function could be inlined.", 32)
412417

clang/lib/StaticAnalyzer/Checkers/Taint.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "clang/StaticAnalyzer/Checkers/Taint.h"
1414
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
15+
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
1516
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
1617
#include <optional>
1718

@@ -256,6 +257,12 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
256257
if (!Sym)
257258
return TaintedSymbols;
258259

260+
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
261+
if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
262+
Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
263+
return {};
264+
}
265+
259266
// Traverse all the symbols this symbol depends on to see if any are tainted.
260267
for (SymbolRef SubSym : Sym->symbols()) {
261268
if (!isa<SymbolData>(SubSym))

clang/test/Analysis/analyzer-config.c

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
// CHECK-NEXT: max-inlinable-size = 100
9595
// CHECK-NEXT: max-nodes = 225000
9696
// CHECK-NEXT: max-symbol-complexity = 35
97+
// CHECK-NEXT: max-tainted-symbol-complexity = 9
9798
// CHECK-NEXT: max-times-inline-large = 32
9899
// CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
99100
// CHECK-NEXT: mode = deep

clang/test/Analysis/taint-generic.c

+48-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ void clang_analyzer_isTainted_char(char);
6363
void clang_analyzer_isTainted_wchar(wchar_t);
6464
void clang_analyzer_isTainted_charp(char*);
6565
void clang_analyzer_isTainted_int(int);
66+
void clang_analyzer_dump_int(int);
6667

6768
int coin();
6869

@@ -459,7 +460,53 @@ unsigned radar11369570_hanging(const unsigned char *arr, int l) {
459460
longcmp(a, t, c);
460461
l -= 12;
461462
}
462-
return 5/a; // expected-warning {{Division by a tainted value, possibly zero}}
463+
return 5/a; // FIXME: Should be a "div by tainted" warning here.
464+
}
465+
466+
// This computation used to take a very long time.
467+
void complex_taint_queries(const int *p) {
468+
int tainted = 0;
469+
scanf("%d", &tainted);
470+
471+
// Make "tmp" tainted.
472+
int tmp = tainted + tainted;
473+
clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
474+
475+
// Make "tmp" SymExpr a lot more complicated by applying computation.
476+
// This should balloon the symbol complexity.
477+
tmp += p[0] + p[0];
478+
tmp += p[1] + p[1];
479+
tmp += p[2] + p[2];
480+
clang_analyzer_dump_int(tmp); // expected-warning{{((((conj_}} symbol complexity: 8
481+
clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
482+
483+
tmp += p[3] + p[3];
484+
clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10
485+
clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already too complex to be traversed
486+
487+
tmp += p[4] + p[4];
488+
tmp += p[5] + p[5];
489+
tmp += p[6] + p[6];
490+
tmp += p[7] + p[7];
491+
tmp += p[8] + p[8];
492+
tmp += p[9] + p[9];
493+
tmp += p[10] + p[10];
494+
tmp += p[11] + p[11];
495+
tmp += p[12] + p[12];
496+
tmp += p[13] + p[13];
497+
tmp += p[14] + p[14];
498+
tmp += p[15] + p[15];
499+
500+
// The SymExpr still holds the full history of the computation, yet, "isTainted" doesn't traverse the tree as the complexity is over the threshold.
501+
clang_analyzer_dump_int(tmp);
502+
// expected-warning@-1{{(((((((((((((((((conj_}} symbol complexity: 34
503+
clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} FIXME: Ideally, this should still result in "tainted".
504+
505+
// By making it even one step more complex, then it would hit the "max-symbol-complexity"
506+
// threshold and the engine would cut the SymExpr and replace it by a new conjured symbol.
507+
tmp += p[16];
508+
clang_analyzer_dump_int(tmp); // expected-warning{{conj_}} symbol complexity: 1
509+
clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}}
463510
}
464511

465512
// Check that we do not assert of the following code.

0 commit comments

Comments
 (0)