Skip to content

C++: data flow through partial chi operands where type is known #2676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) {
or
i2.(UnaryInstruction).getUnary() = i1
or
i2.(ChiInstruction).getPartial() = i1 and
not isChiForAllAliasedMemory(i2)
or
exists(BinaryInstruction bin |
bin = i2 and
predictableInstruction(i2.getAnOperand().getDef()) and
Expand Down Expand Up @@ -205,6 +208,19 @@ private predicate modelTaintToParameter(Function f, int parameterIn, int paramet
)
}

/**
* Holds if `chi` is on the chain of chi-instructions for all aliased memory.
* Taint shoud not pass through these instructions since they tend to mix up
* unrelated objects.
*/
private predicate isChiForAllAliasedMemory(Instruction instr) {
instr.(ChiInstruction).getTotal() instanceof AliasedDefinitionInstruction
or
isChiForAllAliasedMemory(instr.(ChiInstruction).getTotal())
or
isChiForAllAliasedMemory(instr.(PhiInstruction).getAnInput())
}

private predicate modelTaintToReturnValue(Function f, int parameterIn) {
// Taint flow from parameter to return value
exists(FunctionInput modelIn, FunctionOutput modelOut |
Expand Down
21 changes: 12 additions & 9 deletions cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,17 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
}

private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) {
iTo.(CopyInstruction).getSourceValue() = iFrom or
iTo.(PhiInstruction).getAnOperand().getDef() = iFrom or
iTo.(CopyInstruction).getSourceValue() = iFrom
or
iTo.(PhiInstruction).getAnOperand().getDef() = iFrom
or
// Treat all conversions as flow, even conversions between different numeric types.
iTo.(ConvertInstruction).getUnary() = iFrom or
iTo.(CheckedConvertOrNullInstruction).getUnary() = iFrom or
iTo.(InheritanceConversionInstruction).getUnary() = iFrom or
iTo.(ConvertInstruction).getUnary() = iFrom
or
iTo.(CheckedConvertOrNullInstruction).getUnary() = iFrom
or
iTo.(InheritanceConversionInstruction).getUnary() = iFrom
or
// A chi instruction represents a point where a new value (the _partial_
// operand) may overwrite an old value (the _total_ operand), but the alias
// analysis couldn't determine that it surely will overwrite every bit of it or
Expand All @@ -279,10 +284,8 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
// due to shortcomings of the alias analysis. We may get false flow in cases
// where the data is indeed overwritten.
//
// Allowing flow through the partial operand would be more noisy, especially
// for variables that have escaped: for soundness, the IR has to assume that
// every write to an unknown address can affect every escaped variable, and
// this assumption shows up as data flowing through partial chi operands.
// Flow through the partial operand belongs in the taint-tracking libraries
// for now.
iTo.getAnOperand().(ChiTotalOperand).getDef() = iFrom
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ int main(int argc, char *argv[]) {
char buf[100] = "VAR = ";
sink(strcat(buf, getenv("VAR")));

sink(buf); // BUG: no taint
sink(untainted_buf); // the two buffers would be conflated if we added flow through partial chi inputs
sink(buf);
sink(untainted_buf); // the two buffers would be conflated if we added flow through all partial chi inputs

return 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:33 | (const char *)... |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:25 | call to getenv |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:32 | (const char *)... |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:24:8:24:10 | buf |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | test_diff.cpp:1:11:1:20 | p#0 |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:32:11:32:26 | p#0 |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:11:38:21 | env_pointer |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:3:21:3:22 | s1 | AST only |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:21:8:21:10 | buf | AST only |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:15:22:17 | buf | AST only |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:24:8:24:10 | buf | AST only |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:31:40:31:53 | dotted_address | AST only |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:39:36:39:61 | (const char *)... | AST only |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:39:51:39:61 | env_pointer | AST only |
Expand Down