Skip to content

Commit a13391b

Browse files
authored
Merge pull request #15802 from hvitved/dataflow/variable-capture-overlapping-paths
Variable capture: Avoid overlapping and false-positive data flow paths
2 parents daee22d + 24e35f6 commit a13391b

File tree

6 files changed

+89
-214
lines changed

6 files changed

+89
-214
lines changed

csharp/ql/test/library-tests/dataflow/global/DataFlowPath.expected

Lines changed: 0 additions & 86 deletions
Large diffs are not rendered by default.

csharp/ql/test/library-tests/dataflow/global/TaintTrackingPath.expected

Lines changed: 0 additions & 86 deletions
Large diffs are not rendered by default.

python/ql/test/experimental/dataflow/variable-capture/by_value.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def by_value1():
3434
a = SOURCE
3535
def inner(a_val=a):
3636
SINK(a_val) #$ captured
37-
SINK_F(a) #$ SPURIOUS: captured
37+
SINK_F(a)
3838
a = NONSOURCE
3939
inner()
4040

ruby/ql/test/library-tests/dataflow/global/Flow.expected

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
testFailures
22
edges
33
| blocks.rb:14:12:14:20 | call to source | blocks.rb:8:10:8:14 | yield ... | provenance | |
4-
| captured_variables.rb:9:24:9:24 | x | captured_variables.rb:10:10:10:23 | -> { ... } [captured x] | provenance | |
54
| captured_variables.rb:9:24:9:24 | x | captured_variables.rb:11:5:11:6 | fn [captured x] | provenance | |
6-
| captured_variables.rb:10:5:10:6 | fn [captured x] | captured_variables.rb:11:5:11:6 | fn [captured x] | provenance | |
7-
| captured_variables.rb:10:10:10:23 | -> { ... } [captured x] | captured_variables.rb:10:5:10:6 | fn [captured x] | provenance | |
85
| captured_variables.rb:11:5:11:6 | fn [captured x] | captured_variables.rb:10:20:10:20 | x | provenance | |
96
| captured_variables.rb:13:20:13:29 | call to taint | captured_variables.rb:9:24:9:24 | x | provenance | |
107
| captured_variables.rb:15:28:15:28 | x | captured_variables.rb:16:5:18:5 | -> { ... } [captured x] | provenance | |
@@ -16,18 +13,12 @@ edges
1613
| captured_variables.rb:27:25:27:57 | call to capture_escape_return2 [captured x] | captured_variables.rb:24:14:24:14 | x | provenance | |
1714
| captured_variables.rb:27:48:27:57 | call to taint | captured_variables.rb:22:28:22:28 | x | provenance | |
1815
| captured_variables.rb:27:48:27:57 | call to taint | captured_variables.rb:27:25:27:57 | call to capture_escape_return2 [captured x] | provenance | |
19-
| captured_variables.rb:29:33:29:33 | x | captured_variables.rb:30:10:32:5 | -> { ... } [captured x] | provenance | |
2016
| captured_variables.rb:29:33:29:33 | x | captured_variables.rb:33:29:33:30 | fn [captured x] | provenance | |
21-
| captured_variables.rb:30:5:30:6 | fn [captured x] | captured_variables.rb:33:29:33:30 | fn [captured x] | provenance | |
22-
| captured_variables.rb:30:10:32:5 | -> { ... } [captured x] | captured_variables.rb:30:5:30:6 | fn [captured x] | provenance | |
2317
| captured_variables.rb:33:29:33:30 | fn [captured x] | captured_variables.rb:31:14:31:14 | x | provenance | |
2418
| captured_variables.rb:35:29:35:38 | call to taint | captured_variables.rb:29:33:29:33 | x | provenance | |
2519
| captured_variables.rb:37:13:37:14 | fn [captured x] | captured_variables.rb:38:5:38:6 | fn [captured x] | provenance | |
2620
| captured_variables.rb:38:5:38:6 | fn [captured x] | captured_variables.rb:42:14:42:14 | x | provenance | |
27-
| captured_variables.rb:40:31:40:31 | x | captured_variables.rb:41:10:43:5 | -> { ... } [captured x] | provenance | |
2821
| captured_variables.rb:40:31:40:31 | x | captured_variables.rb:44:13:44:14 | fn [captured x] | provenance | |
29-
| captured_variables.rb:41:5:41:6 | fn [captured x] | captured_variables.rb:44:13:44:14 | fn [captured x] | provenance | |
30-
| captured_variables.rb:41:10:43:5 | -> { ... } [captured x] | captured_variables.rb:41:5:41:6 | fn [captured x] | provenance | |
3122
| captured_variables.rb:44:13:44:14 | fn [captured x] | captured_variables.rb:37:13:37:14 | fn [captured x] | provenance | |
3223
| captured_variables.rb:46:27:46:36 | call to taint | captured_variables.rb:40:31:40:31 | x | provenance | |
3324
| captured_variables.rb:48:5:48:12 | call to taint | captured_variables.rb:49:16:52:3 | do ... end [captured x] | provenance | |
@@ -65,11 +56,8 @@ edges
6556
| captured_variables.rb:83:6:83:8 | foo [@field] | captured_variables.rb:60:5:62:7 | self in get_field [@field] | provenance | |
6657
| captured_variables.rb:83:6:83:8 | foo [@field] | captured_variables.rb:83:6:83:18 | call to get_field | provenance | |
6758
| captured_variables.rb:83:6:83:8 | foo [@field] | instance_variables.rb:13:5:15:7 | self in get_field [@field] | provenance | |
68-
| captured_variables.rb:85:5:85:12 | call to taint | captured_variables.rb:86:6:89:1 | -> { ... } [captured y] | provenance | |
6959
| captured_variables.rb:85:5:85:12 | call to taint | captured_variables.rb:90:1:90:2 | fn [captured y] | provenance | |
7060
| captured_variables.rb:85:5:85:12 | call to taint | captured_variables.rb:91:6:91:6 | y | provenance | |
71-
| captured_variables.rb:86:1:86:2 | fn [captured y] | captured_variables.rb:90:1:90:2 | fn [captured y] | provenance | |
72-
| captured_variables.rb:86:6:89:1 | -> { ... } [captured y] | captured_variables.rb:86:1:86:2 | fn [captured y] | provenance | |
7361
| captured_variables.rb:88:9:88:16 | call to taint | captured_variables.rb:90:1:90:2 | [post] fn [captured y] | provenance | |
7462
| captured_variables.rb:90:1:90:2 | [post] fn [captured y] | captured_variables.rb:91:6:91:6 | y | provenance | |
7563
| captured_variables.rb:90:1:90:2 | fn [captured y] | captured_variables.rb:87:10:87:10 | y | provenance | |
@@ -81,18 +69,12 @@ edges
8169
| captured_variables.rb:101:11:101:11 | x | captured_variables.rb:104:31:104:31 | x | provenance | |
8270
| captured_variables.rb:104:17:104:24 | call to taint | captured_variables.rb:100:21:100:21 | x | provenance | |
8371
| captured_variables.rb:104:31:104:31 | x | captured_variables.rb:105:10:105:10 | x | provenance | |
84-
| captured_variables.rb:109:9:109:17 | call to taint | captured_variables.rb:110:14:116:5 | -> { ... } [captured x] | provenance | |
8572
| captured_variables.rb:109:9:109:17 | call to taint | captured_variables.rb:117:5:117:10 | middle [captured x] | provenance | |
8673
| captured_variables.rb:109:9:109:17 | call to taint | captured_variables.rb:118:10:118:10 | x | provenance | |
87-
| captured_variables.rb:110:5:110:10 | middle [captured x] | captured_variables.rb:117:5:117:10 | middle [captured x] | provenance | |
88-
| captured_variables.rb:110:14:116:5 | -> { ... } [captured x] | captured_variables.rb:110:5:110:10 | middle [captured x] | provenance | |
89-
| captured_variables.rb:111:9:111:13 | inner [captured x] | captured_variables.rb:115:9:115:13 | inner [captured x] | provenance | |
90-
| captured_variables.rb:111:17:114:9 | -> { ... } [captured x] | captured_variables.rb:111:9:111:13 | inner [captured x] | provenance | |
9174
| captured_variables.rb:113:17:113:25 | call to taint | captured_variables.rb:115:9:115:13 | [post] inner [captured x] | provenance | |
9275
| captured_variables.rb:115:9:115:13 | [post] inner [captured x] | captured_variables.rb:117:5:117:10 | [post] middle [captured x] | provenance | |
9376
| captured_variables.rb:115:9:115:13 | inner [captured x] | captured_variables.rb:112:18:112:18 | x | provenance | |
9477
| captured_variables.rb:117:5:117:10 | [post] middle [captured x] | captured_variables.rb:118:10:118:10 | x | provenance | |
95-
| captured_variables.rb:117:5:117:10 | middle [captured x] | captured_variables.rb:111:17:114:9 | -> { ... } [captured x] | provenance | |
9678
| captured_variables.rb:117:5:117:10 | middle [captured x] | captured_variables.rb:115:9:115:13 | inner [captured x] | provenance | |
9779
| captured_variables.rb:147:5:147:6 | [post] self [@x] | captured_variables.rb:153:14:155:7 | do ... end [captured self, @x] | provenance | |
9880
| captured_variables.rb:147:10:147:18 | call to taint | captured_variables.rb:147:5:147:6 | [post] self [@x] | provenance | |
@@ -116,10 +98,7 @@ edges
11698
| captured_variables.rb:194:1:194:1 | c [@x] | captured_variables.rb:185:5:189:7 | self in baz [@x] | provenance | |
11799
| captured_variables.rb:197:9:197:17 | call to taint | captured_variables.rb:199:10:199:10 | x | provenance | |
118100
| captured_variables.rb:206:13:206:21 | call to taint | captured_variables.rb:208:14:208:14 | x | provenance | |
119-
| captured_variables.rb:219:9:219:17 | call to taint | captured_variables.rb:222:11:224:5 | -> { ... } [captured x] | provenance | |
120101
| captured_variables.rb:219:9:219:17 | call to taint | captured_variables.rb:226:5:226:7 | fn1 [captured x] | provenance | |
121-
| captured_variables.rb:222:5:222:7 | fn1 [captured x] | captured_variables.rb:226:5:226:7 | fn1 [captured x] | provenance | |
122-
| captured_variables.rb:222:11:224:5 | -> { ... } [captured x] | captured_variables.rb:222:5:222:7 | fn1 [captured x] | provenance | |
123102
| captured_variables.rb:226:5:226:7 | [post] fn1 [captured y] | captured_variables.rb:227:10:227:10 | y | provenance | |
124103
| captured_variables.rb:226:5:226:7 | fn1 [captured x] | captured_variables.rb:226:5:226:7 | [post] fn1 [captured y] | provenance | |
125104
| instance_variables.rb:10:19:10:19 | x | instance_variables.rb:11:18:11:18 | x | provenance | |
@@ -264,8 +243,6 @@ nodes
264243
| blocks.rb:8:10:8:14 | yield ... | semmle.label | yield ... |
265244
| blocks.rb:14:12:14:20 | call to source | semmle.label | call to source |
266245
| captured_variables.rb:9:24:9:24 | x | semmle.label | x |
267-
| captured_variables.rb:10:5:10:6 | fn [captured x] | semmle.label | fn [captured x] |
268-
| captured_variables.rb:10:10:10:23 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
269246
| captured_variables.rb:10:20:10:20 | x | semmle.label | x |
270247
| captured_variables.rb:11:5:11:6 | fn [captured x] | semmle.label | fn [captured x] |
271248
| captured_variables.rb:13:20:13:29 | call to taint | semmle.label | call to taint |
@@ -281,16 +258,12 @@ nodes
281258
| captured_variables.rb:27:25:27:57 | call to capture_escape_return2 [captured x] | semmle.label | call to capture_escape_return2 [captured x] |
282259
| captured_variables.rb:27:48:27:57 | call to taint | semmle.label | call to taint |
283260
| captured_variables.rb:29:33:29:33 | x | semmle.label | x |
284-
| captured_variables.rb:30:5:30:6 | fn [captured x] | semmle.label | fn [captured x] |
285-
| captured_variables.rb:30:10:32:5 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
286261
| captured_variables.rb:31:14:31:14 | x | semmle.label | x |
287262
| captured_variables.rb:33:29:33:30 | fn [captured x] | semmle.label | fn [captured x] |
288263
| captured_variables.rb:35:29:35:38 | call to taint | semmle.label | call to taint |
289264
| captured_variables.rb:37:13:37:14 | fn [captured x] | semmle.label | fn [captured x] |
290265
| captured_variables.rb:38:5:38:6 | fn [captured x] | semmle.label | fn [captured x] |
291266
| captured_variables.rb:40:31:40:31 | x | semmle.label | x |
292-
| captured_variables.rb:41:5:41:6 | fn [captured x] | semmle.label | fn [captured x] |
293-
| captured_variables.rb:41:10:43:5 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
294267
| captured_variables.rb:42:14:42:14 | x | semmle.label | x |
295268
| captured_variables.rb:44:13:44:14 | fn [captured x] | semmle.label | fn [captured x] |
296269
| captured_variables.rb:46:27:46:36 | call to taint | semmle.label | call to taint |
@@ -323,8 +296,6 @@ nodes
323296
| captured_variables.rb:83:6:83:8 | foo [@field] | semmle.label | foo [@field] |
324297
| captured_variables.rb:83:6:83:18 | call to get_field | semmle.label | call to get_field |
325298
| captured_variables.rb:85:5:85:12 | call to taint | semmle.label | call to taint |
326-
| captured_variables.rb:86:1:86:2 | fn [captured y] | semmle.label | fn [captured y] |
327-
| captured_variables.rb:86:6:89:1 | -> { ... } [captured y] | semmle.label | -> { ... } [captured y] |
328299
| captured_variables.rb:87:10:87:10 | y | semmle.label | y |
329300
| captured_variables.rb:88:9:88:16 | call to taint | semmle.label | call to taint |
330301
| captured_variables.rb:90:1:90:2 | [post] fn [captured y] | semmle.label | [post] fn [captured y] |
@@ -341,10 +312,6 @@ nodes
341312
| captured_variables.rb:104:31:104:31 | x | semmle.label | x |
342313
| captured_variables.rb:105:10:105:10 | x | semmle.label | x |
343314
| captured_variables.rb:109:9:109:17 | call to taint | semmle.label | call to taint |
344-
| captured_variables.rb:110:5:110:10 | middle [captured x] | semmle.label | middle [captured x] |
345-
| captured_variables.rb:110:14:116:5 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
346-
| captured_variables.rb:111:9:111:13 | inner [captured x] | semmle.label | inner [captured x] |
347-
| captured_variables.rb:111:17:114:9 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
348315
| captured_variables.rb:112:18:112:18 | x | semmle.label | x |
349316
| captured_variables.rb:113:17:113:25 | call to taint | semmle.label | call to taint |
350317
| captured_variables.rb:115:9:115:13 | [post] inner [captured x] | semmle.label | [post] inner [captured x] |
@@ -380,8 +347,6 @@ nodes
380347
| captured_variables.rb:206:13:206:21 | call to taint | semmle.label | call to taint |
381348
| captured_variables.rb:208:14:208:14 | x | semmle.label | x |
382349
| captured_variables.rb:219:9:219:17 | call to taint | semmle.label | call to taint |
383-
| captured_variables.rb:222:5:222:7 | fn1 [captured x] | semmle.label | fn1 [captured x] |
384-
| captured_variables.rb:222:11:224:5 | -> { ... } [captured x] | semmle.label | -> { ... } [captured x] |
385350
| captured_variables.rb:226:5:226:7 | [post] fn1 [captured y] | semmle.label | [post] fn1 [captured y] |
386351
| captured_variables.rb:226:5:226:7 | fn1 [captured x] | semmle.label | fn1 [captured x] |
387352
| captured_variables.rb:227:10:227:10 | y | semmle.label | y |

ruby/ql/test/library-tests/dataflow/global/captured_variables.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,17 @@ def multi_capture
228228
end
229229

230230
multi_capture
231+
232+
def m1
233+
x = taint(19)
234+
235+
fn1 = -> {
236+
sink x
237+
}
238+
239+
x = nil
240+
241+
fn1.call()
242+
end
243+
244+
m1

shared/dataflow/codeql/dataflow/VariableCapture.qll

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -601,16 +601,22 @@ module Flow<LocationSig Location, InputSig<Location> Input> implements OutputSig
601601
* observed in a similarly synthesized post-update node for this read of `v`.
602602
*/
603603
private predicate synthRead(
604-
CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure
604+
CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure, boolean alias
605605
) {
606606
exists(ClosureExpr ce | closureCaptures(ce, v) |
607-
ce.hasCfgNode(bb, i) and ce = closure
607+
ce.hasCfgNode(bb, i) and ce = closure and alias = false
608608
or
609-
localOrNestedClosureAccess(ce, closure, bb, i)
609+
localOrNestedClosureAccess(ce, closure, bb, i) and alias = true
610610
) and
611611
if v.getCallable() != bb.getEnclosingCallable() then topScope = false else topScope = true
612612
}
613613

614+
private predicate synthRead(
615+
CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure
616+
) {
617+
synthRead(v, bb, i, topScope, closure, _)
618+
}
619+
614620
/**
615621
* Holds if there is an access of a captured variable inside a closure in the
616622
* `i`th node of `bb`, such that we need to synthesize a `this.` qualifier.
@@ -919,16 +925,22 @@ module Flow<LocationSig Location, InputSig<Location> Input> implements OutputSig
919925
)
920926
}
921927

922-
predicate storeStep(ClosureNode node1, CapturedVariable v, ClosureNode node2) {
923-
// store v in the closure or in the malloc in case of a relevant constructor call
928+
private predicate storeStepClosure(
929+
ClosureNode node1, CapturedVariable v, ClosureNode node2, boolean alias
930+
) {
924931
exists(BasicBlock bb, int i, Expr closure |
925-
synthRead(v, bb, i, _, closure) and
932+
synthRead(v, bb, i, _, closure, alias) and
926933
node1 = TSynthRead(v, bb, i, false)
927934
|
928935
node2 = TExprNode(closure, false)
929936
or
930937
node2 = TMallocNode(closure) and hasConstructorCapture(closure, v)
931938
)
939+
}
940+
941+
predicate storeStep(ClosureNode node1, CapturedVariable v, ClosureNode node2) {
942+
// store v in the closure or in the malloc in case of a relevant constructor call
943+
storeStepClosure(node1, v, node2, _)
932944
or
933945
// write to v inside the closure body
934946
exists(BasicBlock bb, int i, VariableWrite vw |
@@ -964,6 +976,62 @@ module Flow<LocationSig Location, InputSig<Location> Input> implements OutputSig
964976
}
965977

966978
predicate clearsContent(ClosureNode node, CapturedVariable v) {
979+
/*
980+
* Stores into closure aliases block flow from previous stores, both to
981+
* avoid overlapping data flow paths, but also to avoid false positive
982+
* flow.
983+
*
984+
* Example 1 (overlapping paths):
985+
*
986+
* ```rb
987+
* def m
988+
* x = taint
989+
*
990+
* fn = -> { # (1)
991+
* sink x
992+
* }
993+
*
994+
* fn.call # (2)
995+
* ```
996+
*
997+
* If we don't clear `x` at `fn` (2), we will have two overlapping paths:
998+
*
999+
* ```
1000+
* taint -> fn (2) [captured x]
1001+
* taint -> fn (1) [captured x] -> fn (2) [captured x]
1002+
* ```
1003+
*
1004+
* where the step `fn (1) [captured x] -> fn [captured x]` arises from normal
1005+
* use-use flow for `fn`. Clearing `x` at `fn` (2) removes the second path above.
1006+
*
1007+
* Example 2 (false positive flow):
1008+
*
1009+
* ```rb
1010+
* def m
1011+
* x = taint
1012+
*
1013+
* fn = -> { # (1)
1014+
* sink x
1015+
* }
1016+
*
1017+
* x = nil # (2)
1018+
*
1019+
* fn.call # (3)
1020+
* end
1021+
* ```
1022+
*
1023+
* If we don't clear `x` at `fn` (3), we will have the following false positive
1024+
* flow path:
1025+
*
1026+
* ```
1027+
* taint -> fn (1) [captured x] -> fn (3) [captured x]
1028+
* ```
1029+
*
1030+
* since normal use-use flow for `fn` does not take the overwrite at (2) into account.
1031+
*/
1032+
1033+
storeStepClosure(_, v, node, true)
1034+
or
9671035
exists(BasicBlock bb, int i |
9681036
captureWrite(v, bb, i, false, _) and
9691037
node = TSynthThisQualifier(bb, i, false)

0 commit comments

Comments
 (0)