Skip to content

Commit 5b5a52f

Browse files
authored
Merge pull request #9551 from yoff/python/port-tarslip
Approved by RasmusWL
2 parents 7dd095c + cf9b69b commit 5b5a52f

File tree

5 files changed

+206
-193
lines changed

5 files changed

+206
-193
lines changed
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for detecting
3+
* "tar slip"
4+
* vulnerabilities, as well as extension points for adding your own.
5+
*/
6+
7+
private import python
8+
private import semmle.python.dataflow.new.DataFlow
9+
private import semmle.python.Concepts
10+
private import semmle.python.dataflow.new.BarrierGuards
11+
private import semmle.python.ApiGraphs
12+
13+
/**
14+
* Provides default sources, sinks and sanitizers for detecting
15+
* "tar slip"
16+
* vulnerabilities, as well as extension points for adding your own.
17+
*/
18+
module TarSlip {
19+
/**
20+
* A data flow source for "tar slip" vulnerabilities.
21+
*/
22+
abstract class Source extends DataFlow::Node { }
23+
24+
/**
25+
* A data flow sink for "tar slip" vulnerabilities.
26+
*/
27+
abstract class Sink extends DataFlow::Node { }
28+
29+
/**
30+
* A sanitizer for "tar slip" vulnerabilities.
31+
*/
32+
abstract class Sanitizer extends DataFlow::Node { }
33+
34+
/**
35+
* A call to `tarfile.open`, considered as a flow source.
36+
*/
37+
class TarfileOpen extends Source {
38+
TarfileOpen() {
39+
this = API::moduleImport("tarfile").getMember("open").getACall() and
40+
// If argument refers to a string object, then it's a hardcoded path and
41+
// this tarfile is safe.
42+
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StrConst and
43+
// Ignore opens within the tarfile module itself
44+
not this.getLocation().getFile().getBaseName() = "tarfile.py"
45+
}
46+
}
47+
48+
/**
49+
* A sanitizer based on file name. This because we extract the standard library.
50+
*
51+
* For efficiency we don't want to track the flow of taint
52+
* around the tarfile module.
53+
*/
54+
class ExcludeTarFilePy extends Sanitizer {
55+
ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" }
56+
}
57+
58+
/**
59+
* A sink capturing method calls to `extractall`.
60+
*
61+
* For a call to `file.extractall` without arguments, `file` is considered a sink.
62+
*/
63+
class ExtractAllSink extends Sink {
64+
ExtractAllSink() {
65+
exists(DataFlow::CallCfgNode call |
66+
call =
67+
API::moduleImport("tarfile")
68+
.getMember("open")
69+
.getReturn()
70+
.getMember("extractall")
71+
.getACall() and
72+
not exists(call.getArg(_)) and
73+
not exists(call.getArgByName(_)) and
74+
this = call.(DataFlow::MethodCallNode).getObject()
75+
)
76+
}
77+
}
78+
79+
/**
80+
* An argument to `extract` is considered a sink.
81+
*/
82+
class ExtractSink extends Sink {
83+
ExtractSink() {
84+
exists(DataFlow::CallCfgNode call |
85+
call =
86+
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
87+
this = call.getArg(0)
88+
)
89+
}
90+
}
91+
92+
/** The `members` argument `extractall` is considered a sink. */
93+
class ExtractMembersSink extends Sink {
94+
ExtractMembersSink() {
95+
exists(DataFlow::CallCfgNode call |
96+
call =
97+
API::moduleImport("tarfile")
98+
.getMember("open")
99+
.getReturn()
100+
.getMember("extractall")
101+
.getACall() and
102+
this in [call.getArg(0), call.getArgByName("members")]
103+
)
104+
}
105+
}
106+
107+
/**
108+
* Holds if `g` clears taint for `tarInfo`.
109+
*
110+
* The test `if <check_path>(info.name)` should clear taint for `info`,
111+
* where `<check_path>` is any function matching `"%path"`.
112+
* `info` is assumed to be a `TarInfo` instance.
113+
*/
114+
predicate tarFileInfoSanitizer(DataFlow::GuardNode g, ControlFlowNode tarInfo, boolean branch) {
115+
exists(CallNode call, AttrNode attr |
116+
g = call and
117+
// We must test the name of the tar info object.
118+
attr = call.getAnArg() and
119+
attr.getName() = "name" and
120+
attr.getObject() = tarInfo
121+
|
122+
// The assumption that any test that matches %path is a sanitizer might be too broad.
123+
call.getAChild*().(AttrNode).getName().matches("%path")
124+
or
125+
call.getAChild*().(NameNode).getId().matches("%path")
126+
) and
127+
branch = false
128+
}
129+
130+
/**
131+
* A sanitizer guard heuristic.
132+
*
133+
* The test `if <check_path>(info.name)` should clear taint for `info`,
134+
* where `<check_path>` is any function matching `"%path"`.
135+
* `info` is assumed to be a `TarInfo` instance.
136+
*/
137+
class TarFileInfoSanitizer extends Sanitizer {
138+
TarFileInfoSanitizer() {
139+
this = DataFlow::BarrierGuard<tarFileInfoSanitizer/3>::getABarrierNode()
140+
}
141+
}
142+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting "command injection" vulnerabilities.
3+
*
4+
* Note, for performance reasons: only import this file if
5+
* `TarSlip::Configuration` is needed, otherwise
6+
* `TarSlipCustomizations` should be imported instead.
7+
*/
8+
9+
private import python
10+
import semmle.python.dataflow.new.DataFlow
11+
import semmle.python.dataflow.new.TaintTracking
12+
import TarSlipCustomizations::TarSlip
13+
14+
/**
15+
* A taint-tracking configuration for detecting "command injection" vulnerabilities.
16+
*/
17+
class Configuration extends TaintTracking::Configuration {
18+
Configuration() { this = "TarSlip" }
19+
20+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
21+
22+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
23+
24+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
25+
}

python/ql/src/Security/CWE-022/TarSlip.ql

Lines changed: 5 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -13,170 +13,10 @@
1313
*/
1414

1515
import python
16-
import semmle.python.security.Paths
17-
import semmle.python.dataflow.TaintTracking
18-
import semmle.python.security.strings.Basic
16+
import semmle.python.security.dataflow.TarSlipQuery
17+
import DataFlow::PathGraph
1918

20-
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
21-
class OpenTarFile extends TaintKind {
22-
OpenTarFile() { this = "tarfile.open" }
23-
24-
override TaintKind getTaintOfMethodResult(string name) {
25-
name = "getmember" and result instanceof TarFileInfo
26-
or
27-
name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
28-
}
29-
30-
override ClassValue getType() { result = Value::named("tarfile.TarFile") }
31-
32-
override TaintKind getTaintForIteration() { result instanceof TarFileInfo }
33-
}
34-
35-
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
36-
class TarfileOpen extends TaintSource {
37-
TarfileOpen() {
38-
Value::named("tarfile.open").getACall() = this and
39-
/*
40-
* If argument refers to a string object, then it's a hardcoded path and
41-
* this tarfile is safe.
42-
*/
43-
44-
not this.(CallNode).getAnArg().pointsTo(any(StringValue str)) and
45-
/* Ignore opens within the tarfile module itself */
46-
not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
47-
}
48-
49-
override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
50-
}
51-
52-
class TarFileInfo extends TaintKind {
53-
TarFileInfo() { this = "tarfile.entry" }
54-
55-
override TaintKind getTaintOfMethodResult(string name) { name = "next" and result = this }
56-
57-
override TaintKind getTaintOfAttribute(string name) {
58-
name = "name" and result instanceof TarFileInfo
59-
}
60-
}
61-
62-
/*
63-
* For efficiency we don't want to track the flow of taint
64-
* around the tarfile module.
65-
*/
66-
67-
class ExcludeTarFilePy extends Sanitizer {
68-
ExcludeTarFilePy() { this = "Tar sanitizer" }
69-
70-
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
71-
node.getLocation().getFile().getBaseName() = "tarfile.py" and
72-
(
73-
taint instanceof OpenTarFile
74-
or
75-
taint instanceof TarFileInfo
76-
or
77-
taint.(SequenceKind).getItem() instanceof TarFileInfo
78-
)
79-
}
80-
}
81-
82-
/* Any call to an extractall method */
83-
class ExtractAllSink extends TaintSink {
84-
ExtractAllSink() {
85-
exists(CallNode call |
86-
this = call.getFunction().(AttrNode).getObject("extractall") and
87-
not exists(call.getAnArg())
88-
)
89-
}
90-
91-
override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
92-
}
93-
94-
/* Argument to extract method */
95-
class ExtractSink extends TaintSink {
96-
CallNode call;
97-
98-
ExtractSink() {
99-
call.getFunction().(AttrNode).getName() = "extract" and
100-
this = call.getArg(0)
101-
}
102-
103-
override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
104-
}
105-
106-
/* Members argument to extract method */
107-
class ExtractMembersSink extends TaintSink {
108-
CallNode call;
109-
110-
ExtractMembersSink() {
111-
call.getFunction().(AttrNode).getName() = "extractall" and
112-
(this = call.getArg(0) or this = call.getArgByName("members"))
113-
}
114-
115-
override predicate sinks(TaintKind kind) {
116-
kind.(SequenceKind).getItem() instanceof TarFileInfo
117-
or
118-
kind instanceof OpenTarFile
119-
}
120-
}
121-
122-
class TarFileInfoSanitizer extends Sanitizer {
123-
TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
124-
125-
/* The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
126-
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
127-
taint instanceof TarFileInfo and
128-
clears_taint_on_false_edge(test.getTest(), test.getSense())
129-
}
130-
131-
private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) {
132-
path_sanitizing_test(test) and
133-
sense = false
134-
or
135-
// handle `not` (also nested)
136-
test.(UnaryExprNode).getNode().getOp() instanceof Not and
137-
clears_taint_on_false_edge(test.(UnaryExprNode).getOperand(), sense.booleanNot())
138-
}
139-
}
140-
141-
private predicate path_sanitizing_test(ControlFlowNode test) {
142-
/* Assume that any test with "path" in it is a sanitizer */
143-
test.getAChild+().(AttrNode).getName().matches("%path")
144-
or
145-
test.getAChild+().(NameNode).getId().matches("%path")
146-
}
147-
148-
class TarSlipConfiguration extends TaintTracking::Configuration {
149-
TarSlipConfiguration() { this = "TarSlip configuration" }
150-
151-
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
152-
153-
override predicate isSink(TaintTracking::Sink sink) {
154-
sink instanceof ExtractSink or
155-
sink instanceof ExtractAllSink or
156-
sink instanceof ExtractMembersSink
157-
}
158-
159-
override predicate isSanitizer(Sanitizer sanitizer) {
160-
sanitizer instanceof TarFileInfoSanitizer
161-
or
162-
sanitizer instanceof ExcludeTarFilePy
163-
}
164-
165-
override predicate isBarrier(DataFlow::Node node) {
166-
// Avoid flow into the tarfile module
167-
exists(ParameterDefinition def |
168-
node.asVariable().getDefinition() = def
169-
or
170-
node.asCfgNode() = def.getDefiningNode()
171-
|
172-
def.getScope() = Value::named("tarfile.open").(CallableValue).getScope()
173-
or
174-
def.isSelf() and def.getScope().getEnclosingModule().getName() = "tarfile"
175-
)
176-
}
177-
}
178-
179-
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
180-
where config.hasFlowPath(src, sink)
181-
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
19+
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where config.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink, "Extraction of tarfile from $@", source.getNode(),
18222
"a potentially untrusted source"

0 commit comments

Comments
 (0)