Skip to content

Commit 7a9e03d

Browse files
committed
Add support for openai.OpenAI client library
1 parent 6fa6093 commit 7a9e03d

File tree

10 files changed

+344
-0
lines changed

10 files changed

+344
-0
lines changed

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ private import semmle.python.frameworks.Multidict
5454
private import semmle.python.frameworks.Mysql
5555
private import semmle.python.frameworks.MySQLdb
5656
private import semmle.python.frameworks.Numpy
57+
private import semmle.python.frameworks.OpenAI
5758
private import semmle.python.frameworks.Opml
5859
private import semmle.python.frameworks.Oracledb
5960
private import semmle.python.frameworks.Pandas
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `agents` (OpenAI Agents SDK) and `openai` packages.
3+
* See https://github.com/openai/openai-agents-python.
4+
*/
5+
6+
private import python
7+
private import semmle.python.ApiGraphs
8+
9+
/**
10+
* Provides models for Agent (instances of the `agents.Agent` class).
11+
*
12+
* See https://github.com/openai/openai-agents-python.
13+
*/
14+
module Agent {
15+
/** Gets a reference to the `agents.Agent` class. */
16+
API::Node classRef() { result = API::moduleImport("agents").getMember("Agent") }
17+
18+
/** Gets a reference to the `instructions` keyword argument of a call to `agents.Agent`, which refers to the system prompt. */
19+
API::Node sink() { result = classRef().getACall().getKeywordParameter("instructions") }
20+
}
21+
22+
/**
23+
* Provides models for OpenAI (instances of `openai` classes).
24+
*
25+
* See https://github.com/openai/openai-python.
26+
*/
27+
module OpenAI {
28+
/** Gets a reference to the `openai.OpenAI`, `openai.AsyncOpenAI` and `openai.AzureOpenAI` classes. */
29+
API::Node classRef() {
30+
result = API::moduleImport("openai").getMember(["OpenAI", "AsyncOpenAI", "AzureOpenAI"])
31+
}
32+
33+
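/**
* Gets a reference to a keyword argument that carries prompt content for an `openai` client:
* `input` or `instructions` of `responses.create`, `item` of
* `realtime.connect().conversation.item.create`, or `messages` of `chat.completions.create`.
*/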
34+
API::Node sink() {
35+
result =
36+
classRef()
37+
.getReturn()
38+
.getMember("responses")
39+
.getMember("create")
40+
.getKeywordParameter(["input", "instructions"]) or
41+
result =
42+
classRef()
43+
.getReturn()
44+
.getMember("realtime")
45+
.getMember("connect")
46+
.getReturn()
47+
.getMember("conversation")
48+
.getMember("item")
49+
.getMember("create")
50+
.getKeywordParameter("item") or
51+
result =
52+
classRef()
53+
.getReturn()
54+
.getMember("chat")
55+
.getMember("completions")
56+
.getMember("create")
57+
.getKeywordParameter("messages")
58+
}
59+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import python
2+
private import semmle.python.dataflow.new.DataFlow
3+
private import semmle.python.Concepts
4+
private import semmle.python.dataflow.new.RemoteFlowSources
5+
private import semmle.python.dataflow.new.BarrierGuards
6+
private import semmle.python.frameworks.OpenAI
7+
8+
/**
9+
* Provides default sources, sinks and sanitizers for detecting
10+
* "prompt injection"
11+
* vulnerabilities, as well as extension points for adding your own.
12+
*/
13+
module PromptInjection {
14+
/**
15+
* A data flow source for "prompt injection" vulnerabilities.
16+
*/
17+
abstract class Source extends DataFlow::Node { }
18+
19+
/**
20+
* A data flow sink for "prompt injection" vulnerabilities.
21+
*/
22+
abstract class Sink extends DataFlow::Node { }
23+
24+
/**
25+
* A sanitizer for "prompt injection" vulnerabilities.
26+
*/
27+
abstract class Sanitizer extends DataFlow::Node { }
28+
29+
/**
30+
* An active threat-model source, considered as a flow source.
31+
*/
32+
private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
33+
34+
/**
* A prompt-construction argument modeled by `Agent::sink()` or `OpenAI::sink()`,
* considered as a sink for "prompt injection" vulnerabilities.
*/
class SystemPromptSink extends Sink {
35+
SystemPromptSink() { this = Agent::sink().asSink() or this = OpenAI::sink().asSink() }
36+
}
37+
38+
private import semmle.python.frameworks.data.ModelsAsData
39+
40+
/**
* A sink of kind `prompt-injection` obtained from `ModelOutput::getASinkNode`,
* considered as a sink for "prompt injection" vulnerabilities.
*/
private class DataAsPromptSink extends Sink {
41+
DataAsPromptSink() { this = ModelOutput::getASinkNode("prompt-injection").asSink() }
42+
}
43+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
private import python
2+
import semmle.python.dataflow.new.DataFlow
3+
import semmle.python.dataflow.new.TaintTracking
4+
import PromptInjectionCustomizations::PromptInjection
5+
6+
private module PromptInjectionConfig implements DataFlow::ConfigSig {
7+
predicate isSource(DataFlow::Node node) { node instanceof Source }
8+
9+
predicate isSink(DataFlow::Node node) {
10+
node instanceof Sink
11+
//any()
12+
}
13+
14+
/** Holds if `node` is a `Sanitizer`; flow is blocked both into and out of such nodes. */
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
15+
16+
predicate observeDiffInformedIncrementalMode() { any() }
17+
}
18+
19+
/** Global taint-tracking for detecting "prompt injection" vulnerabilities. */
20+
module PromptInjectionFlow = TaintTracking::Global<PromptInjectionConfig>;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Prompts can be constructed to bypass the original purpose of an agent, leading to sensitive data leaks or
8+
operations that were not intended.
9+
</p>
10+
</overview>
11+
12+
<recommendation>
13+
<p>Sanitize user input, and avoid using user input in developer or system level prompts.</p>
14+
</recommendation>
15+
16+
<example>
17+
<p>In the following examples, the cases marked GOOD show secure prompt construction, whereas the cases marked BAD may be susceptible to prompt injection.</p>
18+
<sample src="examples/TODO.py" />
19+
</example>
20+
21+
<references>
22+
<li>OWASP: <a href="https://owasp.org/www-community/attacks/PromptInjection">Prompt Injection</a>.</li>
23+
</references>
24+
25+
</qhelp>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* @name User input used in developer message or system prompt
3+
* @description Using user input in a developer message or system prompt can allow for prompt injection attacks.
4+
* @kind path-problem
5+
* @problem.severity error
6+
* @security-severity 5.0
7+
* @precision high
8+
* @id py/prompt-injection
9+
* @tags security
10+
* external/cwe/cwe-1427
11+
*/
12+
13+
import python
14+
import semmle.python.security.dataflow.PromptInjectionQuery
15+
import PromptInjectionFlow::PathGraph
16+
17+
from PromptInjectionFlow::PathNode source, PromptInjectionFlow::PathNode sink
18+
where PromptInjectionFlow::flowPath(source, sink)
19+
select sink.getNode(), source, sink, "This prompt construction depends on a $@.", source.getNode(),
20+
"user-provided value"
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
edges
2+
| agent_instructions.py:2:26:2:32 | ControlFlowNode for ImportMember | agent_instructions.py:2:26:2:32 | ControlFlowNode for request | provenance | |
3+
| agent_instructions.py:2:26:2:32 | ControlFlowNode for request | agent_instructions.py:7:13:7:19 | ControlFlowNode for request | provenance | |
4+
| agent_instructions.py:7:5:7:9 | ControlFlowNode for input | agent_instructions.py:9:50:9:89 | ControlFlowNode for BinaryExpr | provenance | |
5+
| agent_instructions.py:7:13:7:19 | ControlFlowNode for request | agent_instructions.py:7:13:7:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
6+
| agent_instructions.py:7:13:7:24 | ControlFlowNode for Attribute | agent_instructions.py:7:13:7:37 | ControlFlowNode for Attribute() | provenance | dict.get |
7+
| agent_instructions.py:7:13:7:37 | ControlFlowNode for Attribute() | agent_instructions.py:7:5:7:9 | ControlFlowNode for input | provenance | |
8+
| openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:2:26:2:32 | ControlFlowNode for request | provenance | |
9+
| openai_test.py:2:26:2:32 | ControlFlowNode for request | openai_test.py:12:15:12:21 | ControlFlowNode for request | provenance | |
10+
| openai_test.py:2:26:2:32 | ControlFlowNode for request | openai_test.py:13:13:13:19 | ControlFlowNode for request | provenance | |
11+
| openai_test.py:2:26:2:32 | ControlFlowNode for request | openai_test.py:14:12:14:18 | ControlFlowNode for request | provenance | |
12+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:17:22:17:46 | ControlFlowNode for BinaryExpr | provenance | |
13+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:22:22:22:46 | ControlFlowNode for BinaryExpr | provenance | |
14+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:23:15:36:9 | ControlFlowNode for List | provenance | |
15+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:40:22:40:46 | ControlFlowNode for BinaryExpr | provenance | |
16+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:58:18:69:9 | ControlFlowNode for List | provenance | |
17+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | openai_test.py:73:18:82:9 | ControlFlowNode for List | provenance | |
18+
| openai_test.py:12:15:12:21 | ControlFlowNode for request | openai_test.py:12:15:12:26 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
19+
| openai_test.py:12:15:12:21 | ControlFlowNode for request | openai_test.py:13:13:13:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
20+
| openai_test.py:12:15:12:21 | ControlFlowNode for request | openai_test.py:14:12:14:23 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
21+
| openai_test.py:12:15:12:26 | ControlFlowNode for Attribute | openai_test.py:12:15:12:41 | ControlFlowNode for Attribute() | provenance | dict.get |
22+
| openai_test.py:12:15:12:41 | ControlFlowNode for Attribute() | openai_test.py:12:5:12:11 | ControlFlowNode for persona | provenance | |
23+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:18:15:18:19 | ControlFlowNode for query | provenance | |
24+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:23:15:36:9 | ControlFlowNode for List | provenance | |
25+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:41:15:41:19 | ControlFlowNode for query | provenance | |
26+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:46:18:54:13 | ControlFlowNode for Dict | provenance | |
27+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:58:18:69:9 | ControlFlowNode for List | provenance | |
28+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | openai_test.py:73:18:82:9 | ControlFlowNode for List | provenance | |
29+
| openai_test.py:13:13:13:19 | ControlFlowNode for request | openai_test.py:13:13:13:24 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
30+
| openai_test.py:13:13:13:19 | ControlFlowNode for request | openai_test.py:14:12:14:23 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
31+
| openai_test.py:13:13:13:24 | ControlFlowNode for Attribute | openai_test.py:13:13:13:37 | ControlFlowNode for Attribute() | provenance | dict.get |
32+
| openai_test.py:13:13:13:37 | ControlFlowNode for Attribute() | openai_test.py:13:5:13:9 | ControlFlowNode for query | provenance | |
33+
| openai_test.py:14:5:14:8 | ControlFlowNode for role | openai_test.py:46:18:54:13 | ControlFlowNode for Dict | provenance | |
34+
| openai_test.py:14:5:14:8 | ControlFlowNode for role | openai_test.py:58:18:69:9 | ControlFlowNode for List | provenance | |
35+
| openai_test.py:14:12:14:18 | ControlFlowNode for request | openai_test.py:14:12:14:23 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
36+
| openai_test.py:14:12:14:23 | ControlFlowNode for Attribute | openai_test.py:14:12:14:35 | ControlFlowNode for Attribute() | provenance | dict.get |
37+
| openai_test.py:14:12:14:35 | ControlFlowNode for Attribute() | openai_test.py:14:5:14:8 | ControlFlowNode for role | provenance | |
38+
nodes
39+
| agent_instructions.py:2:26:2:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
40+
| agent_instructions.py:2:26:2:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
41+
| agent_instructions.py:7:5:7:9 | ControlFlowNode for input | semmle.label | ControlFlowNode for input |
42+
| agent_instructions.py:7:13:7:19 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
43+
| agent_instructions.py:7:13:7:24 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
44+
| agent_instructions.py:7:13:7:37 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
45+
| agent_instructions.py:9:50:9:89 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
46+
| openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
47+
| openai_test.py:2:26:2:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
48+
| openai_test.py:12:5:12:11 | ControlFlowNode for persona | semmle.label | ControlFlowNode for persona |
49+
| openai_test.py:12:15:12:21 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
50+
| openai_test.py:12:15:12:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
51+
| openai_test.py:12:15:12:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
52+
| openai_test.py:13:5:13:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
53+
| openai_test.py:13:13:13:19 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
54+
| openai_test.py:13:13:13:24 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
55+
| openai_test.py:13:13:13:37 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
56+
| openai_test.py:14:5:14:8 | ControlFlowNode for role | semmle.label | ControlFlowNode for role |
57+
| openai_test.py:14:12:14:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
58+
| openai_test.py:14:12:14:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
59+
| openai_test.py:14:12:14:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
60+
| openai_test.py:17:22:17:46 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
61+
| openai_test.py:18:15:18:19 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
62+
| openai_test.py:22:22:22:46 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
63+
| openai_test.py:23:15:36:9 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
64+
| openai_test.py:40:22:40:46 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
65+
| openai_test.py:41:15:41:19 | ControlFlowNode for query | semmle.label | ControlFlowNode for query |
66+
| openai_test.py:46:18:54:13 | ControlFlowNode for Dict | semmle.label | ControlFlowNode for Dict |
67+
| openai_test.py:58:18:69:9 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
68+
| openai_test.py:73:18:82:9 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
69+
subpaths
70+
#select
71+
| agent_instructions.py:9:50:9:89 | ControlFlowNode for BinaryExpr | agent_instructions.py:2:26:2:32 | ControlFlowNode for ImportMember | agent_instructions.py:9:50:9:89 | ControlFlowNode for BinaryExpr | This prompt construction depends on a $@. | agent_instructions.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
72+
| openai_test.py:17:22:17:46 | ControlFlowNode for BinaryExpr | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:17:22:17:46 | ControlFlowNode for BinaryExpr | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
73+
| openai_test.py:18:15:18:19 | ControlFlowNode for query | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:18:15:18:19 | ControlFlowNode for query | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
74+
| openai_test.py:22:22:22:46 | ControlFlowNode for BinaryExpr | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:22:22:22:46 | ControlFlowNode for BinaryExpr | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
75+
| openai_test.py:23:15:36:9 | ControlFlowNode for List | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:23:15:36:9 | ControlFlowNode for List | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
76+
| openai_test.py:40:22:40:46 | ControlFlowNode for BinaryExpr | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:40:22:40:46 | ControlFlowNode for BinaryExpr | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
77+
| openai_test.py:41:15:41:19 | ControlFlowNode for query | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:41:15:41:19 | ControlFlowNode for query | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
78+
| openai_test.py:46:18:54:13 | ControlFlowNode for Dict | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:46:18:54:13 | ControlFlowNode for Dict | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
79+
| openai_test.py:58:18:69:9 | ControlFlowNode for List | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:58:18:69:9 | ControlFlowNode for List | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
80+
| openai_test.py:73:18:82:9 | ControlFlowNode for List | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | openai_test.py:73:18:82:9 | ControlFlowNode for List | This prompt construction depends on a $@. | openai_test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
query: Security/CWE-1427/PromptInjection.ql
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from agents import Agent, Runner
2+
from flask import Flask, request # $ Source=flask
3+
app = Flask(__name__)
4+
5+
@app.route("/parameter-route")
6+
def get_input():
7+
input = request.args.get("input")
8+
9+
agent = Agent(name="Assistant", instructions="This prompt is customized for " + input) # $Alert[py/prompt-injection]
10+
11+
result = Runner.run_sync(agent, "This is a user message.")
12+
print(result.final_output)

0 commit comments

Comments
 (0)