Skip to content

Commit

Permalink
Merge pull request #16971 from RasmusWL/mad-dict-source
Browse files Browse the repository at this point in the history
Python: Add MaD support for DictionaryElement/DictionaryElementAny for sources
  • Loading branch information
yoff authored Jul 31, 2024
2 parents 8901b1f + efcd4e2 commit 123dcc7
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 4 deletions.
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2024-07-12-mad-dict-sources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added support for the `DictionaryElement[<key>]` and `DictionaryElementAny` access path tokens in `sourceModel` rows when customizing library models (see https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/).
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,25 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
token.getAnArgument() = "any-named" and
result = node.getKeywordParameter(_)
)
or
// content based steps
//
// note: if we want to migrate to use `FlowSummaryImpl::Input::encodeContent` like
// they do in Ruby, be aware that we currently don't make
// `DataFlow::DictionaryElementContent` just from seeing a subscript read, so we would
// need to add that. (also need to handle things like `DictionaryElementAny` which
// doesn't have any value for .getAnArgument())
(
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() = "DictionaryElementAny" and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: ListElement/SetElement/TupleElement
)
// Some features don't have MaD tokens yet, they would need to be added to API-graphs first.
// - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
// - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue")
}

/**
Expand Down Expand Up @@ -242,15 +258,19 @@ InvokeNode getAnInvocationOf(API::Node node) { result = node.getACall() }
*/
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
// NOTE(review): the single-line list directly below appears to be the pre-change line
// rendered by the diff view; the multi-line list after it is a superset of it -- confirm
// that only one of the two is present in the actual file.
name = ["Member", "Instance", "Awaited", "Call", "Method", "Subclass"]
// Holds when `name` is one of the token names listed here; compared with the list above,
// this one additionally accepts `DictionaryElement` and `DictionaryElementAny`.
name =
[
"Member", "Instance", "Awaited", "Call", "Method", "Subclass", "DictionaryElement",
"DictionaryElementAny"
]
}

/**
* Holds if `name` is a valid name for an access path token with no arguments, occurring
* in an identifying access path.
*
* `DictionaryElementAny` is written without an argument, so it is listed here;
* `DictionaryElement` is not listed because it is used with a key argument
* (as in `DictionaryElement[key]`).
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
// NOTE(review): the line directly below appears to be the pre-change line rendered by the
// diff view; the line after it is the same list extended with `DictionaryElementAny` --
// confirm that only one of the two is present in the actual file.
name = ["Instance", "Awaited", "Call", "Subclass"]
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny"]
}

/**
Expand All @@ -259,7 +279,7 @@ predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
*/
bindingset[name, argument]
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Member", "Method"] and
name = ["Member", "Method", "DictionaryElement"] and
exists(argument)
or
name = ["Argument", "Parameter"] and
Expand Down
2 changes: 2 additions & 0 deletions python/ql/test/library-tests/frameworks/data/test.expected
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ isSource
| test.py:117:31:117:41 | ControlFlowNode for getSource() | test-source |
| test.py:118:35:118:45 | ControlFlowNode for getSource() | test-source |
| test.py:119:20:119:30 | ControlFlowNode for getSource() | test-source |
| test.py:124:1:124:33 | ControlFlowNode for Attribute() | test-source |
| test.py:126:11:126:43 | ControlFlowNode for Attribute() | test-source |
syntaxErrors
| Member[foo |
| Member[foo] .Member[bar] |
Expand Down
6 changes: 6 additions & 0 deletions python/ql/test/library-tests/frameworks/data/test.ext.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ extensions:
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0]", "test-source"]
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any]", "test-source"]
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named]", "test-source"]
# test steps through content
- ["testlib", "Member[source_dict].DictionaryElement[key].Member[func].ReturnValue", "test-source"]
- ["testlib", "Member[source_dict_any].DictionaryElementAny.Member[func].ReturnValue", "test-source"]
# TODO: Add support for lists/tuples
# - ["testlib", "Member[source_list].ListElement.Member[func].ReturnValue", "test-source"]
# - ["testlib", "Member[source_tuple].TupleElement[0].Member[func].ReturnValue", "test-source"]

- addsTo:
pack: codeql/python-all
Expand Down
14 changes: 14 additions & 0 deletions python/ql/test/library-tests/frameworks/data/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,17 @@ def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all non-self
testlib.foo().bar().fuzzyCall(getSource()) # NOT OK
testlib.foo(lambda x: x.fuzzyCall(getSource())) # NOT OK
otherlib.fuzzyCall(getSource()) # OK

# defining sources through content steps

# dictionaries
testlib.source_dict["key"].func() # source: matches the DictionaryElement[key] model
testlib.source_dict["safe"].func() # not a source: only the "key" subscript is modeled
lambda k: testlib.source_dict_any[k].func() # source: DictionaryElementAny matches any subscript

# TODO: implement support for lists
lambda i: testlib.source_list[i].func() # intended source; not detected yet (ListElement model is disabled)

# TODO: implement support for tuples
testlib.source_tuple[0].func() # intended source; not detected yet (TupleElement model is disabled)
testlib.source_tuple[1].func() # not a source

0 comments on commit 123dcc7

Please sign in to comment.