github · yoff · Jul 31, 2024 · Jul 12, 2024 · Jul 12, 2024 · Jul 12, 2024
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Added support for the `DictionaryElement[<key>]` and `DictionaryElementAny` access path tokens when customizing library models for `sourceModel` (see https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/).
@@ -134,9 +134,25 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
     token.getAnArgument() = "any-named" and
     result = node.getKeywordParameter(_)
   )
+  or
+  // content based steps
+  //
+  // Note: if we want to migrate to `FlowSummaryImpl::Input::encodeContent`, as is done
+  // for Ruby, be aware that we currently don't create
+  // `DataFlow::DictionaryElementContent` merely from seeing a subscript read, so we
+  // would need to add that. We would also need to handle tokens such as
+  // `DictionaryElementAny`, which has no value for `.getAnArgument()`.
+  (
+    token.getName() = "DictionaryElement" and
+    result = node.getSubscript(token.getAnArgument())
+    or
+    token.getName() = "DictionaryElementAny" and
+    result = node.getASubscript() and
+    not exists(token.getAnArgument())
+    // TODO: ListElement/SetElement/TupleElement
+  )
   // Some features don't have MaD tokens yet, they would need to be added to API-graphs first.
   // - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
-  // - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue")
 }
 
 /**
@@ -242,15 +258,19 @@ InvokeNode getAnInvocationOf(API::Node node) { result = node.getACall() }
  */
 bindingset[name]
 predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
-  name = ["Member", "Instance", "Awaited", "Call", "Method", "Subclass"]
+  name =
+    [
+      "Member", "Instance", "Awaited", "Call", "Method", "Subclass", "DictionaryElement",
+      "DictionaryElementAny"
+    ]
 }
 
 /**
  * Holds if `name` is a valid name for an access path token with no arguments, occurring
  * in an identifying access path.
  */
 predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
-  name = ["Instance", "Awaited", "Call", "Subclass"]
+  name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny"]
 }
 
 /**
@@ -259,7 +279,7 @@ predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
  */
 bindingset[name, argument]
 predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
-  name = ["Member", "Method"] and
+  name = ["Member", "Method", "DictionaryElement"] and
   exists(argument)
   or
   name = ["Argument", "Parameter"] and

@@ -106,6 +106,8 @@ isSource
 | test.py:117:31:117:41 | ControlFlowNode for getSource() | test-source |
 | test.py:118:35:118:45 | ControlFlowNode for getSource() | test-source |
 | test.py:119:20:119:30 | ControlFlowNode for getSource() | test-source |
+| test.py:124:1:124:33 | ControlFlowNode for Attribute() | test-source |
+| test.py:126:11:126:43 | ControlFlowNode for Attribute() | test-source |
 syntaxErrors
 | Member[foo |
 | Member[foo] .Member[bar] |

@@ -23,6 +23,12 @@ extensions:
       - ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0]", "test-source"]
       - ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any]", "test-source"]
       - ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named]", "test-source"]
+      # test steps through content
+      - ["testlib", "Member[source_dict].DictionaryElement[key].Member[func].ReturnValue", "test-source"]
+      - ["testlib", "Member[source_dict_any].DictionaryElementAny.Member[func].ReturnValue", "test-source"]
+      # TODO: Add support for lists/tuples
+      # - ["testlib", "Member[source_list].ListElement.Member[func].ReturnValue", "test-source"]
+      # - ["testlib", "Member[source_tuple].TupleElement[0].Member[func].ReturnValue", "test-source"]
 
   - addsTo:
       pack: codeql/python-all

@@ -117,3 +117,17 @@ def anyNamed(self, name1, name2=2): # Parameter[any-named] matches all non-self
 testlib.foo().bar().fuzzyCall(getSource()) # NOT OK
 testlib.foo(lambda x: x.fuzzyCall(getSource())) # NOT OK
 otherlib.fuzzyCall(getSource()) # OK
+
+# defining sources through content steps
+
+# dictionaries
+testlib.source_dict["key"].func() # source
+testlib.source_dict["safe"].func() # not a source
+lambda k: testlib.source_dict_any[k].func() # source
+
+# TODO: implement support for lists
+lambda i: testlib.source_list[i].func()
+
+# TODO: implement support for tuples
+testlib.source_tuple[0].func() # a source
+testlib.source_tuple[1].func() # not a source