From cea5223671002efa6ce9018bc743908df7c098f6 Mon Sep 17 00:00:00 2001 From: Raffi Khatchadourian Date: Mon, 29 Jan 2024 22:22:53 -0500 Subject: [PATCH] Get up-to-date (#132) - Investigate decorator issue on tutorial code (https://github.com/ponder-lab/ML/pull/68) - Fix comment. - Add interprocedural dataset support (https://github.com/ponder-lab/ML/pull/69) - Change log from warning to info. - Add support for non-scalar datasets (https://github.com/ponder-lab/ML/pull/70) - Use interprocedural analysis for property read case. - Add logging. - Fix non-scalar determination. - Add logger. - Fix logger. - Add logging around callable workaround. - Handle callables with lexical scoping. - Formatting. - Handle possible null receivers in callable lookup. - Change log message. - Add log. - Add the ability to process datasets coming out of enumerate() (https://github.com/ponder-lab/ML/pull/71) - Fix tests for input files with multiple functions. - Enhance TF2 tests (https://github.com/ponder-lab/ML/pull/73) - Separate tests by calling context (https://github.com/ponder-lab/ML/pull/74) - Add tf.keras.layers.Dense. - Move comment. - Additional check. - New API. - Add autoencoder test. - Move the value number check outside the context loop. - Move the sigmoid function to math. - Format. - Add API tf.add(). - Check the context/function PK mappings. (https://github.com/ponder-lab/ML/pull/77) - Add URL. - Add dataset generator and test. - tf.nn.softmax isn't a tensor generator. - Add URL. - Apply spotless. - Let's make the two tests only diff in the invocation. - Prevent infinite recursion when processing instructions when finding tensor data flow sources. - When we look for dataset tensor sources, we recurse up to the original defining instruction. However, there's a possibility that instructions have mutually recursive definitions due to the phi instruction. Let's prevent against that. 
--- .../python/ml/test/TestTensorflowModel.java | 396 ++++++++++-------- .../data/tensorflow.xml | 108 +++++ .../ml/client/PythonTensorAnalysisEngine.java | 203 ++++++++- .../cast/python/ml/types/TensorFlowTypes.java | 18 + .../data/autoencoder.py | 188 +++++++++ .../data/multigpu_training.py | 235 +++++++++++ .../data/neural_network.py | 184 ++++++++ .../data/tensorflow_eager_execution.py | 122 ++++++ .../data/tensorflow_gan_tutorial.py | 165 ++++++++ .../data/tensorflow_gan_tutorial2.py | 166 ++++++++ .../data/tf2_test_add.py | 11 + .../data/tf2_test_add2.py | 11 + .../data/tf2_test_add3.py | 11 + .../data/tf2_test_add4.py | 11 + .../data/tf2_test_add5.py | 11 + .../data/tf2_test_add6.py | 11 + .../data/tf2_test_dataset10.py | 20 + .../data/tf2_test_dataset6.py | 14 + .../data/tf2_test_dataset7.py | 15 + .../data/tf2_test_dataset8.py | 12 + .../data/tf2_test_dataset9.py | 15 + .../data/tf2_test_model_call.py | 4 +- .../data/tf2_test_sigmoid.py | 10 + .../data/tf2_test_sigmoid2.py | 10 + com.ibm.wala.cast.python.test/data/tf2o.py | 5 +- .../PythonTrampolineTargetSelector.java | 126 +++++- 26 files changed, 1881 insertions(+), 201 deletions(-) create mode 100644 com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/types/TensorFlowTypes.java create mode 100644 com.ibm.wala.cast.python.test/data/autoencoder.py create mode 100644 com.ibm.wala.cast.python.test/data/multigpu_training.py create mode 100644 com.ibm.wala.cast.python.test/data/neural_network.py create mode 100644 com.ibm.wala.cast.python.test/data/tensorflow_eager_execution.py create mode 100644 com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial.py create mode 100644 com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial2.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_add.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_add2.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_add3.py create mode 100644 
com.ibm.wala.cast.python.test/data/tf2_test_add4.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_add5.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_add6.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_dataset10.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_dataset6.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_dataset7.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_dataset8.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_dataset9.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_sigmoid.py create mode 100644 com.ibm.wala.cast.python.test/data/tf2_test_sigmoid2.py diff --git a/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflowModel.java b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflowModel.java index a81193212..3a3f645d1 100644 --- a/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflowModel.java +++ b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflowModel.java @@ -1,5 +1,8 @@ package com.ibm.wala.cast.python.ml.test; +import static java.util.Collections.emptySet; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.toSet; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -12,6 +15,7 @@ import com.ibm.wala.classLoader.IMethod; import com.ibm.wala.ipa.callgraph.CGNode; import com.ibm.wala.ipa.callgraph.CallGraph; +import com.ibm.wala.ipa.callgraph.Context; import com.ibm.wala.ipa.callgraph.propagation.LocalPointerKey; import com.ibm.wala.ipa.callgraph.propagation.PointerKey; import com.ibm.wala.ipa.callgraph.propagation.SSAPropagationCallGraphBuilder; @@ -20,7 +24,6 @@ import java.io.IOException; import java.util.Arrays; import 
java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -68,137 +71,137 @@ public void testTf1() @Test public void testTf2() throws ClassHierarchyException, IllegalArgumentException, CancelException, IOException { - testTf2("tf2.py", "add", 2, 3, 2, 3); - testTf2("tf2b.py", "add", 2, 3, 2, 3); - testTf2("tf2c.py", "add", 2, 3, 2, 3); - testTf2("tf2d.py", "add", 2, 3, 2, 3); - testTf2("tf2d2.py", "add", 2, 3, 2, 3); - testTf2("tf2d3.py", "add", 2, 3, 2, 3); - testTf2("tf2d4.py", "add", 2, 3, 2, 3); - testTf2("tf2d5.py", "add", 2, 3, 2, 3); - testTf2("tf2e.py", "add", 2, 3, 2, 3); - testTf2("tf2e2.py", "add", 2, 3, 2, 3); - testTf2("tf2e3.py", "add", 2, 3, 2, 3); - testTf2("tf2e4.py", "add", 2, 3, 2, 3); - testTf2("tf2e5.py", "add", 2, 3, 2, 3); - testTf2("tf2e6.py", "add", 2, 3, 2, 3); - testTf2("tf2e7.py", "add", 2, 3, 2, 3); - testTf2("tf2e8.py", "add", 2, 3, 2, 3); - testTf2("tf2f.py", "add", 2, 3, 2, 3); - testTf2("tf2f2.py", "add", 2, 3, 2, 3); - testTf2("tf2f3.py", "add", 2, 3, 2, 3); - testTf2("tf2g.py", "add", 2, 3, 2, 3); - testTf2("tf2g2.py", "add", 2, 3, 2, 3); - testTf2("tf2h.py", "add", 2, 3, 2, 3); - testTf2("tf2h2.py", "add", 2, 3, 2, 3); - testTf2("tf2i.py", "add", 2, 3, 2, 3); - testTf2("tf2i2.py", "add", 2, 3, 2, 3); - testTf2("tf2j.py", "add", 2, 3, 2, 3); - testTf2("tf2j2.py", "add", 2, 3, 2, 3); - testTf2("tf2k.py", "add", 2, 3, 2, 3); - testTf2("tf2k2.py", "add", 2, 3, 2, 3); - testTf2("tf2l.py", "add", 2, 3, 2, 3); - testTf2("tf2l2.py", "add", 2, 3, 2, 3); - testTf2("tf2m.py", "add", 2, 3, 2, 3); - testTf2("tf2m2.py", "add", 2, 3, 2, 3); - testTf2("tf2n.py", "func2", 1, 4, 2); - testTf2("tf2n2.py", "func2", 1, 4, 2); - testTf2("tf2n3.py", "func2", 1, 4, 2); + testTf2("tf2.py", "add", 2, 2, 2, 3); + testTf2("tf2b.py", "add", 2, 2, 2, 3); + testTf2("tf2c.py", "add", 2, 2, 2, 3); + testTf2("tf2d.py", "add", 2, 2, 2, 3); + testTf2("tf2d2.py", "add", 2, 2, 2, 3); + 
testTf2("tf2d3.py", "add", 2, 2, 2, 3); + testTf2("tf2d4.py", "add", 2, 2, 2, 3); + testTf2("tf2d5.py", "add", 2, 2, 2, 3); + testTf2("tf2e.py", "add", 2, 2, 2, 3); + testTf2("tf2e2.py", "add", 2, 2, 2, 3); + testTf2("tf2e3.py", "add", 2, 2, 2, 3); + testTf2("tf2e4.py", "add", 2, 2, 2, 3); + testTf2("tf2e5.py", "add", 2, 2, 2, 3); + testTf2("tf2e6.py", "add", 2, 2, 2, 3); + testTf2("tf2e7.py", "add", 2, 2, 2, 3); + testTf2("tf2e8.py", "add", 2, 2, 2, 3); + testTf2("tf2f.py", "add", 2, 2, 2, 3); + testTf2("tf2f2.py", "add", 2, 2, 2, 3); + testTf2("tf2f3.py", "add", 2, 2, 2, 3); + testTf2("tf2g.py", "add", 2, 2, 2, 3); + testTf2("tf2g2.py", "add", 2, 2, 2, 3); + testTf2("tf2h.py", "add", 2, 2, 2, 3); + testTf2("tf2h2.py", "add", 2, 2, 2, 3); + testTf2("tf2i.py", "add", 2, 2, 2, 3); + testTf2("tf2i2.py", "add", 2, 2, 2, 3); + testTf2("tf2j.py", "add", 2, 2, 2, 3); + testTf2("tf2j2.py", "add", 2, 2, 2, 3); + testTf2("tf2k.py", "add", 2, 2, 2, 3); + testTf2("tf2k2.py", "add", 2, 2, 2, 3); + testTf2("tf2l.py", "add", 2, 2, 2, 3); + testTf2("tf2l2.py", "add", 2, 2, 2, 3); + testTf2("tf2m.py", "add", 2, 2, 2, 3); + testTf2("tf2m2.py", "add", 2, 2, 2, 3); + testTf2("tf2n.py", "func2", 1, 1, 2); + testTf2("tf2n2.py", "func2", 1, 1, 2); + testTf2("tf2n3.py", "func2", 1, 1, 2); testTf2("tf2o.py", "add", 2, 3, 2, 3); testTf2("tf2o2.py", "add", 2, 3, 2, 3); - testTf2("tf2p.py", "value_index", 2, 4, 2, 3); - testTf2("tf2p2.py", "value_index", 2, 4, 2, 3); - testTf2("tf2q.py", "add", 2, 3, 2, 3); - testTf2("tf2r.py", "add", 2, 3, 2, 3); + testTf2("tf2p.py", "value_index", 2, 2, 2, 3); + testTf2("tf2p2.py", "value_index", 2, 2, 2, 3); + testTf2("tf2q.py", "add", 2, 2, 2, 3); + testTf2("tf2r.py", "add", 2, 2, 2, 3); testTf2( "tf2s.py", "add", 0, 0); // NOTE: Set the expected number of tensor parameters, variables, and tensor parameter // value numbers to 2, 3, and 2 and 3, respectively, when // https://github.com/wala/ML/issues/65 is fixed. 
- testTf2("tf2t.py", "add", 2, 3, 2, 3); - testTf2("tf2u.py", "add", 2, 3, 2, 3); - testTf2("tf2u2.py", "add", 2, 3, 2, 3); - testTf2("tf2u3.py", "add", 2, 3, 2, 3); - testTf2("tf2v.py", "add", 2, 3, 2, 3); - testTf2("tf2v2.py", "add", 2, 3, 2, 3); - testTf2("tf2v3.py", "add", 2, 3, 2, 3); - testTf2("tf2v4.py", "add", 2, 4, 2, 3); - testTf2("tf2v5.py", "add", 2, 4, 2, 3); - testTf2("tf2w.py", "add", 2, 3, 2, 3); - testTf2("tf2w2.py", "add", 2, 3, 2, 3); - testTf2("tf2w3.py", "add", 2, 3, 2, 3); - testTf2("tf2w4.py", "add", 2, 3, 2, 3); - testTf2("tf2x.py", "add", 2, 3, 2, 3); - testTf2("tf2x2.py", "add", 2, 3, 2, 3); - testTf2("tf2x3.py", "add", 2, 3, 2, 3); - testTf2("tf2y.py", "add", 2, 3, 2, 3); - testTf2("tf2y2.py", "add", 2, 3, 2, 3); - testTf2("tf2y3.py", "add", 2, 3, 2, 3); - testTf2("tf2y4.py", "add", 2, 3, 2, 3); - testTf2("tf2y5.py", "add", 2, 3, 2, 3); - testTf2("tf2y6.py", "add", 2, 3, 2, 3); - testTf2("tf2z.py", "add", 2, 5, 2, 3); - testTf2("tf2z2.py", "add", 2, 5, 2, 3); - testTf2("tf2z3.py", "add", 2, 5, 2, 3); - testTf2("tf2z4.py", "add", 2, 5, 2, 3); - testTf2("tf2aa.py", "add", 2, 3, 2, 3); - testTf2("tf2aa2.py", "add", 2, 3, 2, 3); - testTf2("tf2aa3.py", "add", 2, 3, 2, 3); - testTf2("tf2aa4.py", "add", 2, 3, 2, 3); - testTf2("tf2aa5.py", "add", 2, 3, 2, 3); - testTf2("tf2aa6.py", "add", 2, 3, 2, 3); - testTf2("tf2aa7.py", "add", 2, 3, 2, 3); - testTf2("tf2aa8.py", "add", 2, 3, 2, 3); - testTf2("tf2bb.py", "add", 2, 5, 2, 3); - testTf2("tf2bb2.py", "add", 2, 5, 2, 3); - testTf2("tf2bb3.py", "add", 2, 5, 2, 3); - testTf2("tf2bb4.py", "add", 2, 5, 2, 3); - testTf2("tf2cc.py", "add", 2, 3, 2, 3); - testTf2("tf2cc2.py", "add", 2, 3, 2, 3); - testTf2("tf2cc3.py", "add", 2, 3, 2, 3); - testTf2("tf2dd.py", "add", 2, 3, 2, 3); - testTf2("tf2dd2.py", "add", 2, 3, 2, 3); - testTf2("tf2ee.py", "add", 2, 3, 2, 3); - testTf2("tf2ee2.py", "add", 2, 3, 2, 3); - testTf2("tf2ff.py", "add", 2, 3, 2, 3); - testTf2("tf2ff2.py", "add", 2, 3, 2, 3); - 
testTf2("tf2gg.py", "add", 2, 3, 2, 3); - testTf2("tf2gg2.py", "add", 2, 3, 2, 3); - testTf2("tf2gg3.py", "add", 2, 3, 2, 3); - testTf2("tf2hh.py", "add", 2, 3, 2, 3); - testTf2("tf2hh2.py", "add", 2, 3, 2, 3); - testTf2("tf2hh3.py", "add", 2, 3, 2, 3); - testTf2("tf2hh4.py", "add", 2, 3, 2, 3); - testTf2("tf2ii.py", "add", 2, 3, 2, 3); - testTf2("tf2ii2.py", "add", 2, 3, 2, 3); - testTf2("tf2ii3.py", "add", 2, 3, 2, 3); - testTf2("tf2jj.py", "add", 2, 3, 2, 3); - testTf2("tf2jj2.py", "add", 2, 3, 2, 3); - testTf2("tf2kk.py", "add", 2, 3, 2, 3); - testTf2("tf2kk2.py", "add", 2, 3, 2, 3); - testTf2("tf2ll.py", "add", 2, 3, 2, 3); - testTf2("tf2ll2.py", "add", 2, 3, 2, 3); - testTf2("tf2ll3.py", "add", 2, 3, 2, 3); - testTf2("tf2mm.py", "add", 2, 3, 2, 3); - testTf2("tf2mm2.py", "add", 2, 3, 2, 3); - testTf2("tf2nn.py", "value_index", 2, 4, 2, 3); - testTf2("tf2nn2.py", "value_index", 2, 4, 2, 3); - testTf2("tf2nn3.py", "value_index", 2, 4, 2, 3); - testTf2("tf2nn4.py", "value_index", 2, 4, 2, 3); - testTf2("tf2oo.py", "func2", 1, 4, 2); - testTf2("tf2oo2.py", "func2", 1, 4, 2); - testTf2("tf2oo3.py", "func2", 1, 4, 2); - testTf2("tf2oo4.py", "func2", 1, 4, 2); - testTf2("tf2_testing_decorator.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator2.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator3.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator4.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator5.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator6.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator7.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator8.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator9.py", "returned", 1, 3, 2); - testTf2("tf2_testing_decorator10.py", "returned", 1, 3, 2); + testTf2("tf2t.py", "add", 2, 2, 2, 3); + testTf2("tf2u.py", "add", 2, 2, 2, 3); + testTf2("tf2u2.py", "add", 2, 2, 2, 3); + testTf2("tf2u3.py", "add", 2, 2, 2, 3); + testTf2("tf2v.py", "add", 2, 2, 2, 3); + 
testTf2("tf2v2.py", "add", 2, 2, 2, 3); + testTf2("tf2v3.py", "add", 2, 2, 2, 3); + testTf2("tf2v4.py", "add", 2, 2, 2, 3); + testTf2("tf2v5.py", "add", 2, 2, 2, 3); + testTf2("tf2w.py", "add", 2, 2, 2, 3); + testTf2("tf2w2.py", "add", 2, 2, 2, 3); + testTf2("tf2w3.py", "add", 2, 2, 2, 3); + testTf2("tf2w4.py", "add", 2, 2, 2, 3); + testTf2("tf2x.py", "add", 2, 2, 2, 3); + testTf2("tf2x2.py", "add", 2, 2, 2, 3); + testTf2("tf2x3.py", "add", 2, 2, 2, 3); + testTf2("tf2y.py", "add", 2, 2, 2, 3); + testTf2("tf2y2.py", "add", 2, 2, 2, 3); + testTf2("tf2y3.py", "add", 2, 2, 2, 3); + testTf2("tf2y4.py", "add", 2, 2, 2, 3); + testTf2("tf2y5.py", "add", 2, 2, 2, 3); + testTf2("tf2y6.py", "add", 2, 2, 2, 3); + testTf2("tf2z.py", "add", 2, 2, 2, 3); + testTf2("tf2z2.py", "add", 2, 2, 2, 3); + testTf2("tf2z3.py", "add", 2, 2, 2, 3); + testTf2("tf2z4.py", "add", 2, 2, 2, 3); + testTf2("tf2aa.py", "add", 2, 2, 2, 3); + testTf2("tf2aa2.py", "add", 2, 2, 2, 3); + testTf2("tf2aa3.py", "add", 2, 2, 2, 3); + testTf2("tf2aa4.py", "add", 2, 2, 2, 3); + testTf2("tf2aa5.py", "add", 2, 2, 2, 3); + testTf2("tf2aa6.py", "add", 2, 2, 2, 3); + testTf2("tf2aa7.py", "add", 2, 2, 2, 3); + testTf2("tf2aa8.py", "add", 2, 2, 2, 3); + testTf2("tf2bb.py", "add", 2, 2, 2, 3); + testTf2("tf2bb2.py", "add", 2, 2, 2, 3); + testTf2("tf2bb3.py", "add", 2, 2, 2, 3); + testTf2("tf2bb4.py", "add", 2, 2, 2, 3); + testTf2("tf2cc.py", "add", 2, 2, 2, 3); + testTf2("tf2cc2.py", "add", 2, 2, 2, 3); + testTf2("tf2cc3.py", "add", 2, 2, 2, 3); + testTf2("tf2dd.py", "add", 2, 2, 2, 3); + testTf2("tf2dd2.py", "add", 2, 2, 2, 3); + testTf2("tf2ee.py", "add", 2, 2, 2, 3); + testTf2("tf2ee2.py", "add", 2, 2, 2, 3); + testTf2("tf2ff.py", "add", 2, 2, 2, 3); + testTf2("tf2ff2.py", "add", 2, 2, 2, 3); + testTf2("tf2gg.py", "add", 2, 2, 2, 3); + testTf2("tf2gg2.py", "add", 2, 2, 2, 3); + testTf2("tf2gg3.py", "add", 2, 2, 2, 3); + testTf2("tf2hh.py", "add", 2, 2, 2, 3); + testTf2("tf2hh2.py", "add", 2, 2, 2, 3); + 
testTf2("tf2hh3.py", "add", 2, 2, 2, 3); + testTf2("tf2hh4.py", "add", 2, 2, 2, 3); + testTf2("tf2ii.py", "add", 2, 2, 2, 3); + testTf2("tf2ii2.py", "add", 2, 2, 2, 3); + testTf2("tf2ii3.py", "add", 2, 2, 2, 3); + testTf2("tf2jj.py", "add", 2, 2, 2, 3); + testTf2("tf2jj2.py", "add", 2, 2, 2, 3); + testTf2("tf2kk.py", "add", 2, 2, 2, 3); + testTf2("tf2kk2.py", "add", 2, 2, 2, 3); + testTf2("tf2ll.py", "add", 2, 2, 2, 3); + testTf2("tf2ll2.py", "add", 2, 2, 2, 3); + testTf2("tf2ll3.py", "add", 2, 2, 2, 3); + testTf2("tf2mm.py", "add", 2, 2, 2, 3); + testTf2("tf2mm2.py", "add", 2, 2, 2, 3); + testTf2("tf2nn.py", "value_index", 2, 2, 2, 3); + testTf2("tf2nn2.py", "value_index", 2, 2, 2, 3); + testTf2("tf2nn3.py", "value_index", 2, 2, 2, 3); + testTf2("tf2nn4.py", "value_index", 2, 2, 2, 3); + testTf2("tf2oo.py", "func2", 1, 1, 2); + testTf2("tf2oo2.py", "func2", 1, 1, 2); + testTf2("tf2oo3.py", "func2", 1, 1, 2); + testTf2("tf2oo4.py", "func2", 1, 1, 2); + testTf2("tf2_testing_decorator.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator2.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator3.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator4.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator5.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator6.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator7.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator8.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator9.py", "returned", 1, 1, 2); + testTf2("tf2_testing_decorator10.py", "returned", 1, 1, 2); // FIXME: Test tf2_test_dataset.py really has three tensors in its dataset. We are currently // treating it as one. But, in the literal case, it should be possible to model it like the list // tests below. 
@@ -207,17 +210,55 @@ public void testTf2() testTf2("tf2_test_dataset3.py", "add", 2, 2, 2, 3); testTf2("tf2_test_dataset4.py", "add", 2, 2, 2, 3); testTf2("tf2_test_dataset5.py", "add", 2, 2, 2, 3); - testTf2("tf2_test_tensor_list.py", "add", 2, 3, 2, 3); - testTf2("tf2_test_tensor_list2.py", "add", 0, 2); - testTf2("tf2_test_tensor_list3.py", "add", 0, 2); + testTf2("tf2_test_dataset6.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_dataset7.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_dataset8.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_dataset9.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_dataset10.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_tensor_list.py", "add", 2, 2, 2, 3); + testTf2("tf2_test_tensor_list2.py", "add", 0, 0); + testTf2("tf2_test_tensor_list3.py", "add", 0, 0); testTf2("tf2_test_tensor_list4.py", "add", 0, 0); - testTf2("tf2_test_tensor_list5.py", "add", 0, 2); - testTf2("tf2_test_model_call.py", "SequentialModel.__call__", 1, 4, 3); - testTf2("tf2_test_model_call2.py", "SequentialModel.call", 1, 4, 3); - testTf2("tf2_test_model_call3.py", "SequentialModel.call", 1, 4, 3); - testTf2("tf2_test_model_call4.py", "SequentialModel.__call__", 1, 4, 3); - testTf2("tf2_test_callbacks.py", "replica_fn", 1, 3, 2); - testTf2("tf2_test_callbacks2.py", "replica_fn", 1, 4, 2); + testTf2("tf2_test_tensor_list5.py", "add", 0, 0); + testTf2("tf2_test_model_call.py", "SequentialModel.__call__", 1, 1, 3); + testTf2("tf2_test_model_call2.py", "SequentialModel.call", 1, 1, 3); + testTf2("tf2_test_model_call3.py", "SequentialModel.call", 1, 1, 3); + testTf2("tf2_test_model_call4.py", "SequentialModel.__call__", 1, 1, 3); + testTf2("tf2_test_callbacks.py", "replica_fn", 1, 1, 2); + testTf2("tf2_test_callbacks2.py", "replica_fn", 1, 1, 2); + testTf2("tensorflow_gan_tutorial.py", "train_step", 1, 2, 2); + testTf2("tensorflow_gan_tutorial2.py", "train_step", 1, 2, 2); + testTf2("tensorflow_eager_execution.py", "MyModel.call", 1, 1, 3); + testTf2("neural_network.py", 
"NeuralNet.call", 1, 1, 3); + testTf2( + "neural_network.py", + "cross_entropy_loss", + 1, + 4, + 3); // NOTE: Change to 2 tensor parameters once https://github.com/wala/ML/issues/127 is + // fixed. Values 2 and 3 will correspond to the tensor parameters. + testTf2("neural_network.py", "run_optimization", 2, 2, 2, 3); + testTf2( + "neural_network.py", + "accuracy", + 1, + 3, + 3); // NOTE: Change to 2 tensor parameters and 5 tensor variables once + // https://github.com/wala/ML/issues/127 is fixed. Values 2 and 3 will correspond to the + // tensor parameters. + testTf2("autoencoder.py", "encoder", 1, 18, 2); + testTf2("autoencoder.py", "mean_square", 2, 2, 2, 3); + testTf2("autoencoder.py", "run_optimization", 1, 3, 2); + testTf2("autoencoder.py", "decoder", 1, 18, 2); + testTf2("tf2_test_sigmoid.py", "f", 1, 1, 2); + testTf2("tf2_test_sigmoid2.py", "f", 1, 1, 2); + testTf2("tf2_test_add.py", "f", 1, 1, 2); + testTf2("tf2_test_add2.py", "f", 1, 1, 2); + testTf2("tf2_test_add3.py", "f", 1, 1, 2); + testTf2("tf2_test_add4.py", "f", 1, 1, 2); + testTf2("tf2_test_add5.py", "f", 1, 1, 2); + testTf2("tf2_test_add6.py", "f", 1, 1, 2); + testTf2("multigpu_training.py", "run_optimization", 2, 4, 2, 3); } private void testTf2( @@ -246,11 +287,11 @@ private void testTf2( logger.info("Tensor analysis: " + analysis); - // Create a mapping from method signatures to pointer keys. - Map> methodSignatureToPointerKeys = new HashMap<>(); + // Create a mapping from function signatures to pointer keys. + Map> functionSignatureToPointerKeys = new HashMap<>(); - // Create a mapping from method signatures to tensor variables. - Map> methodSignatureToTensorVariables = new HashMap<>(); + // Create a mapping from function signatures to tensor variables. + Map> functionSignatureToTensorVariables = new HashMap<>(); // for each pointer key, tensor variable pair. 
analysis.forEach( @@ -260,7 +301,7 @@ private void testTf2( if (pointerKey instanceof LocalPointerKey) { LocalPointerKey localPointerKey = (LocalPointerKey) pointerKey; - // get the call graph node associated with the + // get the call graph node associated with the pointer key. CGNode node = localPointerKey.getNode(); // get the method associated with the call graph node. @@ -268,7 +309,7 @@ private void testTf2( String methodSignature = method.getSignature(); // associate the method to the pointer key. - methodSignatureToPointerKeys.compute( + functionSignatureToPointerKeys.compute( methodSignature, (k, v) -> { if (v == null) v = new HashSet<>(); @@ -279,7 +320,7 @@ private void testTf2( TensorVariable tensorVariable = p.snd; // associate the method to the tensor variables. - methodSignatureToTensorVariables.compute( + functionSignatureToTensorVariables.compute( methodSignature, (k, v) -> { if (v == null) v = new HashSet<>(); @@ -289,38 +330,53 @@ private void testTf2( } else logger.warning(() -> "Encountered: " + pointerKey.getClass()); }); - // check the maps. - assertEquals(expectedNumberOfTensorVariables, methodSignatureToPointerKeys.size()); - assertEquals(expectedNumberOfTensorVariables, methodSignatureToTensorVariables.size()); - final String functionSignature = "script " + filename + "." + functionName + ".do()LRoot;"; - // get the pointer keys for the function. - Set functionPointerKeys = - methodSignatureToPointerKeys.getOrDefault(functionSignature, Collections.emptySet()); + // get the tensor variables for the function. + Set functionTensorVariables = + functionSignatureToTensorVariables.getOrDefault(functionSignature, emptySet()); - // check tensor parameters. - assertEquals(expectedNumberOfTensorParameters, functionPointerKeys.size()); + assertEquals(expectedNumberOfTensorVariables, functionTensorVariables.size()); // check value numbers. 
- Set actualValueNumberSet = - functionPointerKeys.stream() - .map(LocalPointerKey::getValueNumber) - .collect(Collectors.toSet()); - - assertEquals(expectedTensorParameterValueNumbers.length, actualValueNumberSet.size()); - Arrays.stream(expectedTensorParameterValueNumbers) - .forEach( - ev -> - assertTrue( - "Expecting " + actualValueNumberSet + " to contain " + ev + ".", - actualValueNumberSet.contains(ev))); - - // get the tensor variables for the function. - Set functionTensors = - methodSignatureToTensorVariables.getOrDefault(functionSignature, Collections.emptySet()); - - // check tensor parameters. - assertEquals(expectedNumberOfTensorParameters, functionTensors.size()); + assertEquals( + "Each tensor parameter should have a unique value number.", + expectedNumberOfTensorParameters, + expectedTensorParameterValueNumbers.length); + + // get the pointer keys for the function by their contexts. + Map> contextToFunctionParameterPointerKeys = + functionSignatureToPointerKeys.getOrDefault(functionSignature, emptySet()).stream() + .filter(LocalPointerKey::isParameter) + .collect(groupingBy(lpk -> lpk.getNode().getContext(), toSet())); + + assertTrue( + "Because tensor parameters are inferred via function arguments, we need at least one" + + " calling context if we are expecting at least one tensor parameter.", + expectedNumberOfTensorParameters <= 0 || contextToFunctionParameterPointerKeys.size() > 0); + + for (Context ctx : contextToFunctionParameterPointerKeys.keySet()) { + // check tensor parameters. + Set functionParameterPointerKeys = + contextToFunctionParameterPointerKeys.get(ctx); + + assertEquals(expectedNumberOfTensorParameters, functionParameterPointerKeys.size()); + + // check value numbers. 
+ Set actualParameterValueNumberSet = + functionParameterPointerKeys.stream() + .map(LocalPointerKey::getValueNumber) + .collect(Collectors.toSet()); + + assertEquals( + expectedTensorParameterValueNumbers.length, actualParameterValueNumberSet.size()); + + Arrays.stream(expectedTensorParameterValueNumbers) + .forEach( + ev -> + assertTrue( + "Expecting " + actualParameterValueNumberSet + " to contain " + ev + ".", + actualParameterValueNumberSet.contains(ev))); + } } } diff --git a/com.ibm.wala.cast.python.ml/data/tensorflow.xml b/com.ibm.wala.cast.python.ml/data/tensorflow.xml index 1bbab15c5..8cc2c550c 100644 --- a/com.ibm.wala.cast.python.ml/data/tensorflow.xml +++ b/com.ibm.wala.cast.python.ml/data/tensorflow.xml @@ -32,6 +32,8 @@ + + @@ -59,6 +61,8 @@ + + @@ -67,6 +71,14 @@ + + + + + + + + @@ -120,6 +132,8 @@ + + @@ -247,6 +261,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -631,6 +665,12 @@ + + + + + + @@ -650,6 +690,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -658,6 +724,12 @@ + + + + + + @@ -683,15 +755,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java index 1fbac142c..f6259a4f5 100644 --- a/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java +++ b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java @@ -1,5 +1,7 @@ package com.ibm.wala.cast.python.ml.client; +import static com.google.common.collect.Sets.newHashSet; +import static com.ibm.wala.cast.python.ml.types.TensorFlowTypes.DATASET; import static com.ibm.wala.cast.types.AstMethodReference.fnReference; import com.ibm.wala.cast.ir.ssa.EachElementGetInstruction; @@ -7,6 +9,7 @@ import com.ibm.wala.cast.python.client.PythonAnalysisEngine; import 
com.ibm.wala.cast.python.ml.analysis.TensorTypeAnalysis; import com.ibm.wala.cast.python.ml.types.TensorType; +import com.ibm.wala.cast.python.ssa.PythonPropertyRead; import com.ibm.wala.cast.python.types.PythonTypes; import com.ibm.wala.cast.types.AstMethodReference; import com.ibm.wala.classLoader.CallSiteReference; @@ -119,13 +122,54 @@ private static Set getDataflowSources( // We are potentially pulling a tensor out of a tensor iterable. EachElementGetInstruction eachElementGetInstruction = (EachElementGetInstruction) inst; - // Find the potential tensor iterable definition. - int use = eachElementGetInstruction.getUse(0); - SSAInstruction def = du.getDef(use); + // Don't add the source if the container has elements in it. In that case, we want to add + // the individual elements themselves as sources instead. + if (definitionIsNonScalar(eachElementGetInstruction, du)) + logger.info( + "Definition of instruction: " + + eachElementGetInstruction + + " is non-scalar. Skipping..."); + else { + logger.info( + "Definition of instruction: " + + eachElementGetInstruction + + " is scalar. Processing..."); + + // Find the potential tensor iterable definition. + processInstruction( + eachElementGetInstruction, + du, + localPointerKeyNode, + src, + sources, + callGraph, + pointerAnalysis, + newHashSet()); + } + } else if (inst instanceof PythonPropertyRead) { + // We are potentially pulling a tensor out of a non-scalar tensor iterable. + PythonPropertyRead propertyRead = (PythonPropertyRead) inst; - if (definesTensorIterable(def, localPointerKeyNode, callGraph, pointerAnalysis)) { - sources.add(src); - logger.info("Added dataflow source from tensor iterable: " + src + "."); + // Find the potential tensor iterable definition. + int objectRef = propertyRead.getObjectRef(); + SSAInstruction def = du.getDef(objectRef); + + if (def == null) { + // definition is unavailable from the local DefUse. Use interprocedural analysis using + // the PA. 
+ processInstructionInterprocedurally( + propertyRead, objectRef, localPointerKeyNode, src, sources, pointerAnalysis); + } else if (def instanceof EachElementGetInstruction + || def instanceof PythonPropertyRead) { + processInstruction( + def, + du, + localPointerKeyNode, + src, + sources, + callGraph, + pointerAnalysis, + newHashSet()); } } } @@ -133,6 +177,153 @@ private static Set getDataflowSources( return sources; } + /** + * Processes the given {@link SSAInstruction} to decide if the given {@link PointsToSetVariable} + * is added to the given {@link Set} of {@link PointsToSetVariable}s as tensor dataflow sources. + * + * @param instruction The {@link SSAInstruction} to process. + * @param du The {@link DefUse} corresponding to the siven {@link SSAInstruction}. + * @param node The {@link CGNode} containing the given {@link SSAInstruction}. + * @param src The {@link PointsToSetVariable} under question as to whether it shoudl be considered + * a tensor dataflow source. + * @param sources The {@link Set} of tensor dataflow sources. + * @param callGraph The {@link CallGraph} containing the given {@link SSAInstruction}. + * @param pointerAnalysis The {@link PointerAnalysis} corresponding to the given {@link + * CallGraph}. + * @param seen A {@link Set} of previously processed {@link SSAInstruction}. + * @return True iff the given {@link PointsToSetVariable} was added to the given {@link Set} of + * {@link PointsToSetVariable} dataflow sources. + */ + private static boolean processInstruction( + SSAInstruction instruction, + DefUse du, + CGNode node, + PointsToSetVariable src, + Set sources, + CallGraph callGraph, + PointerAnalysis pointerAnalysis, + Set seen) { + if (seen.contains(instruction)) + logger.fine(() -> "Skipping instruction: " + instruction + ". 
We've seen it before."); + else { + logger.fine(() -> "Processing instruction: " + instruction + "."); + seen.add(instruction); + + if (instruction != null && instruction.getNumberOfUses() > 0) { + int use = instruction.getUse(0); + SSAInstruction def = du.getDef(use); + + // First try intraprocedural analysis. + if (definesTensorIterable(def, node, callGraph, pointerAnalysis)) { + sources.add(src); + logger.info("Added dataflow source from tensor iterable: " + src + "."); + return true; + } else { + // Use interprocedural analysis using the PA. + boolean added = + processInstructionInterprocedurally( + instruction, use, node, src, sources, pointerAnalysis); + + if (added) return true; + else + // keep going up. + return processInstruction( + def, du, node, src, sources, callGraph, pointerAnalysis, seen); + } + } + } + + return false; + } + + /** + * Similar to processInstruction but does so using the given {@link PointerAnalysis}. + * + * @param instruction The {@link SSAInstruction} to be processed. + * @param use The {@link DefUse} corresponding to the given {@link SSAInstruction}. + * @param node The {@link CGNode} containing the given {@link SSAInstruction}. + * @param src The {@link PointsToSetVariable} being decided upon whether it should be considered + * as a tensor dataflow source. + * @param sources The {@link Set} of all tensor dataflow sources, i.e., {@link + * PointsToSetVariable}s. + * @param pointerAnalysis The {@link PointerAnalysis} built from the given {@link CGNode}'s {@link + * CallGraph}. + * @return True iff the given {@link PointsToSetVariable} was added to the given set of tensor + * dataflow sources, i.e., the given {@link Set} of {@link PointsToSetVariable}s. 
+ */ + private static boolean processInstructionInterprocedurally( + SSAInstruction instruction, + int use, + CGNode node, + PointsToSetVariable src, + Set sources, + PointerAnalysis pointerAnalysis) { + logger.info( + () -> + "Using interprocedural analysis to find potential tensor iterable definition for use: " + + use + + " of instruction: " + + instruction + + "."); + + // Look up the use in the pointer analysis to see if it points to a dataset. + PointerKey usePointerKey = pointerAnalysis.getHeapModel().getPointerKeyForLocal(node, use); + + for (InstanceKey ik : pointerAnalysis.getPointsToSet(usePointerKey)) { + if (ik instanceof AllocationSiteInNode) { + AllocationSiteInNode asin = (AllocationSiteInNode) ik; + IClass concreteType = asin.getConcreteType(); + TypeReference reference = concreteType.getReference(); + + if (reference.equals(DATASET)) { + sources.add(src); + logger.info("Added dataflow source from tensor dataset: " + src + "."); + return true; + } + } + } + + return false; + } + + /** + * True iff the given {@link EachElementGetInstruction} constitutes individual elements. + * + * @param eachElementGetInstruction The {@link EachElementGetInstruction} in question. + * @param du The {@link DefUse} for the containing {@link CGNode}. + * @return True iff the definition of the given {@link EachElementGetInstruction} is non-scalar. 
+ */ + private static boolean definitionIsNonScalar( + EachElementGetInstruction eachElementGetInstruction, DefUse du) { + int def = eachElementGetInstruction.getDef(); + logger.info( + "Processing definition: " + def + " of instruction: " + eachElementGetInstruction + "."); + + int numberOfUses = du.getNumberOfUses(def); + logger.info( + "Definition: " + + def + + " of instruction: " + + eachElementGetInstruction + + " has " + + numberOfUses + + " uses."); + + for (Iterator uses = du.getUses(def); uses.hasNext(); ) { + SSAInstruction instruction = uses.next(); + logger.info("Processing use: " + instruction + "."); + + if (instruction instanceof PythonPropertyRead) { + PythonPropertyRead read = (PythonPropertyRead) instruction; + logger.info("Found property read use: " + read + "."); + + // if the definition appears on the LHS of the read. + if (read.getObjectRef() == def) return true; + } + } + return false; + } + /** * Returns true iff the given {@link SSAInstruction} defines an iterable of tensors. * diff --git a/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/types/TensorFlowTypes.java b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/types/TensorFlowTypes.java new file mode 100644 index 000000000..d906363d0 --- /dev/null +++ b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/types/TensorFlowTypes.java @@ -0,0 +1,18 @@ +package com.ibm.wala.cast.python.ml.types; + +import com.ibm.wala.cast.python.types.PythonTypes; +import com.ibm.wala.types.TypeName; +import com.ibm.wala.types.TypeReference; + +/** + * Types found in the TensorFlow library. 
+ * + * @author Raffi Khatchadourian + */ +public class TensorFlowTypes extends PythonTypes { + + public static final TypeReference DATASET = + TypeReference.findOrCreate(pythonLoader, TypeName.findOrCreate("Ltensorflow/data/Dataset")); + + private TensorFlowTypes() {} +} diff --git a/com.ibm.wala.cast.python.test/data/autoencoder.py b/com.ibm.wala.cast.python.test/data/autoencoder.py new file mode 100644 index 000000000..9e99da203 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/autoencoder.py @@ -0,0 +1,188 @@ +# From https://github.com/aymericdamien/TensorFlow-Examples/blob/6dcbe14649163814e72a22a999f20c5e247ce988/tensorflow_v2/notebooks/3_NeuralNetworks/autoencoder.ipynb. + +# %% +# """ +# # Auto-Encoder Example + +# Build a 2 layers auto-encoder with TensorFlow v2 to compress images to a lower latent space and then reconstruct them. + +# - Author: Aymeric Damien +# - Project: https://github.com/aymericdamien/TensorFlow-Examples/ +# """ + +# %% +# """ +# ## Auto-Encoder Overview + +# ae + +# References: +# - [Gradient-based learning applied to document recognition](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Proceedings of the IEEE, 86(11):2278-2324, November 1998. + +# ## MNIST Dataset Overview + +# This example is using MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 255. + +# In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28). 
+ +# ![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png) + +# More info: http://yann.lecun.com/exdb/mnist/ +# """ + +# %% +from __future__ import absolute_import, division, print_function + +import tensorflow as tf +print("TensorFlow version:", tf.__version__) +assert(tf.__version__ == "2.9.3") +import numpy as np + +# %% +# MNIST Dataset parameters. +num_features = 784 # data features (img shape: 28*28). + +# Training parameters. +learning_rate = 0.01 +training_steps = 1 +batch_size = 256 +display_step = 1000 + +# Network Parameters +num_hidden_1 = 128 # 1st layer num features. +num_hidden_2 = 64 # 2nd layer num features (the latent dim). + +# %% +# Prepare MNIST data. +from tensorflow.keras.datasets import mnist +(x_train, y_train), (x_test, y_test) = mnist.load_data() +# Convert to float32. +x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32) +# Flatten images to 1-D vector of 784 features (28*28). +x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features]) +# Normalize images value from [0, 255] to [0, 1]. +x_train, x_test = x_train / 255., x_test / 255. + +# %% +# Use tf.data API to shuffle and batch data. +train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_data = train_data.repeat().shuffle(10000).batch(batch_size).prefetch(1) + +test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)) +test_data = test_data.repeat().batch(batch_size).prefetch(1) + +# %% +# Store layers weight & bias + +# A random value generator to initialize weights. 
+random_normal = tf.initializers.RandomNormal() + +weights = { + 'encoder_h1': tf.Variable(random_normal([num_features, num_hidden_1])), + 'encoder_h2': tf.Variable(random_normal([num_hidden_1, num_hidden_2])), + 'decoder_h1': tf.Variable(random_normal([num_hidden_2, num_hidden_1])), + 'decoder_h2': tf.Variable(random_normal([num_hidden_1, num_features])), +} +biases = { + 'encoder_b1': tf.Variable(random_normal([num_hidden_1])), + 'encoder_b2': tf.Variable(random_normal([num_hidden_2])), + 'decoder_b1': tf.Variable(random_normal([num_hidden_1])), + 'decoder_b2': tf.Variable(random_normal([num_features])), +} + + +# %% +# Building the encoder. +def encoder(x): + # Encoder Hidden layer with sigmoid activation. + layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), + biases['encoder_b1'])) + # Encoder Hidden layer with sigmoid activation. + layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), + biases['encoder_b2'])) + return layer_2 + + +# Building the decoder. +def decoder(x): + # Decoder Hidden layer with sigmoid activation. + layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), + biases['decoder_b1'])) + # Decoder Hidden layer with sigmoid activation. + layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), + biases['decoder_b2'])) + return layer_2 + + +# %% +# Mean square loss between original images and reconstructed ones. +def mean_square(reconstructed, original): + return tf.reduce_mean(tf.pow(original - reconstructed, 2)) + + +# Adam optimizer. +optimizer = tf.optimizers.Adam(learning_rate=learning_rate) + + +# %% +# Optimization process. +def run_optimization(x): + # Wrap computation inside a GradientTape for automatic differentiation. + with tf.GradientTape() as g: + reconstructed_image = decoder(encoder(x)) + loss = mean_square(reconstructed_image, x) + + # Variables to update, i.e. trainable variables. 
+ trainable_variables = list(weights.values()) + list(biases.values()) + + # Compute gradients. + gradients = g.gradient(loss, trainable_variables) + + # Update W and b following gradients. + optimizer.apply_gradients(zip(gradients, trainable_variables)) + + return loss + + +# %% +# Run training for the given number of steps. +for step, (batch_x, _) in enumerate(train_data.take(training_steps + 1)): + + # Run the optimization. + loss = run_optimization(batch_x) + + if step % display_step == 0: + print("step: %i, loss: %f" % (step, loss)) + +# %% +# Testing and Visualization. +import matplotlib.pyplot as plt + +# %% +# Encode and decode images from test set and visualize their reconstruction. +n = 4 +canvas_orig = np.empty((28 * n, 28 * n)) +canvas_recon = np.empty((28 * n, 28 * n)) +for i, (batch_x, _) in enumerate(test_data.take(n)): + # Encode and decode the digit image. + reconstructed_images = decoder(encoder(batch_x)) + # Display original images. + for j in range(n): + # Draw the generated digits. + img = batch_x[j].numpy().reshape([28, 28]) + canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = img + # Display reconstructed images. + for j in range(n): + # Draw the generated digits. 
+ reconstr_img = reconstructed_images[j].numpy().reshape([28, 28]) + canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = reconstr_img + +# print("Original Images") +# plt.figure(figsize=(n, n)) +# plt.imshow(canvas_orig, origin="upper", cmap="gray") +# plt.show() +# +# print("Reconstructed Images") +# plt.figure(figsize=(n, n)) +# plt.imshow(canvas_recon, origin="upper", cmap="gray") +# plt.show() diff --git a/com.ibm.wala.cast.python.test/data/multigpu_training.py b/com.ibm.wala.cast.python.test/data/multigpu_training.py new file mode 100644 index 000000000..06883afb9 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/multigpu_training.py @@ -0,0 +1,235 @@ +# From https://github.com/aymericdamien/TensorFlow-Examples/blob/6dcbe14649163814e72a22a999f20c5e247ce988/tensorflow_v2/notebooks/6_Hardware/multigpu_training.ipynb. +# %% +""" +# Multi-GPU Training Example + +Train a convolutional neural network on multiple GPU with TensorFlow 2.0+. + +- Author: Aymeric Damien +- Project: https://github.com/aymericdamien/TensorFlow-Examples/ +""" + +# %% +""" +## Training with multiple GPU cards + +In this example, we are using data parallelism to split the training accross multiple GPUs. Each GPU has a full replica of the neural network model, and the weights (i.e. variables) are updated synchronously by waiting that each GPU process its batch of data. + +First, each GPU process a distinct batch of data and compute the corresponding gradients, then, all gradients are accumulated in the CPU and averaged. The model weights are finally updated with the gradients averaged, and the new model weights are sent back to each GPU, to repeat the training process. + +Parallelism + +## CIFAR10 Dataset Overview + +The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. 
+ +![CIFAR10 Dataset](https://storage.googleapis.com/kaggle-competitions/kaggle/3649/media/cifar-10.png) + +More info: https://www.cs.toronto.edu/~kriz/cifar.html +""" + +# %% + +import tensorflow as tf +from tensorflow.keras import Model, layers +import time +import numpy as np + +# %% +# MNIST dataset parameters. +num_classes = 10 # total classes (0-9 digits). +num_gpus = 4 + +# Training parameters. +learning_rate = 0.001 +training_steps = 1000 +# Split batch size equally between GPUs. +# Note: Reduce batch size if you encounter OOM Errors. +batch_size = 1024 * num_gpus +display_step = 20 + +# Network parameters. +conv1_filters = 64 # number of filters for 1st conv layer. +conv2_filters = 128 # number of filters for 2nd conv layer. +conv3_filters = 256 # number of filters for 2nd conv layer. +fc1_units = 2048 # number of neurons for 1st fully-connected layer. + +# %% +# Prepare MNIST data. +from tensorflow.keras.datasets import cifar10 +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +# Convert to float32. +x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32) +# Normalize images value from [0, 255] to [0, 1]. +x_train, x_test = x_train / 255., x_test / 255. +y_train, y_test = np.reshape(y_train, (-1)), np.reshape(y_test, (-1)) + +# %% +# Use tf.data API to shuffle and batch data. +train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_data = train_data.repeat().shuffle(batch_size * 10).batch(batch_size).prefetch(num_gpus) + + +# %% +class ConvNet(Model): + + # Set layers. + def __init__(self): + super(ConvNet, self).__init__() + + # Convolution Layer with 64 filters and a kernel size of 3. + self.conv1_1 = layers.Conv2D(conv1_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + self.conv1_2 = layers.Conv2D(conv1_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + # Max Pooling (down-sampling) with kernel size of 2 and strides of 2. 
+ self.maxpool1 = layers.MaxPool2D(2, strides=2) + + # Convolution Layer with 128 filters and a kernel size of 3. + self.conv2_1 = layers.Conv2D(conv2_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + self.conv2_2 = layers.Conv2D(conv2_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + self.conv2_3 = layers.Conv2D(conv2_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + # Max Pooling (down-sampling) with kernel size of 2 and strides of 2. + self.maxpool2 = layers.MaxPool2D(2, strides=2) + + # Convolution Layer with 256 filters and a kernel size of 3. + self.conv3_1 = layers.Conv2D(conv3_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + self.conv3_2 = layers.Conv2D(conv3_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + self.conv3_3 = layers.Conv2D(conv3_filters, kernel_size=3, padding='SAME', activation=tf.nn.relu) + + # Flatten the data to a 1-D vector for the fully connected layer. + self.flatten = layers.Flatten() + + # Fully connected layer. + self.fc1 = layers.Dense(1024, activation=tf.nn.relu) + # Apply Dropout (if is_training is False, dropout is not applied). + self.dropout = layers.Dropout(rate=0.5) + + # Output layer, class prediction. + self.out = layers.Dense(num_classes) + + # Set forward pass. + @tf.function + def call(self, x, is_training=False): + x = self.conv1_1(x) + x = self.conv1_2(x) + x = self.maxpool1(x) + x = self.conv2_1(x) + x = self.conv2_2(x) + x = self.conv2_3(x) + x = self.maxpool2(x) + x = self.conv3_1(x) + x = self.conv3_2(x) + x = self.conv3_3(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.dropout(x, training=is_training) + x = self.out(x) + if not is_training: + # tf cross entropy expect logits without softmax, so only + # apply softmax when not training. + x = tf.nn.softmax(x) + return x + + +# %% +# Cross-Entropy Loss. +# Note that this will apply 'softmax' to the logits. 
+@tf.function +def cross_entropy_loss(x, y): + # Convert labels to int 64 for tf cross-entropy function. + y = tf.cast(y, tf.int64) + # Apply softmax to logits and compute cross-entropy. + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x) + # Average loss across the batch. + return tf.reduce_mean(loss) + + +# Accuracy metric. +@tf.function +def accuracy(y_pred, y_true): + # Predicted class is the index of highest score in prediction vector (i.e. argmax). + correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64)) + return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1) + + +@tf.function +def backprop(batch_x, batch_y, trainable_variables): + # Wrap computation inside a GradientTape for automatic differentiation. + with tf.GradientTape() as g: + # Forward pass. + pred = conv_net(batch_x, is_training=True) + # Compute loss. + loss = cross_entropy_loss(pred, batch_y) + # Compute gradients. + gradients = g.gradient(loss, trainable_variables) + return gradients + + +# Build the function to average the gradients. +@tf.function +def average_gradients(tower_grads): + avg_grads = [] + for tgrads in zip(*tower_grads): + grads = [] + for g in tgrads: + expanded_g = tf.expand_dims(g, 0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + avg_grads.append(grad) + + return avg_grads + + +# %% +with tf.device('/cpu:0'): + # Build convnet. + conv_net = ConvNet() + # Stochastic gradient descent optimizer. + optimizer = tf.optimizers.Adam(learning_rate) + + +# %% +# Optimization process. +def run_optimization(x, y): + # Save gradients for all GPUs. + tower_grads = [] + # Variables to update, i.e. trainable variables. + trainable_variables = conv_net.trainable_variables + + with tf.device('/cpu:0'): + for i in range(num_gpus): + # Split data between GPUs. 
+ gpu_batch_size = int(batch_size / num_gpus) + batch_x = x[i * gpu_batch_size: (i + 1) * gpu_batch_size] + batch_y = y[i * gpu_batch_size: (i + 1) * gpu_batch_size] + + # Build the neural net on each GPU. + with tf.device('/gpu:%i' % i): + grad = backprop(batch_x, batch_y, trainable_variables) + tower_grads.append(grad) + + # Last GPU Average gradients from all GPUs. + if i == num_gpus - 1: + gradients = average_gradients(tower_grads) + + # Update vars following gradients. + optimizer.apply_gradients(list(zip(gradients, trainable_variables))) + + +# %% +# Run training for the given number of steps. +ts = time.time() +for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1): + # Run the optimization to update W and b values. + run_optimization(batch_x, batch_y) + + if step % display_step == 0 or step == 1: + dt = time.time() - ts + speed = batch_size * display_step / dt + pred = conv_net(batch_x) + loss = cross_entropy_loss(pred, batch_y) + acc = accuracy(pred, batch_y) + print(("step: %i, loss: %f, accuracy: %f, speed: %f examples/sec" % (step, loss, acc, speed))) + ts = time.time() diff --git a/com.ibm.wala.cast.python.test/data/neural_network.py b/com.ibm.wala.cast.python.test/data/neural_network.py new file mode 100644 index 000000000..3354294fd --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/neural_network.py @@ -0,0 +1,184 @@ +# From https://github.com/aymericdamien/TensorFlow-Examples/blob/6dcbe14649163814e72a22a999f20c5e247ce988/tensorflow_v2/notebooks/3_NeuralNetworks/neural_network.ipynb. + +# %% +# # Neural Network Example + +# Build a 2-hidden layers fully connected neural network (a.k.a multilayer perceptron) with TensorFlow v2. + +# This example is using a low-level approach to better understand all mechanics behind building neural networks and the training process. 
+ +# - Author: Aymeric Damien +# - Project: https://github.com/aymericdamien/TensorFlow-Examples/ +# """ + +# %% +# ## Neural Network Overview + +# nn + +# ## MNIST Dataset Overview + +# This example is using MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 255. + +# In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28). + +# ![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png) + +# More info: http://yann.lecun.com/exdb/mnist/ + +# %% +from __future__ import absolute_import, division, print_function + +import tensorflow as tf +print("TensorFlow version:", tf.__version__) +assert(tf.__version__ == "2.15.0") +from tensorflow.keras import Model, layers +import numpy as np +import timeit + +start_time = timeit.default_timer() +skipped_time = 0 + +# %% +# MNIST dataset parameters. +num_classes = 10 # total classes (0-9 digits). +num_features = 784 # data features (img shape: 28*28). + +# Training parameters. +learning_rate = 0.1 +training_steps = 20000 +batch_size = 256 +display_step = 100 + +# Network parameters. +n_hidden_1 = 128 # 1st layer number of neurons. +n_hidden_2 = 256 # 2nd layer number of neurons. + +# %% +# Prepare MNIST data. +from tensorflow.keras.datasets import mnist +(x_train, y_train), (x_test, y_test) = mnist.load_data() +# Convert to float32. +x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32) +# Flatten images to 1-D vector of 784 features (28*28). +x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features]) +# Normalize images value from [0, 255] to [0, 1]. +x_train, x_test = x_train / 255., x_test / 255. + +# %% +# Use tf.data API to shuffle and batch data. 
+train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1) + + +# %% +# Create TF Model. +class NeuralNet(Model): + + # Set layers. + def __init__(self): + super(NeuralNet, self).__init__() + # First fully-connected hidden layer. + self.fc1 = layers.Dense(n_hidden_1, activation=tf.nn.relu) + # First fully-connected hidden layer. + self.fc2 = layers.Dense(n_hidden_2, activation=tf.nn.relu) + # Second fully-connecter hidden layer. + self.out = layers.Dense(num_classes) + + # Set forward pass. + def call(self, x, is_training=False): + x = self.fc1(x) + x = self.fc2(x) + x = self.out(x) + if not is_training: + # tf cross entropy expect logits without softmax, so only + # apply softmax when not training. + x = tf.nn.softmax(x) + return x + + +# Build neural network model. +neural_net = NeuralNet() + + +# %% +# Cross-Entropy Loss. +# Note that this will apply 'softmax' to the logits. +def cross_entropy_loss(x, y): + # Convert labels to int 64 for tf cross-entropy function. + y = tf.cast(y, tf.int64) + # Apply softmax to logits and compute cross-entropy. + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x) + # Average loss across the batch. + return tf.reduce_mean(loss) + + +# Accuracy metric. +def accuracy(y_pred, y_true): + # Predicted class is the index of highest score in prediction vector (i.e. argmax). + correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64)) + return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1) + + +# Stochastic gradient descent optimizer. +optimizer = tf.optimizers.SGD(learning_rate) + + +# %% +# Optimization process. +def run_optimization(x, y): + # Wrap computation inside a GradientTape for automatic differentiation. + with tf.GradientTape() as g: + # Forward pass. + pred = neural_net(x, is_training=True) + # Compute loss. + loss = cross_entropy_loss(pred, y) + + # Variables to update, i.e. 
trainable variables. + trainable_variables = neural_net.trainable_variables + + # Compute gradients. + gradients = g.gradient(loss, trainable_variables) + + # Update W and b following gradients. + optimizer.apply_gradients(zip(gradients, trainable_variables)) + + +# %% +# Run training for the given number of steps. +for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1): + # Run the optimization to update W and b values. + run_optimization(batch_x, batch_y) + + if step % display_step == 0: + pred = neural_net(batch_x, is_training=True) + loss = cross_entropy_loss(pred, batch_y) + acc = accuracy(pred, batch_y) + print_time = timeit.default_timer() + print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc)) + skipped_time += timeit.default_timer() - print_time + +# %% +# Test model on validation set. +pred = neural_net(x_test, is_training=False) +print_time = timeit.default_timer() +print("Test Accuracy: %f" % accuracy(pred, y_test)) +skipped_time += timeit.default_timer() - print_time + +# %% +# Visualize predictions. +import matplotlib.pyplot as plt + +# %% +# Predict 5 images from validation set. +n_images = 5 +test_images = x_test[:n_images] +predictions = neural_net(test_images) + +print("Elapsed time: ", timeit.default_timer() - start_time - skipped_time) + +# Display image and model prediction. 
+for i in range(n_images): + plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray') + plt.show() + print("Model prediction: %i" % np.argmax(predictions.numpy()[i])) diff --git a/com.ibm.wala.cast.python.test/data/tensorflow_eager_execution.py b/com.ibm.wala.cast.python.test/data/tensorflow_eager_execution.py new file mode 100644 index 000000000..bd9d07c32 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tensorflow_eager_execution.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +import sys +import numpy as np +import tensorflow as tf +import tensorflow.keras as keras + + +class MyModel(keras.Model): + + def __init__(self): + super(MyModel, self).__init__() + self.conv1 = keras.layers.Conv2D(32, 3, activation="relu") + self.flatten = keras.layers.Flatten() + self.d1 = keras.layers.Dense(128, activation="relu") + self.d2 = keras.layers.Dense(10, activation="softmax") + + def call(self, x): + x = self.conv1(x) + x = self.flatten(x) + x = self.d1(x) + x = self.d2(x) + return x + + +@tf.function +def train_step(model, fn_loss, fn_accuracy, images, labels): + with tf.GradientTape() as tape: + predictions = model(images) + loss = loss_object(labels, predictions) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + fn_loss(loss) + fn_accuracy(labels, predictions) + + +@tf.function +def test_step(model, fn_loss, fn_accuracy, images, labels): + predictions = model(images) + t_loss = loss_object(labels, predictions) + + fn_loss(t_loss) + fn_accuracy(labels, predictions) + + +# +# Prepare training data +# +validation_split = 0.2 + +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +x_train = x_train.astype(np.float32) / 255.0 +x_test = x_test.astype(np.float32) / 255.0 + +num_train = int(x_train.shape[0] * (1.0 - validation_split)) +x_valid = x_train[num_train:, ..., tf.newaxis] +y_valid = y_train[num_train:] +x_train = x_train[:num_train, ..., tf.newaxis] 
+y_train = y_train[:num_train] + +x_test = x_test[..., tf.newaxis] + +train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32) +valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).batch(32) +test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32) + +# +# Model and loss functions +# +model = MyModel() + +loss_object = tf.keras.losses.SparseCategoricalCrossentropy() +optimizer = tf.keras.optimizers.Adam() + +train_loss = tf.keras.metrics.Mean(name="train_loss") +train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy") + +valid_loss = tf.keras.metrics.Mean(name="validation_loss") +valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="validation_accuracy") + +test_loss = tf.keras.metrics.Mean(name="test_loss") +test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="test_accuracy") + +# +# Run training +# +EPOCHS = 10 + +min_loss = sys.float_info.max +for epoch in range(EPOCHS): + for images, labels in train_ds: + train_step(model, train_loss, train_accuracy, images, labels) + + for valid_images, valid_labels in valid_ds: + test_step(model, valid_loss, valid_accuracy, valid_images, valid_labels) + + if valid_loss.result() < min_loss: + min_loss = valid_loss.result() + min_weights = model.get_weights() + + template = "Epoch {}, Loss: {:.4f}, Acc: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.4f}, Min Loss: {:.4f}" + print(template.format(epoch + 1, + train_loss.result(), + train_accuracy.result() * 100, + valid_loss.result(), + valid_accuracy.result() * 100, + min_loss)) + + train_loss.reset_states() + train_accuracy.reset_states() + valid_loss.reset_states() + valid_accuracy.reset_states() + +model.set_weights(min_weights) + +for test_images, test_labels in test_ds: + test_step(model, test_loss, test_accuracy, test_images, test_labels) + +print("Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(test_loss.result(), test_accuracy.result())) + diff --git 
a/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial.py b/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial.py new file mode 100644 index 000000000..e9695c42f --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial.py @@ -0,0 +1,165 @@ +# From: https://github.com/ponder-lab/samples/blob/39f7644391e664244b45c90868c804abad923eb3/tensorflow_gan_tutorial/tensorflow_gan_tutorial.py + +#!/usr/bin/env python + +import os +import time +import random +import matplotlib.pyplot as plt + +import numpy as np +import tensorflow as tf + + +def make_generator_model(): + input_node = tf.keras.Input((100,)) + x = input_node + x = tf.keras.layers.Dense(7 * 7 * 256, use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Reshape((7, 7, 256))(x) + x = tf.keras.layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding="same", use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding="same", use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding="same", use_bias=False)(x) + output_node = x + + return tf.keras.models.Model(input_node, output_node) + + +def make_discriminator_model(): + input_node = tf.keras.Input((28, 28, 1)) + x = input_node + x = tf.keras.layers.Conv2D(64, (5, 5), strides=(2, 2), padding="same")(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Dropout(0.3)(x) + x = tf.keras.layers.Conv2D(128, (5, 5), strides=(2, 2), padding="same")(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Dropout(0.3)(x) + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense(1)(x) + output_node = x + + return tf.keras.models.Model(input_node, output_node) + + +CROSS_ENTROPY = 
tf.keras.losses.BinaryCrossentropy(from_logits=True) + + +def discriminator_loss(real_output, fake_output): + real_loss = CROSS_ENTROPY(tf.ones_like(real_output), real_output) + fake_loss = CROSS_ENTROPY(tf.zeros_like(fake_output), fake_output) + total_loss = real_loss + fake_loss + return total_loss + + +def generator_loss(fake_output): + return CROSS_ENTROPY(tf.ones_like(fake_output), fake_output) + + +def train_step(images, generator, discriminator, generator_optimizer, discriminator_optimizer): + noise = tf.random.normal([images.shape[0], noise_dim]) + + with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape: + generated_images = generator(noise, training=True) + + real_output = discriminator(images, training=True) + fake_output = discriminator(generated_images, training=True) + + gen_loss = generator_loss(fake_output) + disc_loss = discriminator_loss(real_output, fake_output) + + gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables) + disc_grads = disc_tape.gradient(disc_loss, discriminator.trainable_variables) + + generator_optimizer.apply_gradients(zip(gen_grads, generator.trainable_variables)) + discriminator_optimizer.apply_gradients(zip(disc_grads, discriminator.trainable_variables)) + + +def train(dataset, epochs, checkpoint, generator, discriminator, + generator_optimizer, discriminator_optimizer, seed): + # for epoch in range(epochs, epochs*2): + for epoch in range(epochs): + start = time.time() + + for image_batch in dataset: + train_step(image_batch, generator, discriminator, + generator_optimizer, discriminator_optimizer) + + generate_and_save_images(generator, epoch + 1, seed) + + if (epoch + 1) % 15 == 0: + checkpoint_dir = "./training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") + checkpoint.save(file_prefix=checkpoint_prefix) + + print("Time for epoch {} is {} sec".format(epoch + 1, time.time() - start)) + + generate_and_save_images(generator, epochs, seed) + + +def 
generate_and_save_images(model, epoch, test_input): + predictions = model(test_input, training=False) + + fig = plt.figure(figsize=(4, 4)) + + for i in range(predictions.shape[0]): + plt.subplot(4, 4, i + 1) + plt.imshow(predictions[i, ..., 0] * 127.5 + 127.5, cmap="gray") + plt.axis("off") + + plt.savefig("image_at_epoch_{:04d}.png".format(epoch)) + plt.close() + + +random.seed(0) +np.random.seed(0) +tf.random.set_seed(0) + +(train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data() + +train_images = train_images[..., None].astype(np.float32) +train_images = (train_images - 127.5) / 127.5 + +buffer_size = train_images.shape[0] +batch_size = 256 + +train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(buffer_size).batch(batch_size) + +generator = make_generator_model(); +# generator.summary() + +# noise = tf.random.normal([1, 100]) +# generated_image = generator(noise, training=False) + +# plt.imshow(generated_image[0, ..., 0], cmap="gray") +# plt.show() + +discriminator = make_discriminator_model() +# discriminator.summary() + +# decision = discriminator(generated_image) +# print(decision) + +generator_optimizer = tf.keras.optimizers.Adam(1e-4) +discriminator_optimizer = tf.keras.optimizers.Adam(1e-4) + +checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, + discriminator_optimizer=discriminator_optimizer, + generator=generator, + discriminator=discriminator) + +# checkpoint_dir = "./training_checkpoints" +# checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)) + +epochs = 50 +noise_dim = 100 +num_examples_to_generate = 16 + +seed = tf.random.normal([num_examples_to_generate, noise_dim]) + +train(train_dataset, epochs, checkpoint, generator, discriminator, + generator_optimizer, discriminator_optimizer, seed) diff --git a/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial2.py b/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial2.py new file mode 100644 index 000000000..b765fa73d --- 
/dev/null +++ b/com.ibm.wala.cast.python.test/data/tensorflow_gan_tutorial2.py @@ -0,0 +1,166 @@ +# From: https://github.com/ponder-lab/samples/blob/39f7644391e664244b45c90868c804abad923eb3/tensorflow_gan_tutorial/tensorflow_gan_tutorial.py + +#!/usr/bin/env python + +import os +import time +import random +import matplotlib.pyplot as plt + +import numpy as np +import tensorflow as tf + + +def make_generator_model(): + input_node = tf.keras.Input((100,)) + x = input_node + x = tf.keras.layers.Dense(7 * 7 * 256, use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Reshape((7, 7, 256))(x) + x = tf.keras.layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding="same", use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding="same", use_bias=False)(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding="same", use_bias=False)(x) + output_node = x + + return tf.keras.models.Model(input_node, output_node) + + +def make_discriminator_model(): + input_node = tf.keras.Input((28, 28, 1)) + x = input_node + x = tf.keras.layers.Conv2D(64, (5, 5), strides=(2, 2), padding="same")(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Dropout(0.3)(x) + x = tf.keras.layers.Conv2D(128, (5, 5), strides=(2, 2), padding="same")(x) + x = tf.keras.layers.LeakyReLU()(x) + x = tf.keras.layers.Dropout(0.3)(x) + x = tf.keras.layers.Flatten()(x) + x = tf.keras.layers.Dense(1)(x) + output_node = x + + return tf.keras.models.Model(input_node, output_node) + + +CROSS_ENTROPY = tf.keras.losses.BinaryCrossentropy(from_logits=True) + + +def discriminator_loss(real_output, fake_output): + real_loss = CROSS_ENTROPY(tf.ones_like(real_output), real_output) + fake_loss = 
CROSS_ENTROPY(tf.zeros_like(fake_output), fake_output) + total_loss = real_loss + fake_loss + return total_loss + + +def generator_loss(fake_output): + return CROSS_ENTROPY(tf.ones_like(fake_output), fake_output) + + +@tf.function +def train_step(images, generator, discriminator, generator_optimizer, discriminator_optimizer): + noise = tf.random.normal([images.shape[0], noise_dim]) + + with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape: + generated_images = generator(noise, training=True) + + real_output = discriminator(images, training=True) + fake_output = discriminator(generated_images, training=True) + + gen_loss = generator_loss(fake_output) + disc_loss = discriminator_loss(real_output, fake_output) + + gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables) + disc_grads = disc_tape.gradient(disc_loss, discriminator.trainable_variables) + + generator_optimizer.apply_gradients(zip(gen_grads, generator.trainable_variables)) + discriminator_optimizer.apply_gradients(zip(disc_grads, discriminator.trainable_variables)) + + +def train(dataset, epochs, checkpoint, generator, discriminator, + generator_optimizer, discriminator_optimizer, seed): + # for epoch in range(epochs, epochs*2): + for epoch in range(epochs): + start = time.time() + + for image_batch in dataset: + train_step(image_batch, generator, discriminator, + generator_optimizer, discriminator_optimizer) + + generate_and_save_images(generator, epoch + 1, seed) + + if (epoch + 1) % 15 == 0: + checkpoint_dir = "./training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") + checkpoint.save(file_prefix=checkpoint_prefix) + + print("Time for epoch {} is {} sec".format(epoch + 1, time.time() - start)) + + generate_and_save_images(generator, epochs, seed) + + +def generate_and_save_images(model, epoch, test_input): + predictions = model(test_input, training=False) + + fig = plt.figure(figsize=(4, 4)) + + for i in range(predictions.shape[0]): + plt.subplot(4, 4, 
i + 1) + plt.imshow(predictions[i, ..., 0] * 127.5 + 127.5, cmap="gray") + plt.axis("off") + + plt.savefig("image_at_epoch_{:04d}.png".format(epoch)) + plt.close() + + +random.seed(0) +np.random.seed(0) +tf.random.set_seed(0) + +(train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data() + +train_images = train_images[..., None].astype(np.float32) +train_images = (train_images - 127.5) / 127.5 + +buffer_size = train_images.shape[0] +batch_size = 256 + +train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(buffer_size).batch(batch_size) + +generator = make_generator_model(); +# generator.summary() + +# noise = tf.random.normal([1, 100]) +# generated_image = generator(noise, training=False) + +# plt.imshow(generated_image[0, ..., 0], cmap="gray") +# plt.show() + +discriminator = make_discriminator_model() +# discriminator.summary() + +# decision = discriminator(generated_image) +# print(decision) + +generator_optimizer = tf.keras.optimizers.Adam(1e-4) +discriminator_optimizer = tf.keras.optimizers.Adam(1e-4) + +checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, + discriminator_optimizer=discriminator_optimizer, + generator=generator, + discriminator=discriminator) + +# checkpoint_dir = "./training_checkpoints" +# checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)) + +epochs = 50 +noise_dim = 100 +num_examples_to_generate = 16 + +seed = tf.random.normal([num_examples_to_generate, noise_dim]) + +train(train_dataset, epochs, checkpoint, generator, discriminator, + generator_optimizer, discriminator_optimizer, seed) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_add.py b/com.ibm.wala.cast.python.test/data/tf2_test_add.py new file mode 100644 index 000000000..c7d00cb5d --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = [1, 2, 3, 4, 5] +y = 1 +z = tf.add(x, y) +f(z) diff --git 
a/com.ibm.wala.cast.python.test/data/tf2_test_add2.py b/com.ibm.wala.cast.python.test/data/tf2_test_add2.py new file mode 100644 index 000000000..94e5f0c57 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add2.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = tf.convert_to_tensor([1, 2, 3, 4, 5]) +y = tf.convert_to_tensor(1) +z = tf.add(x, y) +f(z) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_add3.py b/com.ibm.wala.cast.python.test/data/tf2_test_add3.py new file mode 100644 index 000000000..3b1e925e6 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add3.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = [1, 2, 3, 4, 5] +y = tf.constant([1, 2, 3, 4, 5]) +z = tf.add(x, y) +f(z) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_add4.py b/com.ibm.wala.cast.python.test/data/tf2_test_add4.py new file mode 100644 index 000000000..8102ce26f --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add4.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = [1, 2, 3, 4, 5] +y = 1 +z = tf.math.add(x, y) +f(z) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_add5.py b/com.ibm.wala.cast.python.test/data/tf2_test_add5.py new file mode 100644 index 000000000..b97262bb8 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add5.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = tf.convert_to_tensor([1, 2, 3, 4, 5]) +y = tf.convert_to_tensor(1) +z = tf.math.add(x, y) +f(z) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_add6.py b/com.ibm.wala.cast.python.test/data/tf2_test_add6.py new file mode 100644 index 000000000..78ae4b836 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_add6.py @@ -0,0 +1,11 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = [1, 2, 3, 4, 5] +y = tf.constant([1, 2, 3, 4, 5]) +z = tf.math.add(x, y) +f(z) diff --git 
a/com.ibm.wala.cast.python.test/data/tf2_test_dataset10.py b/com.ibm.wala.cast.python.test/data/tf2_test_dataset10.py new file mode 100644 index 000000000..bf7392bfb --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_dataset10.py @@ -0,0 +1,20 @@ +import tensorflow as tf + + +def gen(): + ragged_tensor = tf.ragged.constant([[1, 2], [3]]) + yield 42, ragged_tensor + + +def add(a, b): + return a + b + + +dataset = tf.data.Dataset.from_generator( + gen, + output_signature=( + tf.TensorSpec(shape=(), dtype=tf.int32), + tf.RaggedTensorSpec(shape=(2, None), dtype=tf.int32))) + +for element in dataset: + c = add(element, element) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_dataset6.py b/com.ibm.wala.cast.python.test/data/tf2_test_dataset6.py new file mode 100644 index 000000000..f5fc00ec4 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_dataset6.py @@ -0,0 +1,14 @@ +import tensorflow as tf + + +def add(a, b): + return a + b + + +def func(ds): + for element in ds: + c = add(element, element) + + +dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).shuffle(3).batch(2) +func(dataset) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_dataset7.py b/com.ibm.wala.cast.python.test/data/tf2_test_dataset7.py new file mode 100644 index 000000000..c1f8aebf3 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_dataset7.py @@ -0,0 +1,15 @@ +import tensorflow as tf + + +@tf.function +def add(a, b): + return a + b + + +def func(ds): + for element in ds: + c = add(element, element) + + +dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).shuffle(3).batch(2) +func(dataset) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_dataset8.py b/com.ibm.wala.cast.python.test/data/tf2_test_dataset8.py new file mode 100644 index 000000000..a2b6b936c --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_dataset8.py @@ -0,0 +1,12 @@ +import tensorflow as tf + + +def add(a, b): + return a + a + + +(x_train, y_train), 
(x_test, y_test) = tf.keras.datasets.mnist.load_data() +dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32) + +for images, labels in dataset: + c = add(images, labels) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_dataset9.py b/com.ibm.wala.cast.python.test/data/tf2_test_dataset9.py new file mode 100644 index 000000000..b78b34bd6 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_dataset9.py @@ -0,0 +1,15 @@ +import tensorflow as tf + + +def add(a, b): + return a + a + + +def func(ds): + for images, labels in ds: + c = add(images, labels) + + +(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() +dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32) +func(dataset) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_model_call.py b/com.ibm.wala.cast.python.test/data/tf2_test_model_call.py index 466b01491..e65cbbd69 100644 --- a/com.ibm.wala.cast.python.test/data/tf2_test_model_call.py +++ b/com.ibm.wala.cast.python.test/data/tf2_test_model_call.py @@ -1,7 +1,8 @@ import tensorflow as tf - # Create an override model to classify pictures + + class SequentialModel(tf.keras.Model): def __init__(self, **kwargs): @@ -28,6 +29,7 @@ def __call__(self, x): return x + input_data = tf.random.uniform([20, 28, 28]) model = SequentialModel() diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid.py b/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid.py new file mode 100644 index 000000000..3d61b2dab --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid.py @@ -0,0 +1,10 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = tf.constant([0.0, 1.0, 50.0, 100.0]) +y = tf.math.sigmoid(x) +f(y) diff --git a/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid2.py b/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid2.py new file mode 100644 index 000000000..a2830a948 --- /dev/null +++ 
b/com.ibm.wala.cast.python.test/data/tf2_test_sigmoid2.py @@ -0,0 +1,10 @@ +import tensorflow as tf + + +def f(a): + pass + + +x = tf.constant([0.0, 1.0, 50.0, 100.0]) +y = tf.nn.sigmoid(x) +f(y) diff --git a/com.ibm.wala.cast.python.test/data/tf2o.py b/com.ibm.wala.cast.python.test/data/tf2o.py index 1c6eae541..7612eef57 100644 --- a/com.ibm.wala.cast.python.test/data/tf2o.py +++ b/com.ibm.wala.cast.python.test/data/tf2o.py @@ -1,7 +1,8 @@ import tensorflow as tf + def add(a, b): - return tf.add(a,b) + return tf.add(a, b) -c = add(tf.eye(2,3), tf.eye(2,3)) +c = add(tf.eye(2, 3), tf.eye(2, 3)) diff --git a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonTrampolineTargetSelector.java b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonTrampolineTargetSelector.java index aeb07f141..491c845b6 100644 --- a/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonTrampolineTargetSelector.java +++ b/com.ibm.wala.cast.python/source/com/ibm/wala/cast/python/ipa/callgraph/PythonTrampolineTargetSelector.java @@ -10,6 +10,7 @@ *****************************************************************************/ package com.ibm.wala.cast.python.ipa.callgraph; +import com.ibm.wala.cast.ipa.callgraph.ScopeMappingInstanceKeys.ScopeMappingInstanceKey; import com.ibm.wala.cast.loader.DynamicCallSiteReference; import com.ibm.wala.cast.python.client.PythonAnalysisEngine; import com.ibm.wala.cast.python.ipa.summaries.PythonInstanceMethodTrampoline; @@ -25,8 +26,9 @@ import com.ibm.wala.core.util.strings.Atom; import com.ibm.wala.ipa.callgraph.CGNode; import com.ibm.wala.ipa.callgraph.MethodTargetSelector; +import com.ibm.wala.ipa.callgraph.propagation.AllocationSiteInNode; +import com.ibm.wala.ipa.callgraph.propagation.ConstantKey; import com.ibm.wala.ipa.callgraph.propagation.InstanceKey; -import com.ibm.wala.ipa.callgraph.propagation.NormalAllocationInNode; import com.ibm.wala.ipa.callgraph.propagation.PointerKey; 
import com.ibm.wala.ipa.callgraph.propagation.PointerKeyFactory; import com.ibm.wala.ipa.cha.IClassHierarchy; @@ -40,9 +42,13 @@ import com.ibm.wala.util.collections.Pair; import com.ibm.wala.util.intset.OrdinalSet; import java.util.Map; +import java.util.logging.Logger; public class PythonTrampolineTargetSelector implements MethodTargetSelector { + private static final Logger logger = + Logger.getLogger(PythonSSAPropagationCallGraphBuilder.class.getName()); + /** * The method name that is used for Python callables. * @@ -84,10 +90,13 @@ public IMethod getCalleeTarget(CGNode caller, CallSiteReference site, IClass rec PythonInvokeInstruction call = (PythonInvokeInstruction) caller.getIR().getCalls(site)[0]; if (callable) { + logger.fine("Encountered callable."); + // It's a callable. Change the receiver. receiver = getCallable(caller, cha, call); if (receiver == null) return null; // not found. + else logger.fine("Substituting the receiver with one derived from a callable."); } Pair key = Pair.make(receiver, call.getNumberOfTotalParameters()); @@ -183,35 +192,108 @@ private IClass getCallable(CGNode caller, IClassHierarchy cha, PythonInvokeInstr OrdinalSet objs = builder.getPointerAnalysis().getPointsToSet(receiver); for (InstanceKey o : objs) { - NormalAllocationInNode instanceKey = (NormalAllocationInNode) o; - CGNode node = instanceKey.getNode(); - IMethod method = node.getMethod(); - IClass declaringClass = method.getDeclaringClass(); - final ClassLoaderReference classLoaderReference = - declaringClass.getClassLoader().getReference(); - TypeName declaringClassName = declaringClass.getName(); - final String packageName = "$" + declaringClassName.toString().substring(1); - - IClass callable = - cha.lookupClass( - TypeReference.findOrCreateClass( - classLoaderReference, packageName, CALLABLE_METHOD_NAME)); - - // TODO: Remove this code once https://github.com/wala/ML/issues/118 is completed. 
- if (callable == null) - // try the workaround for https://github.com/wala/ML/issues/106. NOTE: We cannot verify that - // the super class is tf.keras.Model due to https://github.com/wala/ML/issues/118. - callable = + AllocationSiteInNode instanceKey = getAllocationSiteInNode(o); + if (instanceKey != null) { + CGNode node = instanceKey.getNode(); + IMethod method = node.getMethod(); + IClass declaringClass = method.getDeclaringClass(); + final ClassLoaderReference classLoaderReference = + declaringClass.getClassLoader().getReference(); + TypeName declaringClassName = declaringClass.getName(); + final String packageName = "$" + declaringClassName.toString().substring(1); + + IClass callable = cha.lookupClass( TypeReference.findOrCreateClass( - classLoaderReference, packageName, CALLABLE_METHOD_NAME_FOR_KERAS_MODELS)); + classLoaderReference, packageName, CALLABLE_METHOD_NAME)); + + // TODO: Remove this code once https://github.com/wala/ML/issues/118 is completed. + if (callable == null) { + // try the workaround for https://github.com/wala/ML/issues/106. NOTE: We cannot verify + // that the super class is tf.keras.Model due to https://github.com/wala/ML/issues/118. + logger.fine("Attempting callable workaround for https://github.com/wala/ML/issues/118."); + + callable = + cha.lookupClass( + TypeReference.findOrCreateClass( + classLoaderReference, packageName, CALLABLE_METHOD_NAME_FOR_KERAS_MODELS)); - if (callable != null) return callable; + if (callable != null) + logger.info("Applying callable workaround for https://github.com/wala/ML/issues/118."); + } + + if (callable != null) return callable; + } } return null; } + /** + * Extracts the {@link AllocationSiteInNode} from the given {@link InstanceKey}. If the given + * {@link InstanceKey} is an instance of {@link AllocationSiteInNode}, then it itself is returned. 
+ * If the given {@link InstanceKey} is a {@link ScopeMappingInstanceKey}, then its base {@link + * InstanceKey} is returned if it is an instance of {@link AllocationSiteInNode}. + * + * @param instanceKey The {@link InstanceKey} in question. + * @return The {@link AllocationSiteInNode} corresponding to the given {@link InstanceKey} + * according to the above scheme. + */ + private static AllocationSiteInNode getAllocationSiteInNode(InstanceKey instanceKey) { + if (instanceKey instanceof AllocationSiteInNode) return (AllocationSiteInNode) instanceKey; + else if (instanceKey instanceof ScopeMappingInstanceKey) { + ScopeMappingInstanceKey smik = (ScopeMappingInstanceKey) instanceKey; + InstanceKey baseInstanceKey = smik.getBase(); + + if (baseInstanceKey instanceof AllocationSiteInNode) + return (AllocationSiteInNode) baseInstanceKey; + else if (baseInstanceKey instanceof ConstantKey) { + return getAllocationSiteInNode((ConstantKey) baseInstanceKey); + } else + throw new IllegalArgumentException( + "Can't extract AllocationSiteInNode from: " + + baseInstanceKey + + ". Not expecting: " + + baseInstanceKey.getClass() + + "."); + } else if (instanceKey instanceof ConstantKey) { + return getAllocationSiteInNode((ConstantKey) instanceKey); + } else + throw new IllegalArgumentException( + "Can't extract AllocationSiteInNode from: " + + instanceKey + + ". Not expecting: " + + instanceKey.getClass() + + "."); + } + + /** + * If the given {@link ConstantKey}'s value is null, then issue a warning and return + * null. Otherwise, throw an {@link IllegalArgumentException} stating that an {@link + * AllocationSiteInNode} cannot be extracted from the given {@link ConstantKey}. A value of + * null most likely indicates that a receiver can potentially be null. + * + * @param constantKey The {@link ConstantKey} from which to extract the corresponding {@link + * AllocationSiteInNode}. + * @return null if the given {@link ConstantKey}'s value is null. 
+ * @throws IllegalArgumentException If the constant's value is anything other than null + * . + */ + private static AllocationSiteInNode getAllocationSiteInNode(ConstantKey constantKey) { + Object value = constantKey.getValue(); + + if (value == null) { + logger.warning("Can't extract AllocationSiteInNode from: " + constantKey + "."); + return null; + } else + throw new IllegalArgumentException( + "Can't extract AllocationSiteInNode from: " + + constantKey + + ". Not expecting value of: " + + value + + " from ConstantKey."); + } + + public PythonAnalysisEngine getEngine() { + return engine; }