From 7039ac81914edd84b50792cb340050d7c4f46a6d Mon Sep 17 00:00:00 2001 From: Julian Dolby Date: Mon, 9 Jul 2018 22:13:23 -0400 Subject: [PATCH] trivial start of pandas model --- .../cast/python/ml/test/TestPandasModel.java | 19 +++++++++++ com.ibm.wala.cast.python.ml/data/pandas.xml | 34 +++++++++++++++++++ .../ml/client/PythonTensorAnalysisEngine.java | 9 +++-- com.ibm.wala.cast.python.test/.project | 6 ++++ com.ibm.wala.cast.python.test/data/pandas1.py | 8 +++++ 5 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestPandasModel.java create mode 100644 com.ibm.wala.cast.python.ml/data/pandas.xml create mode 100644 com.ibm.wala.cast.python.test/data/pandas1.py diff --git a/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestPandasModel.java b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestPandasModel.java new file mode 100644 index 000000000..dc9e4f994 --- /dev/null +++ b/com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestPandasModel.java @@ -0,0 +1,19 @@ +package com.ibm.wala.cast.python.ml.test; + +import java.io.IOException; + +import org.junit.Test; + +import com.ibm.wala.ipa.callgraph.CallGraph; +import com.ibm.wala.ipa.cha.ClassHierarchyException; +import com.ibm.wala.util.CancelException; + +public class TestPandasModel extends TestPythonMLCallGraphShape { + + @Test + public void testPandas1() throws ClassHierarchyException, IllegalArgumentException, CancelException, IOException { + CallGraph CG = process("pandas1.py"); + System.err.println(CG); + } + +} diff --git a/com.ibm.wala.cast.python.ml/data/pandas.xml b/com.ibm.wala.cast.python.ml/data/pandas.xml new file mode 100644 index 000000000..2028c4988 --- /dev/null +++ b/com.ibm.wala.cast.python.ml/data/pandas.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java index bcfd5cf67..c25268ce1 100644 --- a/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java +++ b/com.ibm.wala.cast.python.ml/source/com/ibm/wala/cast/python/ml/client/PythonTensorAnalysisEngine.java @@ -186,10 +186,13 @@ public Map getErrors() { protected void addBypassLogic(AnalysisOptions options) { super.addBypassLogic(options); - + addSummaryBypassLogic(options, "tensorflow.xml"); + addSummaryBypassLogic(options, "pandas.xml"); + } + + private void addSummaryBypassLogic(AnalysisOptions options, String summary) { IClassHierarchy cha = getClassHierarchy(); - - XMLMethodSummaryReader xml = new XMLMethodSummaryReader(getClass().getClassLoader().getResourceAsStream("tensorflow.xml"), scope); + XMLMethodSummaryReader xml = new XMLMethodSummaryReader(getClass().getClassLoader().getResourceAsStream(summary), scope); for(TypeReference t : xml.getAllocatableClasses()) { BypassSyntheticClassLoader ldr = (BypassSyntheticClassLoader) cha.getLoader(scope.getSyntheticLoader()); ldr.registerClass(t.getName(), new SyntheticClass(t, cha) { diff --git a/com.ibm.wala.cast.python.test/.project b/com.ibm.wala.cast.python.test/.project index 9ab89339d..125dd967c 100644 --- a/com.ibm.wala.cast.python.test/.project +++ b/com.ibm.wala.cast.python.test/.project @@ -5,6 +5,11 @@ + + org.python.pydev.PyDevBuilder + + + org.eclipse.jdt.core.javabuilder @@ -30,5 +35,6 @@ org.eclipse.m2e.core.maven2Nature org.eclipse.pde.PluginNature org.eclipse.jdt.core.javanature + org.python.pydev.pythonNature diff --git a/com.ibm.wala.cast.python.test/data/pandas1.py b/com.ibm.wala.cast.python.test/data/pandas1.py new file mode 100644 index 000000000..ce24d2463 --- /dev/null +++ b/com.ibm.wala.cast.python.test/data/pandas1.py @@ -0,0 +1,8 @@ +import pandas as pd + +dfqol = pd.read_excel("Master_Data_Sheet_06252018.xlsx", sheetname="EQ5D") +dfdemog = pd.read_excel("Master_Data_Sheet_06252018.xlsx", sheetname="Patient_Info_Demographics", skiprows = 1) + +# format participant IDs uniformly across each dataframe +dfqol['PID'] = dfqol['PID'].str.replace("'",'') +dfdemog['PID'] = dfdemog['Patient_ID'].str.replace('-','')