Skip to content

Commit

Permalink
trivial start of pandas model
Browse files Browse the repository at this point in the history
  • Loading branch information
juliandolby committed Jul 10, 2018
1 parent e1af282 commit 7039ac8
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.ibm.wala.cast.python.ml.test;

import java.io.IOException;

import org.junit.Test;

import com.ibm.wala.ipa.callgraph.CallGraph;
import com.ibm.wala.ipa.cha.ClassHierarchyException;
import com.ibm.wala.util.CancelException;

/**
 * Smoke test for the pandas summary model: builds a call graph for a small
 * pandas script and dumps it for manual inspection.
 */
public class TestPandasModel extends TestPythonMLCallGraphShape {

  /**
   * Builds the call graph for {@code pandas1.py} and prints it to standard error.
   * The test makes no assertions about the graph's shape; it only checks that
   * processing completes without throwing.
   */
  @Test
  public void testPandas1() throws ClassHierarchyException, IllegalArgumentException, CancelException, IOException {
    final CallGraph callGraph = process("pandas1.py");
    System.err.println(callGraph);
  }

}
34 changes: 34 additions & 0 deletions com.ibm.wala.cast.python.ml/data/pandas.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0" ?>
<!DOCTYPE summary-spec>
<!-- Pandas model -->
<summary-spec>
<classloader name="PythonLoader">
<!-- Synthetic model of the "pandas" module object. -->
<class name="pandas" allocatable="true">
<!-- Static "import" method: builds and returns the module object. -->
<method name="import"
static="true"
descriptor="()Lpandas;">
<!-- x is the module object that gets returned. -->
<new def="x" class="Lpandas"/>

<!-- Allocate the read_excel function object and store it in x's
     "read_excel" field. -->
<new def="read_excel" class="Lpandas/functions/read_excel"/>
<putfield class="LRoot"
field="read_excel"
fieldType="LRoot"
ref="x"
value="read_excel"/>

<return value="x"/>
</method>
</class>

<!-- Function objects that the pandas module model stores in its fields. -->
<package name="pandas/functions">

<!-- Model of the read_excel function object. -->
<class name="read_excel" allocatable="true">
<!-- "do" takes three parameters (self, file, sheet) and returns a freshly
     allocated object.
     NOTE(review): presumably this object stands in for the DataFrame
     result; confirm as the model grows. -->
<method name="do" descriptor="()LRoot;" numArgs="3" paramNames="self file sheet">
<new def="v" class="Lobject"/>
<!-- The attribute must be spelled "value" so the return names v. -->
<return value="v"/>
</method>
</class>

</package>
</classloader>
</summary-spec>
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,13 @@ public Map<PointerKey, AnalysisError> getErrors() {

/**
 * Installs the superclass bypass logic, then passes each bundled XML summary
 * resource name to {@code addSummaryBypassLogic} in declaration order.
 *
 * @param options analysis options forwarded to the superclass and to each
 *                summary registration
 */
protected void addBypassLogic(AnalysisOptions options) {
  super.addBypassLogic(options);

  // Order is preserved: the tensorflow summary is registered before pandas.
  for (String summaryResource : new String[] { "tensorflow.xml", "pandas.xml" }) {
    addSummaryBypassLogic(options, summaryResource);
  }
}

private void addSummaryBypassLogic(AnalysisOptions options, String summary) {
IClassHierarchy cha = getClassHierarchy();

XMLMethodSummaryReader xml = new XMLMethodSummaryReader(getClass().getClassLoader().getResourceAsStream("tensorflow.xml"), scope);
XMLMethodSummaryReader xml = new XMLMethodSummaryReader(getClass().getClassLoader().getResourceAsStream(summary), scope);
for(TypeReference t : xml.getAllocatableClasses()) {
BypassSyntheticClassLoader ldr = (BypassSyntheticClassLoader) cha.getLoader(scope.getSyntheticLoader());
ldr.registerClass(t.getName(), new SyntheticClass(t, cha) {
Expand Down
6 changes: 6 additions & 0 deletions com.ibm.wala.cast.python.test/.project
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
Expand All @@ -30,5 +35,6 @@
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.pde.PluginNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>
8 changes: 8 additions & 0 deletions com.ibm.wala.cast.python.test/data/pandas1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Fixture script: reads two sheets of one workbook with pandas and
# normalizes the ID column of each resulting frame.
import pandas as pd

# Both reads target the same workbook; the second also passes skiprows=1.
# NOTE(review): newer pandas releases spell this keyword `sheet_name` -
# confirm which pandas version this fixture is meant to mirror.
dfqol = pd.read_excel("Master_Data_Sheet_06252018.xlsx", sheetname="EQ5D")
dfdemog = pd.read_excel("Master_Data_Sheet_06252018.xlsx", sheetname="Patient_Info_Demographics", skiprows = 1)

# format participant IDs uniformly across each dataframe
# dfqol: remove apostrophes from the existing 'PID' column.
dfqol['PID'] = dfqol['PID'].str.replace("'",'')
# dfdemog: derive 'PID' from 'Patient_ID' with hyphens removed.
dfdemog['PID'] = dfdemog['Patient_ID'].str.replace('-','')

0 comments on commit 7039ac8

Please sign in to comment.