Merge branch 'refs/heads/dev_experiments' into dev

# Conflicts:
#	experiments/DTI/visualize.py
kalininalab · Sep 3, 2024 · 6d66bde
2 parents 76c633d + 0258181
commit 6d66bde
Show file tree

Hide file tree

Showing 11 changed files with 493 additions and 236 deletions.
diff --git a/experiments/DTI/train.py b/experiments/DTI/train.py
@@ -175,8 +175,11 @@ def main(full_path: Path):
         full_path: Path to the folder holding the runs for all tools
     """
     for tool in TECHNIQUES:
+        if tool == "datasail":
+            continue
         train_tool(full_path, tool)
 
 
 if __name__ == '__main__':
     main(Path(sys.argv[1]))
+
diff --git a/experiments/DTI/visualize.py b/experiments/DTI/visualize.py
diff --git a/experiments/MPP/split.py b/experiments/MPP/split.py
@@ -40,8 +40,8 @@ def split_w_datasail(base_path: Path, name: str, techniques: List[str], solver:
     #     print("DataSAIL skipping", name)
     #     return
 
-    with open(base_path / "time.txt", "w") as time:
-        print("Start", file=time)
+    # with open(base_path / "time.txt", "w") as time:
+    #     print("Start", file=time)
 
     df = prep_moleculenet(name)
     start = T.time()
@@ -56,8 +56,8 @@ def split_w_datasail(base_path: Path, name: str, techniques: List[str], solver:
         max_sec=1000,
         epsilon=0.1,
     )
-    with open(base_path / "time.txt", "a") as time:
-        print("I1+C1", T.time() - start, file=time)
+    # with open(base_path / "time.txt", "a") as time:
+    #     print("I1+C1", T.time() - start, file=time)
 
     save_datasail_splits(base_path, df, "ID", [(t, t) for t in techniques], e_splits=e_splits)
 
@@ -163,7 +163,7 @@ def split(full_path, name, solver="GUROBI"):
     """
     Split the MoleculeNet datasets using different techniques.
     """
-    split_w_datasail(full_path / "datasail" / name, name, techniques=["I1e", "C1e"], solver=solver)
+    split_w_datasail(full_path / "datasail" / name, name, techniques=["I1e"], solver=solver)
     # split_w_deepchem(full_path / "deepchem" / name, name, techniques=SPLITTERS.keys())
     # split_w_lohi(full_path / "lohi" / name, name)
 
@@ -177,6 +177,8 @@ def specific():
 
 
 if __name__ == '__main__':
+    split_w_datasail(Path("/") / "scratch" / "SCRATCH_SAS" / "roman" / "DataSAIL" / "v10" / "MPP" / "datasail" / "hiv", "hiv", ["I1e"])
+    exit(0)
     if len(sys.argv) == 1:
         specific()
     elif len(sys.argv) == 2:

diff --git a/experiments/MPP/train.py b/experiments/MPP/train.py
@@ -134,7 +134,11 @@ def train_run(run_path: Path, data_path: Path, name: str, model: str) -> float:
     m.fit(x_train, y_train)
 
     test_predictions = m.predict(x_test)
-    test_perf = metric[DATASETS[name][2]](y_test, test_predictions)
+    scoring = metric[DATASETS[name][2]]
+    if name == "muv":
+        test_perf = np.mean([scoring(y_test[:, i], test_predictions[:, i]) for i in range(y_test.shape[1])])
+    else:
+        test_perf = scoring(y_test, test_predictions)
 
     return test_perf
 
@@ -174,9 +178,8 @@ def train_model(base_path: Path, data_path: Path, model: str, tool: str, name: s
         pd.DataFrame: Dataframe of the performance of the models
     """
     perf = {}
-    # for tech in set(TECHNIQUES[tool]).intersection(set(DRUG_TECHNIQUES)):
-    tech = "C1e"
-    perf.update(train_tech(base_path / tech, data_path, model, tech, name))
+    for tech in ["I1e", "C1e"]:  # set(TECHNIQUES[tool]).intersection(set(DRUG_TECHNIQUES)):
+        perf.update(train_tech(base_path / tech, data_path, model, tech, name))
         # message(tool, name, model[:-2], tech)
     df = pd.DataFrame(list(perf.items()), columns=["name", "perf"])
     df["model"] = model
@@ -224,14 +227,19 @@ def train(full_path: Path, name: Optional[str] = None) -> None:
     """
     if name is None:
         for name in DATASETS:
-            # train_dataset(full_path, name)
+            if name in ["qm7", "qm8", "qm9", "lipophilicity", "esol", "freesolv", "pcba", "tox21", "clintox", "muv"]:
+                continue
             train_tool(full_path, "datasail", name)
+            # train_dataset(full_path, name)
     else:
         train_dataset(full_path, name)
 
 
 if __name__ == '__main__':
+    train_tool(Path(sys.argv[1]), "datasail", "hiv")
+    exit(0)
     if len(sys.argv) == 2:
         train(Path(sys.argv[1]))
+        # train_tool(Path(sys.argv[1]), "datasail", "muv")
     elif len(sys.argv) == 3:
         train_dataset(Path(sys.argv[1]), sys.argv[2])