From a161cfd989c11df9949386a103110fac45734cad Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Sun, 2 Jun 2024 20:52:58 +0200 Subject: [PATCH] reinterpret argument settings (#62) * reinterpret argument settings * fix stability script * validate input arguments --- scripts/run_stability_test.sh | 1 + .../methods/transformer_ensemble/script.py | 45 ++++++++++++------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/scripts/run_stability_test.sh b/scripts/run_stability_test.sh index bc22cf25..0f05493c 100755 --- a/scripts/run_stability_test.sh +++ b/scripts/run_stability_test.sh @@ -7,6 +7,7 @@ id: neurips-2023-data sc_counts: resources/neurips-2023-raw/sc_counts_reannotated_with_counts.h5ad method_ids: ['ground_truth', 'sample', 'mean_across_celltypes', 'mean_across_compounds'] layer: t # test a different layer +bootstrap_num_replicates: 2 publish_dir: "output/test_stability_analysis" output_state: "state.yaml" HERE diff --git a/src/task/methods/transformer_ensemble/script.py b/src/task/methods/transformer_ensemble/script.py index 29f22dfc..5f48d5e7 100644 --- a/src/task/methods/transformer_ensemble/script.py +++ b/src/task/methods/transformer_ensemble/script.py @@ -42,34 +42,38 @@ # train and predict models argsets = [ + # Note by author - weight_df1: 0.5 (utilizing std, mean, and clustering sampling, yielding 0.551) { - "name": "trained_models_kmeans_mean_std", + "name": "weight_df1", "mean_std": "mean_std", "uncommon": False, - "sampling_strategy": "k-means", - "weight": 0.4, + "sampling_strategy": "random", + "weight": 0.5, }, + # Note by author - weight_df2: 0.25 (excluding uncommon elements, resulting in 0.559) { - "name": "trained_models_kmeans_mean_std_trueuncommon", + "name": "weight_df2", "mean_std": "mean_std", "uncommon": True, - "sampling_strategy": "k-means", - "weight": 0.1, + "sampling_strategy": "random", + "weight": 0.25, }, + # Note by author - weight_df3: 0.25 (leveraging clustering sampling, achieving 0.575) { - "name": "trained_models_kmeans_mean", - "mean_std": "mean", - "uncommon": False, + "name": "weight_df3", + "mean_std": "mean_std", + "uncommon": False, # should this be set to False or True? "sampling_strategy": "k-means", - "weight": 0.2, + "weight": 0.25, }, + # Note by author - weight_df4: 0.3 (incorporating mean, random sampling, and excluding std, attaining 0.554) { - "name": "trained_models_nonkmeans_mean", + "name": "weight_df4", "mean_std": "mean", - "uncommon": False, + "uncommon": False, # should this be set to False or True? "sampling_strategy": "random", "weight": 0.3, - }, + } ] @@ -90,6 +94,8 @@ one_hot_encode_features, targets, one_hot_test = ( prepare_augmented_data_mean_only(de_train=de_train, id_map=id_map) ) + else: + raise ValueError("Invalid mean_std argument") print(f"> Train model", flush=True) if argset["sampling_strategy"] == "k-means": @@ -104,7 +110,7 @@ device=device, mean_std=argset["mean_std"], ) - else: + elif argset["sampling_strategy"] == "random": label_reducer, scaler, transformer_model = train_non_k_means_strategy( n_components=n_components, d_model=d_model, @@ -116,6 +122,8 @@ device=device, mean_std=argset["mean_std"], ) + else: + raise ValueError("Invalid sampling_strategy argument") print(f"> Predict model", flush=True) unseen_data = torch.tensor(one_hot_test, dtype=torch.float32).to(device) @@ -145,9 +153,12 @@ predictions.append(pred) print(f"Combine predictions", flush=True) -weighted_pred = sum( - [argset["weight"] * pred for argset, pred in zip(argsets, predictions)] -) / sum([argset["weight"] for argset in argsets]) +# compute weighted sum +sum_weights = sum([argset["weight"] for argset in argsets]) +weighted_pred = sum([ + pred * argset["weight"] / sum_weights + for argset, pred in zip(argsets, predictions) +]) print('Write output to file', flush=True)