Successfully tested NOMI, TDM, and GAIN

FalaahArifKhan · Dec 25, 2024 · 172db32
1 parent 89bdf58
commit 172db32
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 15 deletions.
diff --git a/source/null_imputers/gain_imputer.py b/source/null_imputers/gain_imputer.py
@@ -499,7 +499,8 @@ def _set_hyperparameters_for_optimization(self, trial: optuna.trial.Trial) -> No
             "noise": trial.suggest_discrete_uniform("noise", 0, 1, 1),
 
             # training
-            "batch_size": trial.suggest_discrete_uniform("batch_size", 0, 1024, 1),
+            # "batch_size": trial.suggest_discrete_uniform("batch_size", 0, 1024, 1),
+            "batch_size": trial.suggest_discrete_uniform("batch_size", 0, 512, 1),
             "max_epochs": trial.suggest_discrete_uniform("max_epochs", 0, 10000, 1),
             "early_stop": trial.suggest_discrete_uniform("early_stop", 0, 1000, 1),
 

diff --git a/source/null_imputers/imputation_methods.py b/source/null_imputers/imputation_methods.py
@@ -210,7 +210,9 @@ def impute_with_tdm(X_train_with_nulls: pd.DataFrame, X_tests_with_nulls_lst: li
                     hyperparams: dict, **kwargs):
     dataset_name = kwargs['dataset_name']
     seed = kwargs['experiment_seed']
-    torch.manual_seed(seed)  # Set the random seed for reproducibility
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
 
     X_train_encoded, cat_encoders, _ = encode_dataset_for_missforest(df=X_train_with_nulls,
                                                                      dataset_name=dataset_name,
@@ -286,10 +288,6 @@ def impute_with_nomi(X_train_with_nulls: pd.DataFrame, X_tests_with_nulls_lst: l
     np.random.seed(seed)
     torch.manual_seed(seed)
 
-    print("X_train_with_nulls.shape:", X_train_with_nulls.shape)
-    print("X_train_with_nulls.dtypes:", X_train_with_nulls.dtypes)
-    print("X_train_with_nulls.head():", X_train_with_nulls.head())
-
     num_indices_with_nulls = [X_train_with_nulls.columns.get_loc(col) for col in numeric_columns_with_nulls]
     cat_indices_with_nulls = [X_train_with_nulls.columns.get_loc(col) for col in categorical_columns_with_nulls]
 
@@ -304,39 +302,32 @@ def impute_with_nomi(X_train_with_nulls: pd.DataFrame, X_tests_with_nulls_lst: l
                                       categorical_columns_with_nulls=categorical_columns_with_nulls)[0]
         for X_test_with_nulls in X_tests_with_nulls_lst
     ]
-    print("X_train_encoded.head():", X_train_encoded.head())
 
     # Apply an imputer
     imputer = NOMIImputer(k_neighbors=kwargs['k_neighbors'],
                           similarity_metric=kwargs['similarity_metric'],
                           max_iterations=kwargs['max_iterations'],
                           tau=kwargs['tau'],
                           beta=kwargs['beta'])
-    print("np.sum(X_train_encoded mask):", np.sum(np.isnan(X_train_encoded.to_numpy())))
     X_train_imputed_np = imputer.fit_transform(X_train_encoded.to_numpy(), num_indices_with_nulls, cat_indices_with_nulls)
     X_tests_imputed_np_lst = list(map(lambda X_test_encoded:
             imputer.transform(X_test_encoded.to_numpy(), num_indices_with_nulls, cat_indices_with_nulls),
       X_tests_encoded_lst)
     )
-    print("X_train_imputed_np[:5]:", X_train_imputed_np[:5])
 
     # Convert numpy arrays back to DataFrames
     X_train_imputed = pd.DataFrame(X_train_imputed_np, columns=X_train_with_nulls.columns, index=X_train_with_nulls.index)
     X_tests_imputed_lst = [
         pd.DataFrame(X_test, columns=X_test_with_nulls.columns, index=X_test_with_nulls.index)
         for X_test, X_test_with_nulls in zip(X_tests_imputed_np_lst, X_tests_with_nulls_lst)
     ]
-    print("X_train_imputed.head():", X_train_imputed.head())
 
     # Decode categories back
     X_train_imputed = decode_dataset_for_missforest(X_train_imputed, cat_encoders, dataset_name=dataset_name)
     X_tests_imputed_lst = [
         decode_dataset_for_missforest(X_test_imputed, cat_encoders, dataset_name=dataset_name)
         for X_test_imputed in X_tests_imputed_lst
     ]
-    print("X_train_imputed.shape:", X_train_imputed.shape)
-    print("X_train_imputed.dtypes:", X_train_imputed.dtypes)
-    print("X_train_imputed.head():", X_train_imputed.head())
 
     hyperparams = {
         "k_neighbors": imputer.k_neighbors,

diff --git a/source/null_imputers/nomi_imputer.py b/source/null_imputers/nomi_imputer.py
@@ -17,9 +17,14 @@
 import torch
 import numpy as np
 import hnswlib
-import neural_tangents as nt
+import tensorflow as tf
+from distutils.version import LooseVersion
 from tqdm import tqdm
-from neural_tangents import stax
+
+# Import neural_tangents only when the installed TensorFlow version is 2.16 or newer
+if LooseVersion(tf.__version__) >= LooseVersion("2.16"):
+    import neural_tangents as nt
+    from neural_tangents import stax
 
 
 def sample_batch_index(total, batch_size):