Remove unused code and simplify NeuralPyEDCR implementation

lab-v2 · Jan 3, 2025 · 525d2de
1 parent 619922c
commit 525d2de
Showing 1 changed file with 4 additions and 198 deletions.
diff --git a/NeuralPyEDCR.py b/NeuralPyEDCR.py
@@ -6,16 +6,13 @@
 
 import numpy as np
 import typing
-from tqdm.contrib.concurrent import process_map
+# from tqdm.contrib.concurrent import process_map
 # import itertools
 
 import data_preprocessing
 import PyEDCR
-import backbone_pipeline
-import combined_fine_tuning
-import neural_evaluation
-import google_sheets_api
-import plotting
+# import google_sheets_api
+# import plotting
 
 
 class NeuralPyEDCR(PyEDCR.EDCR):
@@ -42,7 +39,6 @@ def __init__(self,
                  binary_model_name: str = None,
                  binary_num_epochs: int = None,
                  binary_lr: typing.Union[str, float] = None,
-                 num_train_images_per_class: int = None,
                  maximize_ratio: bool = True,
                  indices_of_fine_labels_to_take_out: typing.List[int] = [],
                  negated_conditions: bool = False):
@@ -66,142 +62,16 @@ def __init__(self,
                          binary_num_epochs=binary_num_epochs,
                          binary_lr=binary_lr,
                          secondary_lr=secondary_lr,
-                         num_train_images_per_class=num_train_images_per_class,
                          maximize_ratio=maximize_ratio,
                          indices_of_fine_labels_to_take_out=indices_of_fine_labels_to_take_out,
                          negated_conditions=negated_conditions)
         self.EDCR_num_epochs = EDCR_num_epochs
         self.neural_num_epochs = neural_num_epochs
 
-        # relevant_predicted_indices = None
-
-        # if 'correct' in experiment_name:
-        #     train_pred_correct_mask = np.ones_like(self.pred_data['train']['original'][
-        #                                                data_preprocessing.DataPreprocessor.granularities['fine']])
-        #
-        #     for g in data_preprocessing.DataPreprocessor.granularities.values():
-        #         train_pred_correct_mask &= self.get_where_predicted_correct(test=False, g=g)
-        #
-        #     relevant_predicted_indices = np.where(train_pred_correct_mask == 1)[0]
-
-        # if num_train_images_per_class is not None:
-        #     example_indices = []
-        #
-        #     for i in range(len(self.preprocessor.fine_grain_classes_str)):
-        #         i_indices_in_ground_truth = np.where(self.preprocessor.train_true_fine_data == i)[0]
-        #         cls_idx = np.intersect1d(i_indices_in_ground_truth, relevant_predicted_indices)
-        #         example_indices.extend(cls_idx[:num_train_images_per_class])
-        #         # break
-        #
-        #     self.K_train = np.array(example_indices)
-        #
-        #     for g in data_preprocessing.DataPreprocessor.granularities.values():
-        #         self.pred_data['train']['original'][g] = self.pred_data['train']['original'][g][self.K_train]
-
         # for g in data_preprocessing.DataPreprocessor.granularities.values():
         #     print(f"prediction train {g.g_str} is {self.pred_data['train']['original'][g]}")
         #     print(f"and its ground truth is {self.pred_data['train']['original'][g]}")
 
-    def run_training_correction_model_pipeline(self,
-                                               new_model_name: str,
-                                               new_lr: float):
-
-        perceived_examples_with_errors = set()
-        for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
-            perceived_examples_with_errors = perceived_examples_with_errors.union(set(
-                np.where(self.get_predictions(test=False, g=g, stage='post_detection') == -1)[0]))
-
-        perceived_examples_with_errors = np.array(list(perceived_examples_with_errors))
-
-        print(utils.red_text(f'\nNumber of perceived train errors: {len(perceived_examples_with_errors)} / '
-                             f'{self.T_train}\n'))
-
-        preprocessor, fine_tuners, loaders, devices = (
-            backbone_pipeline.initiate(
-                data_str=self.data_str,
-                model_name=new_model_name,
-                preprocessor=self.preprocessor,
-                lr=new_lr,
-                combined=self.combined,
-                error_indices=perceived_examples_with_errors,
-                # train_eval_split=0.8
-            ))
-
-        if self.correction_model is None:
-            self.correction_model = fine_tuners[0]
-
-        combined_fine_tuning.fine_tune_combined_model(
-            preprocessor=preprocessor,
-            lr=new_lr,
-            fine_tuner=self.correction_model,
-            device=devices[0],
-            loaders=loaders,
-            loss=self.loss,
-            save_files=False,
-            evaluate_on_test_between_epochs=False,
-            num_epochs=self.neural_num_epochs,
-            data_str=preprocessor.data_str,
-            model_name=self.main_model_name
-            # debug=True
-        )
-        print('#' * 100)
-
-        _, _, loaders, devices = backbone_pipeline.initiate(
-            data_str=self.data_str,
-            model_name=new_model_name,
-            preprocessor=self.preprocessor,
-            lr=new_lr,
-            combined=self.combined,
-            error_indices=perceived_examples_with_errors,
-            evaluation=True,
-        )
-
-        evaluation_return_values = neural_evaluation.evaluate_combined_model(
-            preprocessor=self.preprocessor,
-            fine_tuner=self.correction_model,
-            loaders=loaders,
-            loss=self.loss,
-            device=devices[0],
-            split='train',
-            print_results=True)
-
-        new_fine_predictions, new_coarse_predictions = evaluation_return_values[2], evaluation_return_values[3]
-
-        self.pred_data['train']['post_detection'][data_preprocessing.FineCoarseDataPreprocessor.granularities['fine']][
-            perceived_examples_with_errors] = new_fine_predictions
-        self.pred_data['train']['post_detection'][data_preprocessing.FineCoarseDataPreprocessor.granularities['coarse']][
-            perceived_examples_with_errors] = new_coarse_predictions
-
-    def apply_new_model_on_test(self,
-                                print_results: bool = True):
-        new_fine_predictions, new_coarse_predictions = (
-            neural_evaluation.run_combined_evaluating_pipeline(data_str=self.data_str,
-                                                               model_name=self.main_model_name,
-                                                               split='test',
-                                                               lr=self.lr,
-                                                               loss=self.loss,
-                                                               num_epochs=self.neural_num_epochs,
-                                                               pretrained_fine_tuner=self.correction_model,
-                                                               save_files=False,
-                                                               print_results=False))
-
-        for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
-            old_test_g_predictions = self.get_predictions(test=True, g=g, stage='post_detection')
-            new_test_g_predictions = new_fine_predictions if g.g_str == 'fine' else new_coarse_predictions
-
-            self.pred_data['test']['post_detection'][g] = np.where(old_test_g_predictions == -1,
-                                                                   new_test_g_predictions,
-                                                                   old_test_g_predictions)
-        if print_results:
-            self.print_metrics(split='test', prior=False, stage='post_detection')
-
-            where_fixed_initial_error = set()
-            for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
-                where_fixed_initial_error = where_fixed_initial_error.union(set(
-                    np.where(self.get_where_predicted_correct(test=True, g=g, stage='post_detection') == 1)[0]
-                ).intersection(set(np.where(self.get_where_predicted_incorrect(test=True, g=g) == 1)[0])))
-
-            print(f'where_fixed_initial_error: {len(where_fixed_initial_error)}')
 
     def run_learning_pipeline(self,
                               multi_processing: bool = True):
@@ -231,35 +101,6 @@ def run_learning_pipeline(self,
 
         print('\nRule learning completed\n')
 
-    def learn_error_binary_model(self,
-                                 binary_model_name: str,
-                                 binary_lr: typing.Union[float, str]):
-        preprocessor, fine_tuners, loaders, devices = backbone_pipeline.initiate(
-            data_str=self.data_str,
-            model_name=binary_model_name,
-            preprocessor=self.preprocessor,
-            lr=binary_lr,
-            train_fine_predictions=self.get_predictions(test=False, g=self.preprocessor.granularities['fine']),
-            train_coarse_predictions=self.get_predictions(test=False, g=self.preprocessor.granularities['coarse']),
-            test_fine_predictions=self.get_predictions(test=True, g=self.preprocessor.granularities['fine']),
-            test_coarse_predictions=self.get_predictions(test=True, g=self.preprocessor.granularities['coarse'])
-            # debug=True
-        )
-
-        combined_fine_tuning.fine_tune_combined_model(
-            preprocessor=preprocessor,
-            lr=binary_lr,
-            fine_tuner=fine_tuners[0],
-            device=devices[0],
-            loaders=loaders,
-            loss='error_BCE',
-            save_files=False,
-            evaluate_on_test_between_epochs=False,
-            num_epochs=2,
-            data_str=preprocessor.data_str,
-            model_name=self.main_model_name
-        )
-
 
 def work_on_value(args):
     (epsilon_index,
@@ -276,14 +117,12 @@ def work_on_value(args):
      binary_model_name,
      binary_lr,
      binary_num_epochs,
-     num_train_images_per_class,
      maximize_ratio,
      multi_processing,
      fine_labels_to_take_out,
      negated_conditions
      ) = args
 
-    print('#' * 25 + f'num_train_images_per_class = {num_train_images_per_class}, eps = {epsilon}' + '#' * 50)
     edcr = NeuralPyEDCR(data_str=data_str,
                         epsilon=epsilon,
                         sheet_index=epsilon_index,
@@ -301,10 +140,8 @@ def work_on_value(args):
                         binary_model_name=binary_model_name,
                         binary_lr=binary_lr,
                         binary_num_epochs=binary_num_epochs,
-                        # lower_predictions_indices=lower_predictions_indices,
                         EDCR_num_epochs=1,
                         neural_num_epochs=1,
-                        # num_train_images_per_class=num_train_images_per_class
                         maximize_ratio=maximize_ratio,
                         indices_of_fine_labels_to_take_out=fine_labels_to_take_out,
                         negated_conditions=negated_conditions
@@ -325,9 +162,6 @@ def simulate_for_values(data_str: str,
                         main_lr: typing.Union[float, str],
                         original_num_epochs: int,
                         binary_model_name: str,
-                        total_number_of_points: int = 10,
-                        min_value: float = 0.1,
-                        max_value: float = 0.3,
                         multi_processing: bool = True,
                         secondary_model_name: str = None,
                         secondary_model_loss: str = None,
@@ -336,30 +170,10 @@ def simulate_for_values(data_str: str,
                         binary_l_strs: typing.List[str] = [],
                         binary_lr: typing.Union[str, float] = None,
                         binary_num_epochs: int = None,
-                        num_train_images_per_class: typing.Sequence[int] = None,
-                        only_from_missing_values: bool = False,
                         maximize_ratio: bool = True,
                         lists_of_fine_labels_to_take_out: typing.List[typing.List[int]] = [],
                         negated_conditions: bool = False):
-    # all_values = {i: element for i, element
-    #               in enumerate(itertools.product(train_labels_noise_ratios,
-    #                                              lists_of_fine_labels_to_take_out
-    #                                              # np.linspace(start=min_value,
-    #                                              #             stop=max_value,
-    #                                              #             num=total_number_of_points)
-    #                                              ))
-    #               }
-
-    # if only_from_missing_values:
-    #     first_values, second_values = google_sheets_api.get_values_from_columns(sheet_tab_name=sheet_tab_name,
-    #                                                                             column_letters=['A', 'B'])
-    #     if len(first_values) and len(second_values):
-    #         last_first_value = first_values[-1]
-    #         last_epsilon = second_values[-1]
-    #         all_values = {i: (first_value, second_value) for i, (first_value, second_value) in
-    #                       all_values.items()
-    #                       if ((first_value == last_first_value and second_value > last_epsilon)
-    #                           or (first_value > last_first_value))}
+
 
     datas = [(i,
               None if maximize_ratio else 0.1,
@@ -375,7 +189,6 @@ def simulate_for_values(data_str: str,
               binary_model_name,
               binary_lr,
               binary_num_epochs,
-              None,
               maximize_ratio,
               multi_processing,
               fine_labels_to_take_out,
@@ -400,7 +213,6 @@ def main():
     original_num_epochs = 10
     secondary_num_epochs = 20
     binary_num_epochs = 10
-    number_of_fine_classes = 24
 
     # data_str = 'imagenet'
     # main_model_name = binary_model_name = 'dinov2_vits14'
@@ -410,7 +222,6 @@ def main():
     # original_num_epochs = 8
     # secondary_num_epochs = 2
     # binary_num_epochs = 5
-    # number_of_fine_classes = 42
 
     # data_str = 'openimage'
     # main_model_name = 'vit_b_16'
@@ -421,7 +232,6 @@ def main():
     # original_num_epochs = 20
     # secondary_num_epochs = 20
     # binary_num_epochs = 4
-    # number_of_fine_classes = 30
 
     binary_l_strs = list({f.split(f'e{binary_num_epochs - 1}_')[-1].replace('.npy', '')
                           for f in os.listdir('binary_results')
@@ -459,9 +269,6 @@ def main():
                     main_lr=main_lr,
                     original_num_epochs=original_num_epochs,
                     binary_model_name=binary_model_name,
-                    total_number_of_points=1,
-                    min_value=0.1,
-                    max_value=0.1,
                     binary_l_strs=curr_binary_l_strs,
                     binary_lr=curr_binary_lr,
                     binary_num_epochs=curr_binary_num_epochs,
@@ -470,7 +277,6 @@ def main():
                     secondary_model_loss=curr_secondary_model_loss,
                     secondary_num_epochs=curr_secondary_num_epochs,
                     secondary_lr=curr_secondary_lr,
-                    # only_from_missing_values=True
                     maximize_ratio=maximize_ratio,
                     lists_of_fine_labels_to_take_out=lists_of_fine_labels_to_take_out,
                     negated_conditions=False