Skip to content

Commit

Permalink
Remove unused code and simplify NeuralPyEDCR implementation
Browse files Browse the repository at this point in the history
krichelj committed Jan 3, 2025
1 parent 619922c commit 525d2de
Showing 1 changed file with 4 additions and 198 deletions.
202 changes: 4 additions & 198 deletions NeuralPyEDCR.py
Original file line number Diff line number Diff line change
@@ -6,16 +6,13 @@

import numpy as np
import typing
from tqdm.contrib.concurrent import process_map
# from tqdm.contrib.concurrent import process_map
# import itertools

import data_preprocessing
import PyEDCR
import backbone_pipeline
import combined_fine_tuning
import neural_evaluation
import google_sheets_api
import plotting
# import google_sheets_api
# import plotting


class NeuralPyEDCR(PyEDCR.EDCR):
@@ -42,7 +39,6 @@ def __init__(self,
binary_model_name: str = None,
binary_num_epochs: int = None,
binary_lr: typing.Union[str, float] = None,
num_train_images_per_class: int = None,
maximize_ratio: bool = True,
indices_of_fine_labels_to_take_out: typing.List[int] = [],
negated_conditions: bool = False):
@@ -66,142 +62,16 @@ def __init__(self,
binary_num_epochs=binary_num_epochs,
binary_lr=binary_lr,
secondary_lr=secondary_lr,
num_train_images_per_class=num_train_images_per_class,
maximize_ratio=maximize_ratio,
indices_of_fine_labels_to_take_out=indices_of_fine_labels_to_take_out,
negated_conditions=negated_conditions)
self.EDCR_num_epochs = EDCR_num_epochs
self.neural_num_epochs = neural_num_epochs

# relevant_predicted_indices = None

# if 'correct' in experiment_name:
# train_pred_correct_mask = np.ones_like(self.pred_data['train']['original'][
# data_preprocessing.DataPreprocessor.granularities['fine']])
#
# for g in data_preprocessing.DataPreprocessor.granularities.values():
# train_pred_correct_mask &= self.get_where_predicted_correct(test=False, g=g)
#
# relevant_predicted_indices = np.where(train_pred_correct_mask == 1)[0]

# if num_train_images_per_class is not None:
# example_indices = []
#
# for i in range(len(self.preprocessor.fine_grain_classes_str)):
# i_indices_in_ground_truth = np.where(self.preprocessor.train_true_fine_data == i)[0]
# cls_idx = np.intersect1d(i_indices_in_ground_truth, relevant_predicted_indices)
# example_indices.extend(cls_idx[:num_train_images_per_class])
# # break
#
# self.K_train = np.array(example_indices)
#
# for g in data_preprocessing.DataPreprocessor.granularities.values():
# self.pred_data['train']['original'][g] = self.pred_data['train']['original'][g][self.K_train]

# for g in data_preprocessing.DataPreprocessor.granularities.values():
# print(f"prediction train {g.g_str} is {self.pred_data['train']['original'][g]}")
# print(f"and its ground truth is {self.pred_data['train']['original'][g]}")

def run_training_correction_model_pipeline(self,
new_model_name: str,
new_lr: float):

perceived_examples_with_errors = set()
for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
perceived_examples_with_errors = perceived_examples_with_errors.union(set(
np.where(self.get_predictions(test=False, g=g, stage='post_detection') == -1)[0]))

perceived_examples_with_errors = np.array(list(perceived_examples_with_errors))

print(utils.red_text(f'\nNumber of perceived train errors: {len(perceived_examples_with_errors)} / '
f'{self.T_train}\n'))

preprocessor, fine_tuners, loaders, devices = (
backbone_pipeline.initiate(
data_str=self.data_str,
model_name=new_model_name,
preprocessor=self.preprocessor,
lr=new_lr,
combined=self.combined,
error_indices=perceived_examples_with_errors,
# train_eval_split=0.8
))

if self.correction_model is None:
self.correction_model = fine_tuners[0]

combined_fine_tuning.fine_tune_combined_model(
preprocessor=preprocessor,
lr=new_lr,
fine_tuner=self.correction_model,
device=devices[0],
loaders=loaders,
loss=self.loss,
save_files=False,
evaluate_on_test_between_epochs=False,
num_epochs=self.neural_num_epochs,
data_str=preprocessor.data_str,
model_name=self.main_model_name
# debug=True
)
print('#' * 100)

_, _, loaders, devices = backbone_pipeline.initiate(
data_str=self.data_str,
model_name=new_model_name,
preprocessor=self.preprocessor,
lr=new_lr,
combined=self.combined,
error_indices=perceived_examples_with_errors,
evaluation=True,
)

evaluation_return_values = neural_evaluation.evaluate_combined_model(
preprocessor=self.preprocessor,
fine_tuner=self.correction_model,
loaders=loaders,
loss=self.loss,
device=devices[0],
split='train',
print_results=True)

new_fine_predictions, new_coarse_predictions = evaluation_return_values[2], evaluation_return_values[3]

self.pred_data['train']['post_detection'][data_preprocessing.FineCoarseDataPreprocessor.granularities['fine']][
perceived_examples_with_errors] = new_fine_predictions
self.pred_data['train']['post_detection'][data_preprocessing.FineCoarseDataPreprocessor.granularities['coarse']][
perceived_examples_with_errors] = new_coarse_predictions

def apply_new_model_on_test(self,
print_results: bool = True):
new_fine_predictions, new_coarse_predictions = (
neural_evaluation.run_combined_evaluating_pipeline(data_str=self.data_str,
model_name=self.main_model_name,
split='test',
lr=self.lr,
loss=self.loss,
num_epochs=self.neural_num_epochs,
pretrained_fine_tuner=self.correction_model,
save_files=False,
print_results=False))

for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
old_test_g_predictions = self.get_predictions(test=True, g=g, stage='post_detection')
new_test_g_predictions = new_fine_predictions if g.g_str == 'fine' else new_coarse_predictions

self.pred_data['test']['post_detection'][g] = np.where(old_test_g_predictions == -1,
new_test_g_predictions,
old_test_g_predictions)
if print_results:
self.print_metrics(split='test', prior=False, stage='post_detection')

where_fixed_initial_error = set()
for g in data_preprocessing.FineCoarseDataPreprocessor.granularities.values():
where_fixed_initial_error = where_fixed_initial_error.union(set(
np.where(self.get_where_predicted_correct(test=True, g=g, stage='post_detection') == 1)[0]
).intersection(set(np.where(self.get_where_predicted_incorrect(test=True, g=g) == 1)[0])))

print(f'where_fixed_initial_error: {len(where_fixed_initial_error)}')

def run_learning_pipeline(self,
multi_processing: bool = True):
@@ -231,35 +101,6 @@ def run_learning_pipeline(self,

print('\nRule learning completed\n')

def learn_error_binary_model(self,
binary_model_name: str,
binary_lr: typing.Union[float, str]):
preprocessor, fine_tuners, loaders, devices = backbone_pipeline.initiate(
data_str=self.data_str,
model_name=binary_model_name,
preprocessor=self.preprocessor,
lr=binary_lr,
train_fine_predictions=self.get_predictions(test=False, g=self.preprocessor.granularities['fine']),
train_coarse_predictions=self.get_predictions(test=False, g=self.preprocessor.granularities['coarse']),
test_fine_predictions=self.get_predictions(test=True, g=self.preprocessor.granularities['fine']),
test_coarse_predictions=self.get_predictions(test=True, g=self.preprocessor.granularities['coarse'])
# debug=True
)

combined_fine_tuning.fine_tune_combined_model(
preprocessor=preprocessor,
lr=binary_lr,
fine_tuner=fine_tuners[0],
device=devices[0],
loaders=loaders,
loss='error_BCE',
save_files=False,
evaluate_on_test_between_epochs=False,
num_epochs=2,
data_str=preprocessor.data_str,
model_name=self.main_model_name
)


def work_on_value(args):
(epsilon_index,
@@ -276,14 +117,12 @@ def work_on_value(args):
binary_model_name,
binary_lr,
binary_num_epochs,
num_train_images_per_class,
maximize_ratio,
multi_processing,
fine_labels_to_take_out,
negated_conditions
) = args

print('#' * 25 + f'num_train_images_per_class = {num_train_images_per_class}, eps = {epsilon}' + '#' * 50)
edcr = NeuralPyEDCR(data_str=data_str,
epsilon=epsilon,
sheet_index=epsilon_index,
@@ -301,10 +140,8 @@ def work_on_value(args):
binary_model_name=binary_model_name,
binary_lr=binary_lr,
binary_num_epochs=binary_num_epochs,
# lower_predictions_indices=lower_predictions_indices,
EDCR_num_epochs=1,
neural_num_epochs=1,
# num_train_images_per_class=num_train_images_per_class
maximize_ratio=maximize_ratio,
indices_of_fine_labels_to_take_out=fine_labels_to_take_out,
negated_conditions=negated_conditions
@@ -325,9 +162,6 @@ def simulate_for_values(data_str: str,
main_lr: typing.Union[float, str],
original_num_epochs: int,
binary_model_name: str,
total_number_of_points: int = 10,
min_value: float = 0.1,
max_value: float = 0.3,
multi_processing: bool = True,
secondary_model_name: str = None,
secondary_model_loss: str = None,
@@ -336,30 +170,10 @@ def simulate_for_values(data_str: str,
binary_l_strs: typing.List[str] = [],
binary_lr: typing.Union[str, float] = None,
binary_num_epochs: int = None,
num_train_images_per_class: typing.Sequence[int] = None,
only_from_missing_values: bool = False,
maximize_ratio: bool = True,
lists_of_fine_labels_to_take_out: typing.List[typing.List[int]] = [],
negated_conditions: bool = False):
# all_values = {i: element for i, element
# in enumerate(itertools.product(train_labels_noise_ratios,
# lists_of_fine_labels_to_take_out
# # np.linspace(start=min_value,
# # stop=max_value,
# # num=total_number_of_points)
# ))
# }

# if only_from_missing_values:
# first_values, second_values = google_sheets_api.get_values_from_columns(sheet_tab_name=sheet_tab_name,
# column_letters=['A', 'B'])
# if len(first_values) and len(second_values):
# last_first_value = first_values[-1]
# last_epsilon = second_values[-1]
# all_values = {i: (first_value, second_value) for i, (first_value, second_value) in
# all_values.items()
# if ((first_value == last_first_value and second_value > last_epsilon)
# or (first_value > last_first_value))}


datas = [(i,
None if maximize_ratio else 0.1,
@@ -375,7 +189,6 @@ def simulate_for_values(data_str: str,
binary_model_name,
binary_lr,
binary_num_epochs,
None,
maximize_ratio,
multi_processing,
fine_labels_to_take_out,
@@ -400,7 +213,6 @@ def main():
original_num_epochs = 10
secondary_num_epochs = 20
binary_num_epochs = 10
number_of_fine_classes = 24

# data_str = 'imagenet'
# main_model_name = binary_model_name = 'dinov2_vits14'
@@ -410,7 +222,6 @@ def main():
# original_num_epochs = 8
# secondary_num_epochs = 2
# binary_num_epochs = 5
# number_of_fine_classes = 42

# data_str = 'openimage'
# main_model_name = 'vit_b_16'
@@ -421,7 +232,6 @@ def main():
# original_num_epochs = 20
# secondary_num_epochs = 20
# binary_num_epochs = 4
# number_of_fine_classes = 30

binary_l_strs = list({f.split(f'e{binary_num_epochs - 1}_')[-1].replace('.npy', '')
for f in os.listdir('binary_results')
@@ -459,9 +269,6 @@ def main():
main_lr=main_lr,
original_num_epochs=original_num_epochs,
binary_model_name=binary_model_name,
total_number_of_points=1,
min_value=0.1,
max_value=0.1,
binary_l_strs=curr_binary_l_strs,
binary_lr=curr_binary_lr,
binary_num_epochs=curr_binary_num_epochs,
@@ -470,7 +277,6 @@ def main():
secondary_model_loss=curr_secondary_model_loss,
secondary_num_epochs=curr_secondary_num_epochs,
secondary_lr=curr_secondary_lr,
# only_from_missing_values=True
maximize_ratio=maximize_ratio,
lists_of_fine_labels_to_take_out=lists_of_fine_labels_to_take_out,
negated_conditions=False

0 comments on commit 525d2de

Please sign in to comment.