diff --git a/src/dataset.py b/src/dataset.py index a14d4827..8e317d00 100644 --- a/src/dataset.py +++ b/src/dataset.py @@ -148,18 +148,18 @@ def prepare_labels(self): """Bin target values if they are continous but a classification experiment sould be done""" self.check_continous_classification(self.df) """Rename the labels and remove the ones that are not needed.""" + target = glob_conf.config['DATA']['target'] try : + # see if a special mapping should be used mapping = ast.literal_eval(glob_conf.config['DATA'][f'{self.name}.mapping']) - target = glob_conf.config['DATA']['target'] - labels = ast.literal_eval(glob_conf.config['DATA']['labels']) - df = self.df - df[target] = df[target].map(mapping) - self.df = df[df[target].isin(labels)] + self.df[target] = self.df[target].map(mapping) self.util.debug(f'for dataset {self.name} mapped {mapping}') - self.util.debug(f'Categories: {self.df[target].unique()}') - except KeyError: pass + # remove labels that are not in the labels list + labels = ast.literal_eval(glob_conf.config['DATA']['labels']) + self.df = self.df[self.df[target].isin(labels)] + self.util.debug(f'Categories: {self.df[target].unique()}') def check_continous_classification(self, df): datatype = self.util.config_val('DATA', 'type', 'dummy')