diff --git a/notebooks/BlenderQuinnAnalysis.ipynb b/notebooks/BlenderQuinnAnalysis.ipynb index 8023475..387270d 100644 --- a/notebooks/BlenderQuinnAnalysis.ipynb +++ b/notebooks/BlenderQuinnAnalysis.ipynb @@ -1339,7 +1339,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.2 (default, Mar 26 2020, 10:43:30) \n[Clang 4.0.1 (tags/RELEASE_401/final)]" }, "orig_nbformat": 4, "vscode": { diff --git a/notebooks/ContainmentSupportDecodingAnalysis.ipynb b/notebooks/ContainmentSupportDecodingAnalysis.ipynb index dd8ed83..ca9c3d3 100644 --- a/notebooks/ContainmentSupportDecodingAnalysis.ipynb +++ b/notebooks/ContainmentSupportDecodingAnalysis.ipynb @@ -1104,7 +1104,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.2 (default, Mar 26 2020, 10:43:30) \n[Clang 4.0.1 (tags/RELEASE_401/final)]" }, "orig_nbformat": 4, "vscode": { diff --git a/notebooks/ContainmentSupportTSNE.ipynb b/notebooks/ContainmentSupportTSNE.ipynb index 0113138..4f51dbb 100644 --- a/notebooks/ContainmentSupportTSNE.ipynb +++ b/notebooks/ContainmentSupportTSNE.ipynb @@ -1679,7 +1679,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.12 | packaged by conda-forge | (main, Mar 24 2022, 23:27:05) \n[Clang 12.0.1 ]" }, "orig_nbformat": 4, "vscode": { diff --git a/run/run_containment_support_linear_decoding.py b/run/run_containment_support_linear_decoding.py index 8bfb0e9..c34e85f 100644 --- a/run/run_containment_support_linear_decoding.py +++ b/run/run_containment_support_linear_decoding.py @@ -86,14 +86,18 @@ def handle_single_args_setting(args): dataset = ContainmentSupportDataset(args.dataset_path) - all_model_results = run_containment_support_linear_decoding_multiple_models( + all_model_results, all_model_per_example_results = run_containment_support_linear_decoding_multiple_models( model_names, model_kwarg_dicts, dataset, args.n_epochs, args.lr, args.by_target_object, args.by_reference_object, args.test_proportion, args.n_test_proportion_random_seeds, args.batch_size, args.validation_proportion, args.patience_epochs, args.patience_margin, args.seed) result_df = pd.DataFrame.from_records(all_model_results) result_df = result_df.assign(global_seed=args.seed, unpooled_output=args.unpooled_output) - return result_df + + per_example_df = pd.DataFrame.from_records(all_model_per_example_results) + per_example_df = per_example_df.assign(global_seed=args.seed, unpooled_output=args.unpooled_output) + + return result_df, per_example_df if __name__ == '__main__': @@ -113,9 +117,7 @@ def handle_single_args_setting(args): for k, v in main_var_args.items(): print(' ' * 26 + k + ': ' + str(v)) - out_df = handle_single_args_setting(main_args) - # out_df = pd.concat(dataframes) - # out_df.reset_index(drop=True, inplace=True) + out_df, out_per_example_df = handle_single_args_setting(main_args) output_file = main_args.output_file output_folder, _ = os.path.split(output_file) @@ -124,3 +126,7 @@ def handle_single_args_setting(args): while os.path.exists(output_file): output_file += '_1' out_df.to_csv(output_file) + + per_example_output_file = output_file.replace('.csv', '_per_example.csv') + out_per_example_df.to_csv(per_example_output_file) + diff --git a/silicon_menagerie b/silicon_menagerie index dad066b..05e2a8c 160000 --- a/silicon_menagerie +++ b/silicon_menagerie @@ -1 +1 @@ -Subproject commit dad066b44ecd117442b1cbe116f3bfbee4d770d4 +Subproject commit 05e2a8c6c105cad2bff20721690d4db1750eec88 diff --git a/simple_relational_reasoning/embeddings/containment_support_dataset.py b/simple_relational_reasoning/embeddings/containment_support_dataset.py index 2c2697c..42d1803 100644 --- a/simple_relational_reasoning/embeddings/containment_support_dataset.py +++ b/simple_relational_reasoning/embeddings/containment_support_dataset.py @@ -39,6 +39,7 @@ class DecodingDatasets(typing.NamedTuple): val: TensorDataset test: TensorDataset n_classes: int + test_configurations: typing.List[typing.Dict[str, typing.Union[int, str]]] class ContainmentSupportDataset: @@ -175,8 +176,11 @@ def generate_decoding_datasets(self, test_target_object: typing.Optional[str] = else: train_indices.append(i) + test_configurations = [dict(configuration_index=self.dataset_configuration_indices[i], reference_object=self.dataset_reference_objects[i], target_object=self.dataset_target_objects[i]) for i in test_indices] + train_indices = np.array(train_indices) test_indices = np.array(test_indices) + train_indices, validation_indices = self._split_indices(train_indices, validation_proportion) @@ -184,5 +188,6 @@ def generate_decoding_datasets(self, test_target_object: typing.Optional[str] = TensorDataset(*self._indices_to_X_y(train_indices)), TensorDataset(*self._indices_to_X_y(validation_indices)), TensorDataset(*self._indices_to_X_y(test_indices)), # type: ignore - 3 + n_classes=3, + test_configurations=test_configurations, ) diff --git a/simple_relational_reasoning/embeddings/containment_support_linear_decoding.py b/simple_relational_reasoning/embeddings/containment_support_linear_decoding.py index aff607f..4bc547a 100644 --- a/simple_relational_reasoning/embeddings/containment_support_linear_decoding.py +++ b/simple_relational_reasoning/embeddings/containment_support_linear_decoding.py @@ -48,7 +48,7 @@ def containment_support_linear_decoding_single_model_single_feature( patience_epochs: int = DEFAULT_PATIENCE_EPOCHS, patience_margin: float = DEFAULT_PATIENCE_MARGIN, batch_size: int = BATCH_SIZE, device: typing.Optional[torch.device] = None, - ) -> typing.Dict[str, typing.Any]: + ) -> typing.Tuple[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Union[str, int]]]]: if device is None: device = next(model.parameters()).device @@ -146,6 +146,7 @@ def containment_support_linear_decoding_single_model_single_feature( test_losses = [] test_accs = [] + per_example_test_correct = [] for X, y in test_dataloader: X = X.to(device) y = y.to(device) @@ -155,7 +156,9 @@ def containment_support_linear_decoding_single_model_single_feature( logits = best_decoder(embeddings) loss = criterion(logits, y) test_losses.append(loss.item()) - test_accs.append((logits.argmax(dim=1) == y).float().mean().item()) + correct = (logits.argmax(dim=1) == y) + test_accs.append(correct.float().mean().item()) + per_example_test_correct.append(correct.detach().cpu()) test_loss = np.mean(test_losses) test_acc = np.mean(test_accs) @@ -167,12 +170,18 @@ def containment_support_linear_decoding_single_model_single_feature( train_min_epoch = np.argmin(train_losses) val_min_epoch = np.argmin(val_losses) - return dict( + summary_results = dict( train_min_loss=train_losses[train_min_epoch], train_min_acc=train_accuracies[train_min_epoch], train_min_epoch=train_min_epoch + 1, val_min_loss=val_losses[val_min_epoch], val_min_acc=val_accuracies[val_min_epoch], val_min_epoch=val_min_epoch + 1, test_loss=test_loss, test_acc=test_acc, test_epoch=val_min_epoch + 1, ) + per_example_test_correct = torch.cat(per_example_test_correct, dim=0).squeeze().numpy() + for i, config in enumerate(datasets.test_configurations): + config['correct'] = per_example_test_correct[i] + + return summary_results, datasets.test_configurations + def run_containment_support_linear_decoding_single_model_multiple_features( model: nn.Module, dataset: ContainmentSupportDataset, @@ -180,12 +189,13 @@ def run_containment_support_linear_decoding_single_model_multiple_features( test_proportion: typing.Optional[float] = None, n_test_proportion_random_seeds: int = DEFAULT_N_TEST_PROPORTION_RANDOM_SEEDS, batch_size: int = BATCH_SIZE, validation_proportion: float = DEFAULT_VALIDATION_PROPORTION, patience_epochs: int = DEFAULT_PATIENCE_EPOCHS, patience_margin: float = DEFAULT_PATIENCE_MARGIN, - random_seed: int = DEFAULT_RANDOM_SEED, ): + random_seed: int = DEFAULT_RANDOM_SEED) -> typing.Tuple[typing.List[typing.Dict[str, typing.Any]], typing.List[typing.Dict[str, typing.Union[int, str]]]]: if by_target_object is None and by_reference_object is None and test_proportion is None: raise ValueError('test_reference_object, test_target_object, and test_proportion cannot all be None') model_results = [] + model_per_example_results = [] decoding_dataset_kwarg_names = [] decoding_dataset_kwarg_value_sets = [] @@ -209,8 +219,10 @@ def run_containment_support_linear_decoding_single_model_multiple_features( print(f'Running decoding with {kwarg_dict}') decoding_datasets = dataset.generate_decoding_datasets(validation_proportion=validation_proportion, **kwarg_dict) - feature_results = containment_support_linear_decoding_single_model_single_feature(model, decoding_datasets, n_epochs, lr, patience_epochs, patience_margin, batch_size) + feature_results, per_example_results = containment_support_linear_decoding_single_model_single_feature(model, decoding_datasets, n_epochs, lr, patience_epochs, patience_margin, batch_size) feature_results.update(kwarg_dict) + for result in per_example_results: + result.update(kwarg_dict) test_type = '' if by_target_object: @@ -221,9 +233,13 @@ def run_containment_support_linear_decoding_single_model_multiple_features( test_type += f'{"_" if len(test_type) else ""}configuration' feature_results['test_type'] = test_type + for result in per_example_results: + result['test_type'] = test_type + model_results.append(feature_results) + model_per_example_results.extend(per_example_results) - return model_results + return model_results, model_per_example_results def run_containment_support_linear_decoding_multiple_models( @@ -234,9 +250,10 @@ def run_containment_support_linear_decoding_multiple_models( test_proportion: typing.Optional[float] = None, n_test_proportion_random_seeds: int = DEFAULT_N_TEST_PROPORTION_RANDOM_SEEDS, batch_size: int = BATCH_SIZE, validation_proportion: float = DEFAULT_VALIDATION_PROPORTION, patience_epochs: int = DEFAULT_PATIENCE_EPOCHS, patience_margin: float = DEFAULT_PATIENCE_MARGIN, - random_seed: int = DEFAULT_RANDOM_SEED, ): + random_seed: int = DEFAULT_RANDOM_SEED) -> typing.Tuple[typing.List[typing.Dict[str, typing.Any]], typing.List[typing.Dict[str, typing.Union[int, str]]]]: all_model_results = [] + all_model_per_example_results = [] if by_target_object is None and by_reference_object is None and test_proportion is None: raise ValueError('test_reference_object, test_target_object, and test_proportion cannot all be None') @@ -245,17 +262,21 @@ def run_containment_support_linear_decoding_multiple_models( print(f'Starting model {name}') model = build_model(**model_kwargs) - model_results = run_containment_support_linear_decoding_single_model_multiple_features( + model_results, model_per_example_results = run_containment_support_linear_decoding_single_model_multiple_features( model, dataset, n_epochs, lr, by_target_object, by_reference_object, test_proportion, n_test_proportion_random_seeds, batch_size, validation_proportion, patience_epochs, patience_margin, random_seed) for feature_result in model_results: feature_result['model'] = name + for per_example_result in model_per_example_results: + per_example_result['model'] = name + all_model_results.extend(model_results) + all_model_per_example_results.extend(model_per_example_results) del model torch.cuda.empty_cache() - return all_model_results + return all_model_results, all_model_per_example_results