From b5465736b9231f8c495d0b44edcca5b2db502f27 Mon Sep 17 00:00:00 2001 From: Andrew Montanez Date: Mon, 18 Oct 2021 19:33:17 -0500 Subject: [PATCH] Create validate_transformer_quality function --- tests/contributing.py | 65 +++++++++++++++++++++++++++++++++++ tests/quality/test_quality.py | 14 ++++---- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/tests/contributing.py b/tests/contributing.py index 1e4c6ad8b..117efe636 100644 --- a/tests/contributing.py +++ b/tests/contributing.py @@ -7,7 +7,10 @@ import pandas as pd from tabulate import tabulate +from rdt.transformers import get_transformers_by_type from tests.integration.test_transformers import validate_transformer +from tests.quality.test_quality import ( + TEST_THRESHOLD, get_regression_scores, get_results_table, get_test_cases) # Mapping of validation method to (check name, check description). CHECK_DETAILS = { @@ -121,3 +124,65 @@ def validate_transformer_integration(transformer): print(tabulate(summary, headers='keys', showindex=False)) return validation_error is None and error_trace is None + + +def validate_transformer_quality(transformer): + """Validate quality tests for a transformer. + + This function creates a DataFrame containing the results + from running the quality tests for this transformer against + all the datasets with columns of its input type. It does the + following steps: + 1. A DataFrame containing the regression scores obtained from running the + transformers of the input type against the datasets in the test cases is + created. Each row in the DataFrame has the transformer name, dataset name, + column name and score. The scores are computed as follows: + - For every transformer of the data type, transform all the + columns of that data type. + - For every numerical column in the dataset, the transformed + columns are used as features to train a regression model. 
+ - The score is the coefficient of determination obtained from + that model trying to predict the target column. + 2. Once the scores are gathered, a results table is created. Each row has + a transformer name, dataset name, average score for the dataset, + a score comparing the transformer's average score for the dataset to + the average of the average score for the dataset across all transformers of + the same data type, and whether or not the score passed the test threshold. + 3. The table described above is printed when this function is run. + + Args: + transformer (string or rdt.transformers.BaseTransformer): + The transformer to validate. + Output: + pandas.DataFrame: + The quality test results for the transformer, one row per dataset, + including whether each score is acceptable. + """ + if isinstance(transformer, str): + transformer = get_class(transformer) + + print(f'Validating Quality Tests for transformer {transformer.__name__}\n') + + input_type = transformer.get_input_type() + test_cases = get_test_cases({input_type}) + regression_scores = get_regression_scores(test_cases, get_transformers_by_type()) + results = get_results_table(regression_scores) + + transformer_results = results[results['transformer_name'] == transformer.__name__] + transformer_results = transformer_results.drop('transformer_name', axis=1) + transformer_results['Acceptable'] = False + passing_relative_scores = transformer_results['score_relative_to_average'] > TEST_THRESHOLD + acceptable_indices = passing_relative_scores | (transformer_results['score'] > TEST_THRESHOLD) + transformer_results.loc[acceptable_indices, 'Acceptable'] = True + new_names = { + 'dataset_name': 'Dataset', + 'score': 'Score', + 'score_relative_to_average': 'Compared To Average' + } + transformer_results = transformer_results.rename(columns=new_names) + + if transformer_results['Acceptable'].all(): + print('SUCCESS: The quality tests were successful.\n') + else: + print('Failure: The quality tests were NOT successful.\n') + + return transformer_results diff --git 
a/tests/quality/test_quality.py b/tests/quality/test_quality.py index b13f9c150..9fc764d4c 100644 --- a/tests/quality/test_quality.py +++ b/tests/quality/test_quality.py @@ -167,7 +167,7 @@ def get_results_table(regression_scores): 'transformer_name': transformer_name, 'dataset_name': dataset_name, 'score': transformer_average, - 'Compared to Average': transformer_average / average_without_transformer + 'score_relative_to_average': transformer_average / average_without_transformer }) results = results.append(row, ignore_index=True) @@ -191,11 +191,11 @@ def test_quality(subtests): columns are used as features to train a regression model. - The score is the coefficient of determination obtained from that model trying to predict the target column. - 3. Once the scores are gathered, a results table is created containing. - Each row has a transformer name, dataset name, average score for the dataset - and a score comparing the transformer's average score for the dataset to - the average of the average score for the dataset across all transformers of - the same data type. + 3. Once the scores are gathered, a results table is created. Each row has + a transformer name, dataset name, average score for the dataset and a score + comparing the transformer's average score for the dataset to the average + of the average score for the dataset across all transformers of the same + data type. 4. For every unique transformer in the results, a test is run to check that the transformer's score for each table is either higher than the threshold, or the comparitive score is higher than the threshold. @@ -214,5 +214,5 @@ def test_quality(subtests): with subtests.test( msg=f'Testing transformer {transformer}', transformer=transformer): - relative_scores = frame['Compared to Average'] + relative_scores = frame['score_relative_to_average'] assert all((relative_scores > TEST_THRESHOLD) | (frame['score'] > TEST_THRESHOLD))