Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create validate_transformer_quality function #299

Merged
merged 1 commit into from
Oct 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions tests/contributing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@
import pandas as pd
from tabulate import tabulate

from rdt.transformers import get_transformers_by_type
from tests.code_style import (
load_transformer, validate_test_location, validate_test_names, validate_transformer_addon,
validate_transformer_importable_from_parent_module, validate_transformer_module,
validate_transformer_name, validate_transformer_subclass)
from tests.integration.test_transformers import validate_transformer
from tests.quality.test_quality import (
TEST_THRESHOLD, get_regression_scores, get_results_table, get_test_cases)

# Mapping of validation method to (check name, check description).
CHECK_DETAILS = {
Expand Down Expand Up @@ -299,3 +302,64 @@ def validate_transformer_code_style(transformer):
print(error)

return not bool(errors)


def validate_transformer_quality(transformer):
    """Run the quality tests for a transformer and report the outcome.

    The transformer is evaluated against all the datasets that contain
    columns of its input type. The resulting table is printed and returned.

    The validation goes through the following steps:
        1. Regression scores are gathered by running the transformers of the
           input type against the datasets in the test cases. Every row holds
           a transformer name, dataset name, column name and score. The
           scores are computed as follows:
             - For every transformer of the data type, all the columns of
               that data type are transformed.
             - For every numerical column in the dataset, the transformed
               columns are used as features to train a regression model.
             - The score is the coefficient of determination obtained from
               that model trying to predict the target column.
        2. A results table is built from those scores. Every row holds a
           transformer name, dataset name, the average score for the dataset
           and a score comparing that average to the average of the average
           scores for the dataset across all transformers of the same data
           type.
        3. The rows belonging to this transformer are selected and flagged as
           acceptable when either the score or the relative score is above
           ``TEST_THRESHOLD``.
        4. A success or failure message and the final table are printed.

    Args:
        transformer (str or class):
            Transformer to validate. If a string is given, it is passed to
            ``get_class`` to obtain the transformer class.

    Returns:
        DataFrame containing the following columns for each dataset the
        transformer is validated against: ``Dataset``, ``Score``,
        ``Compared To Average``, ``Acceptable``.
    """
    if isinstance(transformer, str):
        transformer = get_class(transformer)

    print(f'Validating Quality Tests for transformer {transformer.__name__}\n')

    # Only the datasets with columns of this transformer's input type apply.
    test_cases = get_test_cases({transformer.get_input_type()})
    transformers_by_type = get_transformers_by_type()
    scores = get_regression_scores(test_cases, transformers_by_type)
    all_results = get_results_table(scores)

    # Keep only this transformer's rows; the name column is then redundant.
    is_this_transformer = all_results['transformer_name'] == transformer.__name__
    report = all_results[is_this_transformer].drop(columns='transformer_name')

    # A dataset is acceptable if either the relative or the absolute score
    # is above the threshold.
    report['Acceptable'] = False
    above_relative = report['score_relative_to_average'] > TEST_THRESHOLD
    above_absolute = report['score'] > TEST_THRESHOLD
    report.loc[above_relative | above_absolute, 'Acceptable'] = True

    report = report.rename(columns={
        'dataset_name': 'Dataset',
        'score': 'Score',
        'score_relative_to_average': 'Compared To Average',
    })

    if not report['Acceptable'].all():
        print('Failure: The quality tests were NOT successful.\n')
    else:
        print('SUCCESS: The quality tests were successful.\n')

    print(tabulate(report, headers='keys', showindex=False))

    return report
14 changes: 7 additions & 7 deletions tests/quality/test_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def get_results_table(regression_scores):
'transformer_name': transformer_name,
'dataset_name': dataset_name,
'score': transformer_average,
'Compared to Average': transformer_average / average_without_transformer
'score_relative_to_average': transformer_average / average_without_transformer
})
results = results.append(row, ignore_index=True)

Expand All @@ -191,11 +191,11 @@ def test_quality(subtests):
columns are used as features to train a regression model.
- The score is the coefficient of determination obtained from
that model trying to predict the target column.
3. Once the scores are gathered, a results table is created containing.
Each row has a transformer name, dataset name, average score for the dataset
and a score comparing the transformer's average score for the dataset to
the average of the average score for the dataset across all transformers of
the same data type.
3. Once the scores are gathered, a results table is created. Each row has
a transformer name, dataset name, average score for the dataset and a score
comparing the transformer's average score for the dataset to the average
of the average score for the dataset across all transformers of the same
data type.
4. For every unique transformer in the results, a test is run to check
that the transformer's score for each table is either higher than the
threshold, or the comparative score is higher than the threshold.
Expand All @@ -214,5 +214,5 @@ def test_quality(subtests):
with subtests.test(
msg=f'Testing transformer {transformer}',
transformer=transformer):
relative_scores = frame['Compared to Average']
relative_scores = frame['score_relative_to_average']
assert all((relative_scores > TEST_THRESHOLD) | (frame['score'] > TEST_THRESHOLD))