This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Add OneVsRest support to export to onnx tests and increase test coverage #446

Merged (1 commit) on Feb 24, 2020
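For context, a minimal sketch of the pattern the new OneVsRestClassifier test entries exercise: wrap a binary learner in OneVsRestClassifier, fit it through a nimbusml Pipeline, and export the fitted pipeline to ONNX. The toy DataFrame below and the export_to_onnx call are illustrative assumptions rather than code from this PR; the actual tests run the infert dataset through the harness in test_export_to_onnx.py.

import numpy as np
import pandas as pd
from nimbusml import Pipeline
from nimbusml.linear_model import AveragedPerceptronBinaryClassifier
from nimbusml.multiclass import OneVsRestClassifier

# Hypothetical toy data: two float32 features and a three-class uint32 label,
# mirroring the uint32 label conversion applied to infert_onehot_df in the test.
train_df = pd.DataFrame({
    'f1': np.random.rand(40).astype(np.float32),
    'f2': np.random.rand(40).astype(np.float32),
    'y': np.random.randint(0, 3, 40).astype(np.uint32),
})

# Same constructor arguments as the new INSTANCES entries, with toy column names.
ovr = OneVsRestClassifier(AveragedPerceptronBinaryClassifier(),
                          use_probabilities=True,
                          feature=['f1', 'f2'],
                          label='y')

pipeline = Pipeline([ovr])
pipeline.fit(train_df)

# Export the fitted pipeline to ONNX. The exact export call and arguments are
# assumed here; the test harness performs the export itself and then checks
# the resulting columns against EXPECTED_RESULTS.
pipeline.export_to_onnx('ovr_averaged_perceptron.onnx', 'com.microsoft.ml')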
src/python/tests_extended/test_export_to_onnx.py (99 changes: 54 additions & 45 deletions)
@@ -27,7 +27,10 @@
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.feature_extraction.text.extractor import Ngram
from nimbusml.feature_selection import CountSelector, MutualInformationSelector
from nimbusml.linear_model import FastLinearBinaryClassifier
from nimbusml.linear_model import (AveragedPerceptronBinaryClassifier,
FastLinearBinaryClassifier,
LinearSvmBinaryClassifier)
from nimbusml.multiclass import OneVsRestClassifier
from nimbusml.naive_bayes import NaiveBayesClassifier
from nimbusml.preprocessing import (TensorFlowScorer, FromKey, ToKey,
DateTimeSplitter, OnnxRunner)
@@ -75,6 +78,7 @@
infert_df.rename(columns={'case': 'Label'}, inplace=True)

infert_onehot_df = (OneHotVectorizer() << 'education_str').fit_transform(infert_df)
infert_onehot_df['Label'] = infert_onehot_df['Label'].astype(np.uint32)

# rank group carrier price Class dep_day nbr_stops duration
# 0 2 1 AA 240 3 1 0 12.0
@@ -118,6 +122,8 @@
}

INSTANCES = {
'AveragedPerceptronBinaryClassifier': AveragedPerceptronBinaryClassifier(
feature=['education_str.0-5yrs', 'education_str.6-11yrs', 'education_str.12+ yrs']),
'Binner': Binner(num_bins=3),
'CharTokenizer': CharTokenizer(columns={'SentimentText_Transform': 'SentimentText'}),
'ColumnConcatenator': ColumnConcatenator(columns={'Features': [
@@ -174,12 +180,28 @@
slots_in_output=2) # only accept one column
]),
'NaiveBayesClassifier': NaiveBayesClassifier(feature=['Sepal_Width', 'Sepal_Length']),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=Ngram(),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=Ngram(),
char_feature_extractor=Ngram(),
keep_diacritics=True,
columns={ 'features': ['SentimentText']}),
'OneHotHashVectorizer': OneHotHashVectorizer(columns=['education_str']),
'OneHotVectorizer': OneHotVectorizer(columns=['education_str']),
'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)': \
OneVsRestClassifier(AveragedPerceptronBinaryClassifier(),
use_probabilities=True,
feature=['age',
'education_str.0-5yrs',
'education_str.6-11yrs',
'education_str.12+ yrs'],
label='induced'),
'OneVsRestClassifier(LinearSvmBinaryClassifier)': \
OneVsRestClassifier(LinearSvmBinaryClassifier(),
use_probabilities=True,
feature=['age',
'education_str.0-5yrs',
'education_str.6-11yrs',
'education_str.12+ yrs'],
label='induced'),
'PcaAnomalyDetector': PcaAnomalyDetector(rank=3),
'PcaTransformer': PcaTransformer(rank=2),
'PixelExtractor': Pipeline([
@@ -238,6 +260,7 @@
'GlobalContrastRowScaler': iris_df.astype(np.float32),
'Handler': iris_with_nan_df,
'Indicator': iris_with_nan_df,
'LightGbmBinaryClassifier': iris_binary_df,
'LightGbmRanker': gen_tt_df,
'LinearSvmBinaryClassifier': iris_binary_df,
'Loader': image_paths_df,
@@ -251,6 +274,8 @@
'OneHotHashVectorizer': infert_df,
'OneHotVectorizer': infert_df,
'OnlineGradientDescentRegressor': iris_regression_df,
'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)': infert_onehot_df,
'OneVsRestClassifier(LinearSvmBinaryClassifier)': infert_onehot_df,
'OrdinaryLeastSquaresRegressor': iris_regression_df,
'PcaAnomalyDetector': iris_no_label_df,
'PcaTransformer': iris_regression_df,
@@ -266,10 +291,9 @@

EXPECTED_RESULTS = {
'AveragedPerceptronBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'CharTokenizer': {'cols': [('SentimentText_Transform.%03d' % i, 'SentimentText_Transform.%03d' % i)
for i in range(0, 422)]},
'CharTokenizer': {'num_cols': 424, 'cols': 0},
'ColumnConcatenator': {'num_cols': 11, 'cols': 0},
'ColumnDuplicator': {'cols': [('dup', 'dup')]},
'ColumnDuplicator': {'num_cols': 7, 'cols': 0},
'ColumnSelector': {
'num_cols': 2,
'cols': [('Sepal_Width', 'Sepal_Width'), ('Sepal_Length', 'Sepal_Length')]
@@ -284,17 +308,10 @@
'FastTreesBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'FastTreesRegressor': {'cols': [('Score', 'Score')]},
'FastTreesTweedieRegressor': {'cols': [('Score', 'Score')]},
'FromKey': {'cols': [('Sepal_Length', 'Sepal_Length'), ('Label', 'Label')]},
'GlobalContrastRowScaler': {'cols': [
('normed_columns.Petal_Length', 'normed_columns.Petal_Length'),
('normed_columns.Sepal_Width', 'normed_columns.Sepal_Width'),
('normed_columns.Sepal_Length', 'normed_columns.Sepal_Length')
]},
'Handler': {'cols': [
('NewVals.NewVals', 'NewVals.NewVals'),
('NewVals.IsMissing.NewVals', 'NewVals.IsMissing.NewVals')
]},
'Indicator': {'cols': [('Has_Nan', 'Has_Nan')]},
'FromKey': {'num_cols': 6, 'cols': 0},
'GlobalContrastRowScaler': {'num_cols': 12, 'cols': 0},
'Handler': {'num_cols': 8, 'cols': 0},
'Indicator': {'num_cols': 7, 'cols': 0},
'KMeansPlusPlus': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LightGbmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LightGbmClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
@@ -303,38 +320,26 @@
'LinearSvmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LogisticRegressionBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LogisticRegressionClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LpScaler': {'cols': [
('normed_columns.Petal_Length', 'normed_columns.Petal_Length'),
('normed_columns.Sepal_Width', 'normed_columns.Sepal_Width'),
('normed_columns.Sepal_Length', 'normed_columns.Sepal_Length')
]},
'MeanVarianceScaler': {'cols': list(zip(
['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Setosa'],
['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Setosa']
))},
'MinMaxScaler': {'cols': list(zip(
['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Setosa'],
['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Setosa']
))},
#'MutualInformationSelector',
'LpScaler': {'num_cols': 10, 'cols': 0},
'MeanVarianceScaler': {'num_cols': 5, 'cols': 0},
'MinMaxScaler': {'num_cols': 5, 'cols': 0},
'MutualInformationSelector': {'num_cols': 8, 'cols': 0},
'NGramFeaturizer': {'num_cols': 273, 'cols': 0},
'NaiveBayesClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'OneHotVectorizer': {'cols': list(zip(
['education_str.0-5yrs', 'education_str.6-11yrs', 'education_str.12+ yrs'],
['education_str.0-5yrs', 'education_str.6-11yrs', 'education_str.12+ yrs']
))},
'OneHotVectorizer': {'num_cols': 12, 'cols': 0},
'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)': \
{'cols': [('PredictedLabel', 'PredictedLabel')]},
'OneVsRestClassifier(LinearSvmBinaryClassifier)': \
{'cols': [('PredictedLabel', 'PredictedLabel')]},
'OnlineGradientDescentRegressor': {'cols': [('Score', 'Score')]},
'OrdinaryLeastSquaresRegressor': {'cols': [('Score', 'Score')]},
'PcaTransformer': {'num_cols': 9, 'cols': 0},
'PoissonRegressionRegressor': {'cols': [('Score', 'Score')]},
'PrefixColumnConcatenator': {'cols': [
('Features.Sepal_Length', 'Features.Sepal_Length'),
('Features.Sepal_Width', 'Features.Sepal_Width')
]},
'PrefixColumnConcatenator': {'num_cols': 8, 'cols': 0},
'SgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'SymSgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'ToKey': {'cols': [('edu_1', 'edu_1'), ('parity_1', 'parity_1')]},
'TypeConverter': {'cols': [('group', 'group')]},
'ToKey': {'num_cols': 12, 'cols': 0},
'TypeConverter': {'num_cols': 8, 'cols': 0},
'WordTokenizer': {'num_cols': 73, 'cols': 0}
}

@@ -571,6 +576,10 @@ def test_export_to_onnx(estimator, class_name):

manifest_diff = os.path.join(script_dir, '..', 'tools', 'manifest_diff.json')
entry_points = load_json(manifest_diff)['EntryPoints']
entry_points.extend([
{'NewName': 'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)'},
{'NewName': 'OneVsRestClassifier(LinearSvmBinaryClassifier)'}
])
entry_points = sorted(entry_points, key=lambda ep: ep['NewName'])

exportable_estimators = set()
@@ -581,21 +590,21 @@
for entry_point in entry_points:
class_name = entry_point['NewName']

# if not class_name in ['CharTokenizer']:
# continue
# if not class_name in ['Handler']:
# continue

print('\n===========> %s' % class_name)

if class_name in SKIP:
print("skipped")
continue

mod = __import__('nimbusml.' + entry_point['Module'],
fromlist=[str(class_name)])

if class_name in INSTANCES:
estimator = INSTANCES[class_name]
else:
mod = __import__('nimbusml.' + entry_point['Module'],
fromlist=[str(class_name)])

the_class = getattr(mod, class_name)
estimator = the_class()
