Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Upgrade CI build to use latest onnxruntime and automl scenario based … #463

Merged
merged 2 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ if "%InstallPythonPackages%" == "True" (

call "%PythonExe%" -m pip install --upgrade "%__currentScriptDir%target\%WheelFile%"
call "%PythonExe%" -m pip install "scikit-learn==0.19.2"
call "%PythonExe%" -m pip install --upgrade onnxruntime
)

if "%RunTests%" == "False" (
Expand Down
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ then
fi
"${PythonExe}" -m pip install --upgrade "${Wheel}"
"${PythonExe}" -m pip install "scikit-learn==0.19.2"
"${PythonExe}" -m pip install --upgrade onnxruntime
fi

if [ ${__runTests} = true ]
Expand Down
2 changes: 1 addition & 1 deletion src/python/nimbusml.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@
<Compile Include="nimbusml\timeseries\timeseriesimputer.py" />
<Compile Include="nimbusml\timeseries\__init__.py" />
<Compile Include="tests_extended\data_frame_tool.py" />
<Compile Include="tests_extended\vinod.py" />
<Compile Include="tests_extended\test_automl_scenario.py" />
<Compile Include="tests_extended\test_export_to_onnx.py" />
<Compile Include="tests\test_estimator_checks.py" />
<Compile Include="nimbusml\tests\feature_extraction\text\test_lightlda.py" />
Expand Down
2 changes: 2 additions & 0 deletions src/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
'tests': [
'nose>=1.3', 'pytest>=4.4.0',
'graphviz', 'imageio',
'onnxruntime',
],
'dprep': ['azureml-dataprep>=1.1.33'],
'utils': ['graphviz', 'imageio'],
Expand All @@ -134,6 +135,7 @@
'nbconvert>=4.2.0',
'nose>=1.3',
'pytest>=4.4.0',
'onnxruntime',
],

python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <3.8.*',
Expand Down
2 changes: 2 additions & 0 deletions src/python/setup.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ setup(
'tests': [
'nose>=1.3', 'pytest>=4.4.0',
'graphviz', 'imageio',
'onnxruntime',
],
'dprep': ['azureml-dataprep>=1.1.33'],
'utils': ['graphviz', 'imageio'],
Expand All @@ -134,6 +135,7 @@ setup(
'nbconvert>=4.2.0',
'nose>=1.3',
'pytest>=4.4.0',
'onnxruntime'
],

python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <3.8.*',
Expand Down
88 changes: 88 additions & 0 deletions src/python/tests_extended/test_automl_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------

import os
import time
import tempfile
import unittest
import pandas as pd
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.linear_model import AveragedPerceptronBinaryClassifier
from nimbusml.multiclass import OneVsRestClassifier
from nimbusml.preprocessing import DatasetTransformer
from data_frame_tool import DataFrameTool as DFT


def get_tmp_file(suffix=None):
    """Create an empty temporary file and return its path.

    The file is created with ``tempfile.mkstemp`` and its descriptor is
    closed right away so that other code can reopen the path by name.
    The file is NOT deleted automatically; the caller owns it and is
    responsible for removing it.

    :param suffix: optional file name suffix (for example ``'.onnx'``).
    :return: path of the newly created, empty file.
    """
    fd, file_name = tempfile.mkstemp(suffix=suffix)
    # Close the raw descriptor directly; there is no need to wrap it in a
    # file object just to release it.
    os.close(fd)
    return file_name

# Load the wiki detox train/test splits once, at import time, so the test
# below can use them as tab-separated file data streams.
path = get_dataset("wiki_detox_train").as_filepath()
train_set = FileDataStream.read_csv(path, sep='\t')

path = get_dataset("wiki_detox_test").as_filepath()
test_set = FileDataStream.read_csv(path, sep='\t')

def _remove_quietly(file_path):
    """Best-effort removal of a temporary file created by a test."""
    try:
        os.remove(file_path)
    except OSError:
        # The file may already be gone or may still be held open by
        # another object; a leftover temp file must not turn an otherwise
        # finished test into an error.
        pass


class TestOnnxRuntime(unittest.TestCase):
    """
    Tests automl use case:
    1. Fit featurization pipeline separately.
    2. Fit learner on top of the featurization pipeline.
    3. Export to ONNX the learner pipeline.
    4. Compare results between ML.NET and ORT
    """

    def test_automl_usecase(self):
        """Fit, export to ONNX and check ORT output matches ML.NET output."""
        # train featurization pipeline
        featurization_pipe = Pipeline([
            NGramFeaturizer(keep_diacritics=True,
                            columns={'Features': ['SentimentText']})
        ])
        featurization_pipe.fit(train_set)

        # train learner pipeline on top of the already fitted featurizer
        learner_pipe = Pipeline([
            DatasetTransformer(featurization_pipe.model),
            OneVsRestClassifier(AveragedPerceptronBinaryClassifier(),
                                feature=['Features'], label='Sentiment')
        ])
        learner_pipe.fit(train_set)

        # Export the learner pipeline to ONNX. The temporary model file is
        # registered for removal so repeated runs do not leak files.
        onnx_path = get_tmp_file('.onnx')
        self.addCleanup(_remove_quietly, onnx_path)
        learner_pipe.export_to_onnx(onnx_path, 'com.microsoft.ml',
                                    onnx_version='Stable')

        # Perform the transform using the standard ML.Net backend
        start = time.time()
        result_standard = learner_pipe.predict(test_set)
        end = time.time()
        print('%ss done transform using standard backend' % round(end - start, 3))

        # Perform the transform using the ORT backend
        df_tool = DFT(onnx_path)
        dataset = test_set.to_df()
        start = time.time()
        result_ort = df_tool.execute(dataset, ['PredictedLabel.output', 'Score.output'])
        end = time.time()
        print('%ss done transform using ORT backend (excludes df load time)' % round(end - start, 3))

        # Comparison options are the same for every column, so build them
        # once. Names differ by design (ORT columns carry an '.output'
        # part), values are compared approximately, dtypes must match.
        # NOTE(review): 'check_less_precise' is deprecated in newer pandas
        # releases in favour of rtol/atol; revisit when the supported
        # pandas version is raised.
        check_kwargs = {
            'check_names': False,
            'check_exact': False,
            'check_dtype': True,
            'check_less_precise': True
        }

        # (ML.NET column, ORT column) pairs to compare
        column_pairs = (
            ('PredictedLabel', 'PredictedLabel.output'),
            ('Score.0', 'Score.output.0'),
            ('Score.1', 'Score.output.1'),
        )

        # compare the results
        for expected_name, ort_name in column_pairs:
            col_expected = result_standard.loc[:, expected_name]
            col_ort = result_ort.loc[:, ort_name]
            pd.testing.assert_series_equal(col_expected, col_ort, **check_kwargs)

# Allow running this test module directly as a script.
if '__main__' == __name__:
    unittest.main()
80 changes: 0 additions & 80 deletions src/python/tests_extended/vinod.py

This file was deleted.