Skip to content
This repository was archived by the owner on Nov 5, 2022. It is now read-only.

Commit 17f3512

Browse files
authored
Fixed tests and added CI (#5)
* add dockerfile for testing
* add cloud build and travis CI
* fixed tests
* add python version badge
1 parent 52d8abf commit 17f3512

File tree

8 files changed, with 212 additions (+212) and 66 deletions (-66).

8 files changed

+212
-66
lines changed

.travis.yml

+9
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
language: python
2+
python:
3+
- "3.6"
4+
- "3.7"
5+
# Tensorflow 1.x does not support python 3.8+
6+
install:
7+
- pip install -e .
8+
script:
9+
- python -m unittest

Dockerfile

+19
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
FROM python:3.6
16+
COPY . /app
17+
WORKDIR /app
18+
RUN pip install -e ".[dev]"
19+
RUN python -m unittest

README.md

+4-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
# ML Pipeline Generator
2+
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ml-pipeline-gen)
23
[![PyPI version](https://badge.fury.io/py/ml-pipeline-gen.svg)](https://badge.fury.io/py/ml-pipeline-gen)
4+
[![Build
5+
Status](https://travis-ci.com/GoogleCloudPlatform/ml-pipeline-generator-python.svg?branch=master)](https://travis-ci.com/GoogleCloudPlatform/ml-pipeline-generator-python)
36

47
ML Pipeline Generator is a tool for generating end-to-end pipelines composed of GCP components so that users can easily migrate their local ML models onto GCP and start realizing the benefits of the Cloud quickly.
58

@@ -131,5 +134,5 @@ as inputs in the config file.
131134
To modify the behavior of the library, install `ml-pipeline-gen` using:
132135

133136
```bash
134-
pip install -e .
137+
pip install -e ".[dev]"
135138
```

setup.py

+31-25
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
# Copyright 2020 Google Inc. All Rights Reserved.
22
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
44
# you may not use this file except in compliance with the License.
55
# You may obtain a copy of the License at
66
#
77
# http://www.apache.org/licenses/LICENSE-2.0
88
#
99
# Unless required by applicable law or agreed to in writing, software
10-
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
@@ -16,37 +16,43 @@
1616
import setuptools
1717
import ml_pipeline_gen
1818

19-
with open("README.md", "r") as f:
19+
with open('README.md', 'r') as f:
2020
long_description = f.read()
2121

2222
setuptools.setup(
23-
name="ml-pipeline-gen",
23+
name='ml-pipeline-gen',
2424
version=ml_pipeline_gen.__version__,
25-
author="Michael Hu",
26-
author_email="author@example.com",
27-
description="A tool for generating end-to-end pipelines on GCP.",
25+
author='Michael Hu',
26+
author_email='author@example.com',
27+
description='A tool for generating end-to-end pipelines on GCP.',
2828
long_description=long_description,
29-
long_description_content_type="text/markdown",
30-
url="https://github.com/GoogleCloudPlatform/ml-pipeline-generator-python",
31-
packages=["ml_pipeline_gen"],
29+
long_description_content_type='text/markdown',
30+
url='https://github.com/GoogleCloudPlatform/ml-pipeline-generator-python',
31+
packages=['ml_pipeline_gen'],
3232
install_requires=[
33-
"cloudml-hypertune",
34-
"gcsfs",
35-
"google-api-python-client",
36-
"jinja2",
37-
"joblib",
38-
"kfp",
39-
"pandas",
40-
"pyyaml",
41-
"scikit-learn",
42-
"tensorflow>=0.14.0,<2.0.0",
43-
"xgboost",
33+
'cloudml-hypertune',
34+
'gcsfs',
35+
'google-api-python-client',
36+
'jinja2',
37+
'joblib',
38+
'kfp',
39+
'pandas',
40+
'pyyaml',
41+
'scikit-learn',
42+
'tensorflow>=1.14.0,<2.0.0',
43+
'xgboost',
4444
],
45+
extras_require={
46+
'dev': [
47+
'mock',
48+
]
49+
},
4550
classifiers=[
46-
"Programming Language :: Python :: 3",
47-
"License :: OSI Approved :: Apache Software License",
48-
"Operating System :: OS Independent",
51+
'Programming Language :: Python :: 3.6',
52+
'Programming Language :: Python :: 3.7',
53+
'License :: OSI Approved :: Apache Software License',
54+
'Operating System :: OS Independent',
4955
],
50-
python_requires=">=3.6",
56+
python_requires='>=3.6',
5157
include_package_data=True,
5258
)

tests/examples/sklearn/test_sklearn_model.py

+44-7
Original file line number | Diff line number | Diff line change
@@ -12,29 +12,66 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
"""Unit tests demo scikit-learn model."""
15+
import argparse
16+
import os
17+
import shutil
18+
import sys
19+
import tempfile
1520
import unittest
1621

17-
from examples.sklearn import sklearn_model
22+
from tests import test_utils
1823

1924

2025
class TestModel(unittest.TestCase):
2126
"""Tests demo model."""
2227

28+
@classmethod
29+
def setUpClass(cls):
30+
super(TestModel, cls).setUpClass()
31+
cls.test_dir = tempfile.mkdtemp()
32+
cls.demo_dir = os.path.join(cls.test_dir, 'demo')
33+
shutil.copytree('examples/sklearn', cls.demo_dir)
34+
35+
# TODO(humichael) We can't import the model using __import__ because
36+
# several other examples are also adding their demo dirs to sys.path.
37+
# It's very likely the model module that is imported is not the one from
38+
# this test. All examples currently use the same census_preprocess.
39+
# These tests will break if any example uses a different preprocessing
40+
# script.
41+
# We should just mock this.
42+
sys.path.append(cls.demo_dir)
43+
sklearn_model = test_utils.load_module(
44+
'sklearn_model', os.path.join(
45+
cls.demo_dir, 'model', 'sklearn_model.py'))
46+
sklearn_preprocess = test_utils.load_module(
47+
'sklearn_preprocess', os.path.join(
48+
cls.demo_dir, 'model', 'census_preprocess.py'))
49+
sys.path.remove(cls.demo_dir)
50+
params = argparse.Namespace(C=1.0)
51+
cls.model = sklearn_model.get_model(params)
52+
cls.features, cls.labels, _, _ = sklearn_preprocess.load_data()
53+
54+
@classmethod
55+
def tearDownClass(cls):
56+
super(TestModel, cls).tearDownClass()
57+
shutil.rmtree(cls.test_dir)
58+
2359
def setUp(self):
2460
super(TestModel, self).setUp()
25-
self.features, self.labels = sklearn_model.get_data()
61+
self.model = self.__class__.model
62+
self.features = self.__class__.features
63+
self.labels = self.__class__.labels
2664

2765
def test_get_data(self):
2866
"""Checks that there is a label for each feature."""
2967
self.assertEqual(self.features.shape[0], self.labels.shape[0])
3068

3169
def test_get_model(self):
3270
"""Checks that the model can be trained and used for predictions."""
33-
model = sklearn_model.get_model()
34-
model.fit(self.features, self.labels)
35-
preds = model.predict(self.features)
36-
self.assertEqual(preds.shape, self.labels.shape)
71+
self.model.fit(self.features, self.labels)
72+
preds = self.model.predict(self.features)
73+
self.assertEqual(preds.shape[0], self.labels.shape[0])
3774

3875

39-
if __name__ == "__main__":
76+
if __name__ == '__main__':
4077
unittest.main()

tests/examples/tensorflow/test_tf_model.py

+51-5
Original file line number | Diff line number | Diff line change
@@ -12,29 +12,75 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
"""Unit tests demo TF model."""
15+
import argparse
16+
import os
17+
import shutil
18+
import sys
19+
import tempfile
1520
import unittest
1621

17-
from examples.tf import tf_model
22+
import tensorflow.compat.v1 as tf
1823

24+
from tests import test_utils
1925

20-
class TestModel(unittest.TestCase):
26+
27+
class TestModel(tf.test.TestCase):
2128
"""Tests TF demo model."""
2229

30+
@classmethod
31+
def setUpClass(cls):
32+
super(TestModel, cls).setUpClass()
33+
cls.test_dir = tempfile.mkdtemp()
34+
cls.demo_dir = os.path.join(cls.test_dir, 'demo')
35+
shutil.copytree('examples/tf', cls.demo_dir)
36+
37+
# TODO(humichael) We can't import the model using __import__ because
38+
# several other examples are also adding their demo dirs to sys.path.
39+
# It's very likely the model module that is imported is not the one from
40+
# this test. All examples currently use the same census_preprocess.
41+
# These tests will break if any example uses a different preprocessing
42+
# script.
43+
# We should just mock this.
44+
sys.path.append(cls.demo_dir)
45+
tf_model = test_utils.load_module(
46+
'tf_model', os.path.join(cls.demo_dir, 'model', 'tf_model.py'))
47+
tf_preprocess = test_utils.load_module(
48+
'tf_preprocess', os.path.join(
49+
cls.demo_dir, 'model', 'census_preprocess.py'))
50+
sys.path.remove(cls.demo_dir)
51+
52+
cls.features, cls.labels, _, _ = tf_preprocess.load_data()
53+
cls.model = tf_model
54+
55+
@classmethod
56+
def tearDownClass(cls):
57+
super(TestModel, cls).tearDownClass()
58+
shutil.rmtree(cls.test_dir)
59+
60+
# pylint: disable=g-import-not-at-top
2361
def setUp(self):
2462
super(TestModel, self).setUp()
25-
self.features, self.labels = tf_model.get_data()
63+
self.model = self.__class__.model
64+
self.features = self.__class__.features
65+
self.labels = self.__class__.labels
2666

2767
def test_get_data(self):
2868
"""Checks that there is a label for each feature."""
2969
self.assertEqual(self.features.shape[0], self.labels.shape[0])
3070

3171
def test_get_model(self):
3272
"""Checks that the model can be trained and used for predictions."""
33-
model = tf_model.get_model()
73+
input_layer = tf.keras.layers.Input(shape=(self.features.shape[1],))
74+
params = argparse.Namespace(first_layer_size=50, num_layers=5)
75+
predictions = self.model.get_model(input_layer, params)
76+
77+
model = tf.keras.models.Model(inputs=input_layer, outputs=predictions)
78+
model.compile(optimizer='adam', loss=tf.losses.sigmoid_cross_entropy,
79+
metrics=['accuracy'])
3480
model.fit(self.features, self.labels)
3581
preds = model.predict(self.features)
3682
self.assertEqual(preds.shape[0], self.labels.shape[0])
3783

3884

39-
if __name__ == "__main__":
85+
if __name__ == '__main__':
4086
unittest.main()

tests/ml_pipeline_gen/test_models.py

+42-28
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,14 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
"""Unit tests for models classes."""
16+
import mock
1617
import os
18+
import shutil
19+
import tempfile
1720
import unittest
1821

22+
from googleapiclient import discovery
23+
1924
from ml_pipeline_gen.models import BaseModel
2025
from ml_pipeline_gen.models import SklearnModel
2126

@@ -33,45 +38,54 @@ class TestSklearnModel(unittest.TestCase):
3338
"""Tests SklearnModel class."""
3439

3540
@classmethod
36-
def setUpClass(cls):
37-
"""Instantiates a model."""
41+
@mock.patch.object(discovery, 'build')
42+
def setUpClass(cls, build_mock):
43+
"""Copies a demo and instantiates a model."""
3844
super(TestSklearnModel, cls).setUpClass()
39-
cls.config = "examples/sklearn/config.yaml"
45+
build_mock.return_value = None
46+
cls.cwd = os.getcwd()
47+
cls.test_dir = tempfile.mkdtemp()
48+
cls.demo_dir = os.path.join(cls.test_dir, 'demo')
49+
shutil.copytree('examples/sklearn', cls.demo_dir)
50+
51+
os.chdir(cls.demo_dir)
52+
cls.config = 'config.yaml.example'
4053
cls.model = SklearnModel(cls.config)
4154

4255
@classmethod
4356
def tearDownClass(cls):
44-
"""Cleans up generated directories."""
57+
"""Switch back to the original working dir and removes the demo."""
4558
super(TestSklearnModel, cls).tearDownClass()
46-
cls.model.clean_up()
59+
os.chdir(cls.cwd)
60+
shutil.rmtree(cls.test_dir)
4761

48-
# TODO(humichael): technically private functions don't need to be tested. It
49-
# should reflect in public functions.
50-
def test_set_config(self):
51-
"""Ensures instance variables are created."""
52-
model = self.__class__.model
53-
model.model = {}
62+
def setUp(self):
63+
super(TestSklearnModel, self).setUp()
64+
self.model = self.__class__.model
5465

55-
model._set_config(self.__class__.config)
56-
self.assertEqual(model.model["name"], "sklearn_demo_model")
66+
def tearDown(self):
67+
super(TestSklearnModel, self).tearDown()
68+
try:
69+
self.__class__.model.clean_up()
70+
except FileNotFoundError:
71+
pass
5772

58-
def test_populate_trainer(self):
73+
def test_generate_files(self):
5974
"""Ensures task.py and model.py are created."""
60-
model = self.__class__.model
61-
model.clean_up()
62-
63-
model._populate_trainer()
64-
trainer_files = os.listdir("trainer")
65-
self.assertIn("task.py", trainer_files)
66-
self.assertIn("model.py", trainer_files)
67-
68-
@unittest.skip("How to test without running training?")
75+
self.assertFalse(os.path.exists('trainer'))
76+
self.model.generate_files()
77+
self.assertTrue(os.path.exists('trainer'))
78+
trainer_files = os.listdir('trainer')
79+
self.assertIn('task.py', trainer_files)
80+
self.assertIn('model.py', trainer_files)
81+
82+
@unittest.skip('How to test without running training?')
6983
def test_local_train(self):
7084
"""Tests local training."""
71-
model = self.__class__.model
72-
model.train()
73-
model_files = os.listdir("models")
74-
self.assertIn("{}.joblib".format(model.model["name"]), model_files)
85+
self.model.generate_files()
86+
self.model.train()
87+
model_files = os.listdir('models')
88+
self.assertIn('{}.joblib'.format(self.model.model['name']), model_files)
7589

7690
# TODO(humichael): Need to spoof CAIP calls to test this.
7791
def test_cloud_train(self):
@@ -84,5 +98,5 @@ def test_serve(self):
8498
pass
8599

86100

87-
if __name__ == "__main__":
101+
if __name__ == '__main__':
88102
unittest.main()

0 commit comments

Comments
 (0)