
Commit f3eb838

Improve the inception package so there is no need to keep a GCS copy of the package. Instead, cloud training and preprocessing repackage it from the local installation and upload it to staging. (#175)
1 parent fdb695a commit f3eb838
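
The commit removes the dependency on a pre-built GCS tarball (_PACKAGE_GS_URL) by rebuilding the package at submission time with datalab.mlalpha.package_and_copy. The sketch below illustrates that flow only: rebuild a source tarball from the installed package, then copy it to the GCS staging path. The helper name, the sdist invocation, and the gsutil upload are assumptions for illustration, not the actual mlalpha implementation.

# Hypothetical sketch of the repackage-and-stage step; the real logic lives in
# datalab.mlalpha.package_and_copy and may differ.
import glob
import os
import shutil
import subprocess
import tempfile

def package_and_copy_sketch(package_root, setup_py, staging_package_url):
  """Build a source tarball from package_root and copy it to a GCS staging URL."""
  build_dir = tempfile.mkdtemp()
  try:
    # Build an sdist using the deployed setup.py against the locally installed sources.
    subprocess.check_call(
        ['python', setup_py, 'sdist', '--dist-dir', build_dir], cwd=package_root)
    tarball = glob.glob(os.path.join(build_dir, '*.tar.gz'))[0]
    # Upload the tarball to the staging location, e.g. <output_path>/staging/inception.tar.gz.
    subprocess.check_call(['gsutil', 'cp', tarball, staging_package_url])
  finally:
    shutil.rmtree(build_dir)
  return staging_package_url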

4 files changed: +25 −26 lines changed

solutionbox/inception/datalab_solutions/inception/_cloud.py

Lines changed: 22 additions & 4 deletions
@@ -32,7 +32,8 @@
 
 
 _TF_GS_URL= 'gs://cloud-datalab/deploy/tf/tensorflow-0.12.0rc1-cp27-none-linux_x86_64.whl'
-
+# Keep in sync with "data_files" in package's setup.py
+_SETUP_PY = '/datalab/packages_setup/inception/setup.py'
 
 class Cloud(object):
   """Class for cloud training, preprocessing and prediction."""
@@ -42,17 +43,31 @@ def __init__(self, checkpoint=None):
     if self._checkpoint is None:
       self._checkpoint = _util._DEFAULT_CHECKPOINT_GSURL
 
+  def _repackage_to_staging(self, output_path):
+    """Repackage inception from local installed location and copy it to GCS.
+    """
+
+    import datalab.mlalpha as mlalpha
+
+    # Find the package root. __file__ is under [package_root]/datalab_solutions/inception.
+    package_root = os.path.join(os.path.dirname(__file__), '../../')
+    staging_package_url = os.path.join(output_path, 'staging', 'inception.tar.gz')
+    mlalpha.package_and_copy(package_root, _SETUP_PY, staging_package_url)
+    return staging_package_url
+
   def preprocess(self, dataset, output_dir, pipeline_option=None):
     """Cloud preprocessing with Cloud DataFlow."""
 
     import datalab.mlalpha as mlalpha
+
     job_name = 'preprocess-inception-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S')
+    staging_package_url = self._repackage_to_staging(output_dir)
     options = {
       'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
       'temp_location': os.path.join(output_dir, 'tmp'),
       'job_name': job_name,
       'project': _util.default_project(),
-      'extra_packages': [ml.sdk_location, _util._PACKAGE_GS_URL, _TF_GS_URL],
+      'extra_packages': [ml.sdk_location, staging_package_url, _TF_GS_URL],
       'teardown_policy': 'TEARDOWN_ALWAYS',
       'no_save_main_session': True
     }
@@ -64,7 +79,8 @@ def preprocess(self, dataset, output_dir, pipeline_option=None):
     if type(dataset) is mlalpha.CsvDataSet:
       _preprocess.configure_pipeline_csv(p, self._checkpoint, dataset.files, output_dir, job_name)
     elif type(dataset) is mlalpha.BigQueryDataSet:
-      _preprocess.configure_pipeline_bigquery(p, self._checkpoint, dataset.sql, output_dir, job_name)
+      _preprocess.configure_pipeline_bigquery(p, self._checkpoint, dataset.sql,
+                                              output_dir, job_name)
     else:
       raise ValueError('preprocess takes CsvDataSet or BigQueryDataset only.')
     p.run()
@@ -75,6 +91,8 @@ def train(self, input_dir, batch_size, max_steps, output_path,
     """Cloud training with CloudML trainer service."""
 
     import datalab.mlalpha as mlalpha
+
+    staging_package_url = self._repackage_to_staging(output_path)
     job_args = {
       'input_dir': input_dir,
       'output_path': output_path,
@@ -83,7 +101,7 @@ def train(self, input_dir, batch_size, max_steps, output_path,
       'checkpoint': self._checkpoint
     }
     job_request = {
-      'package_uris': _util._PACKAGE_GS_URL,
+      'package_uris': staging_package_url,
       'python_module': 'datalab_solutions.inception.task',
       'scale_tier': scale_tier,
       'region': region,
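
For context, a hypothetical driver showing where the new staging step kicks in. The bucket paths and the CsvDataSet constructor arguments below are assumptions; only the Cloud class, the dataset type, and the preprocess() signature come from this diff.

# Hypothetical usage; paths and CsvDataSet arguments are assumed for illustration.
import datalab.mlalpha as mlalpha
from datalab_solutions.inception import _cloud

output_dir = 'gs://my-bucket/inception/preprocessed'                 # assumed path
dataset = mlalpha.CsvDataSet('gs://my-bucket/inception/train.csv')   # assumed args

# preprocess() now stages the locally installed package to
# <output_dir>/staging/inception.tar.gz and passes it to Dataflow via
# 'extra_packages' instead of the fixed _PACKAGE_GS_URL.
_cloud.Cloud().preprocess(dataset, output_dir)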

solutionbox/inception/datalab_solutions/inception/_util.py

Lines changed: 0 additions & 1 deletion
@@ -24,7 +24,6 @@
 from tensorflow.python.lib.io import file_io
 
 
-_PACKAGE_GS_URL = 'gs://cloud-datalab/packages/inception-0.1.tar.gz'
 _DEFAULT_CHECKPOINT_GSURL = 'gs://cloud-ml-data/img/flower_photos/inception_v3_2016_08_28.ckpt'
 
 
solutionbox/inception/release.sh

Lines changed: 0 additions & 21 deletions
This file was deleted.

solutionbox/inception/setup.py

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,9 @@
     'datalab_solutions',
     'datalab_solutions.inception',
   ],
+  # setup.py needs to be deployed so it can be repackaged from local installation for cloud run.
+  data_files=[('/datalab/packages_setup/inception', ['setup.py'])],
+
   description='Google Cloud Datalab Inception Package',
   author='Google',
   author_email='google-cloud-datalab-feedback@googlegroups.com',
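
With data_files in place, installing the package drops a copy of setup.py at /datalab/packages_setup/inception/setup.py, which is exactly the _SETUP_PY path that _cloud.py passes to mlalpha.package_and_copy when it rebuilds the package for a cloud run.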
