@@ -32,7 +32,8 @@
 
 
 _TF_GS_URL = 'gs://cloud-datalab/deploy/tf/tensorflow-0.12.0rc1-cp27-none-linux_x86_64.whl'
-
+# Keep in sync with "data_files" in package's setup.py
+_SETUP_PY = '/datalab/packages_setup/inception/setup.py'
 
 class Cloud(object):
   """Class for cloud training, preprocessing and prediction."""
@@ -42,17 +43,31 @@ def __init__(self, checkpoint=None):
     if self._checkpoint is None:
       self._checkpoint = _util._DEFAULT_CHECKPOINT_GSURL
 
+  def _repackage_to_staging(self, output_path):
+    """Repackage inception from local installed location and copy it to GCS.
+    """
+
+    import datalab.mlalpha as mlalpha
+
+    # Find the package root. __file__ is under [package_root]/datalab_solutions/inception.
+    package_root = os.path.join(os.path.dirname(__file__), '../../')
+    staging_package_url = os.path.join(output_path, 'staging', 'inception.tar.gz')
+    mlalpha.package_and_copy(package_root, _SETUP_PY, staging_package_url)
+    return staging_package_url
+
   def preprocess(self, dataset, output_dir, pipeline_option=None):
     """Cloud preprocessing with Cloud DataFlow."""
 
     import datalab.mlalpha as mlalpha
+
     job_name = 'preprocess-inception-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S')
+    staging_package_url = self._repackage_to_staging(output_dir)
     options = {
         'staging_location': os.path.join(output_dir, 'tmp', 'staging'),
         'temp_location': os.path.join(output_dir, 'tmp'),
         'job_name': job_name,
         'project': _util.default_project(),
-        'extra_packages': [ml.sdk_location, _util._PACKAGE_GS_URL, _TF_GS_URL],
+        'extra_packages': [ml.sdk_location, staging_package_url, _TF_GS_URL],
         'teardown_policy': 'TEARDOWN_ALWAYS',
         'no_save_main_session': True
     }
@@ -64,7 +79,8 @@ def preprocess(self, dataset, output_dir, pipeline_option=None):
     if type(dataset) is mlalpha.CsvDataSet:
       _preprocess.configure_pipeline_csv(p, self._checkpoint, dataset.files, output_dir, job_name)
     elif type(dataset) is mlalpha.BigQueryDataSet:
-      _preprocess.configure_pipeline_bigquery(p, self._checkpoint, dataset.sql, output_dir, job_name)
+      _preprocess.configure_pipeline_bigquery(p, self._checkpoint, dataset.sql,
+                                              output_dir, job_name)
     else:
       raise ValueError('preprocess takes CsvDataSet or BigQueryDataset only.')
     p.run()
@@ -75,6 +91,8 @@ def train(self, input_dir, batch_size, max_steps, output_path,
     """Cloud training with CloudML trainer service."""
 
     import datalab.mlalpha as mlalpha
+
+    staging_package_url = self._repackage_to_staging(output_path)
     job_args = {
         'input_dir': input_dir,
         'output_path': output_path,
@@ -83,7 +101,7 @@ def train(self, input_dir, batch_size, max_steps, output_path,
         'checkpoint': self._checkpoint
     }
     job_request = {
-        'package_uris': _util._PACKAGE_GS_URL,
+        'package_uris': staging_package_url,
         'python_module': 'datalab_solutions.inception.task',
         'scale_tier': scale_tier,
         'region': region,
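
For context, the staging step introduced above roughly amounts to building a source tarball of the locally installed package and copying it to a GCS path, which Dataflow (`extra_packages`) and CloudML (`package_uris`) then install on their workers. Below is a minimal, illustrative sketch of that idea. It is not the `datalab.mlalpha.package_and_copy` implementation; the helper name `build_and_stage`, the use of `python setup.py sdist`, and the `gsutil cp` upload are assumptions made for illustration only.

# Illustrative sketch only -- NOT the datalab.mlalpha implementation.
# Assumptions: the package builds with "python setup.py sdist", and "gsutil"
# is installed and authenticated for the destination bucket.
import os
import shutil
import subprocess
import tempfile


def build_and_stage(package_root, setup_py, staging_url):
  """Builds a source tarball of the package and copies it to a GCS URL."""
  dist_dir = tempfile.mkdtemp()
  try:
    # The setup.py may live outside the package tree (as it does in the diff
    # above), so place a copy next to the sources before building.
    shutil.copyfile(setup_py, os.path.join(package_root, 'setup.py'))
    # Build the source distribution, writing the tarball into dist_dir.
    subprocess.check_call(
        ['python', 'setup.py', 'sdist', '--dist-dir', dist_dir],
        cwd=package_root)
    tarball = os.path.join(dist_dir, os.listdir(dist_dir)[0])
    # Copy the tarball to the staging location, e.g. gs://bucket/staging/inception.tar.gz.
    subprocess.check_call(['gsutil', 'cp', tarball, staging_url])
    return staging_url
  finally:
    shutil.rmtree(dist_dir, ignore_errors=True)

Under these assumptions, a call like build_and_stage(package_root, _SETUP_PY, staging_package_url) plays the same role as the mlalpha.package_and_copy call in _repackage_to_staging: every cloud run ships the currently installed copy of the package instead of a fixed, pre-uploaded one.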