23
23
Datalab will look for functions with the above names.
24
24
"""
25
25
26
+ import google .cloud .ml as ml
26
27
import logging
27
28
import os
28
29
import urllib
35
36
from . import _util
36
37
37
38
38
- def local_preprocess (input_csvs , labels_file , output_dir , checkpoint = None ):
39
+ def local_preprocess (input_csvs , output_dir , checkpoint = None ):
39
40
"""Preprocess data locally. Produce output that can be used by training efficiently.
40
41
Args:
41
42
input_csvs: A list of CSV files which include two columns only: image_gs_url, label.
42
43
Preprocessing will concatenate the data inside all files and split them into
43
44
train/eval dataset. Can be local or GCS path.
44
- labels_file: The path to the labels file which lists all labels, each in a separate line.
45
- It can be a local or a GCS path.
46
45
output_dir: The output directory to use. Preprocessing will create a sub directory under
47
46
it for each run, and also update "latest" file which points to the latest preprocessed
48
47
directory. Users are responsible for cleanup. Can be local or GCS path.
@@ -52,19 +51,17 @@ def local_preprocess(input_csvs, labels_file, output_dir, checkpoint=None):
52
51
print 'Local preprocessing...'
53
52
# TODO: Move this to a new process to avoid pickling issues
54
53
# TODO: Expose train/eval split ratio
55
- _local .Local (checkpoint ).preprocess (input_csvs , labels_file , output_dir )
54
+ _local .Local (checkpoint ).preprocess (input_csvs , output_dir )
56
55
print 'Done'
57
56
58
57
59
- def cloud_preprocess (input_csvs , labels_file , output_dir , checkpoint = None ,
60
- pipeline_option = None ):
58
+ def cloud_preprocess (input_csvs , output_dir , checkpoint = None , pipeline_option = None ):
61
59
"""Preprocess data in Cloud with DataFlow.
62
60
Produce output that can be used by training efficiently.
63
61
Args:
64
62
input_csvs: A list of CSV files which include two columns only: image_gs_url, label.
65
63
Preprocessing will concatenate the data inside all files and split them into
66
64
train/eval dataset. GCS paths only.
67
- labels_file: The GCS path to the labels file which lists all labels, each in a separate line.
68
65
output_dir: The output directory to use. Preprocessing will create a sub directory under
69
66
it for each run, and also update "latest" file which points to the latest preprocessed
70
67
directory. Users are responsible for cleanup. GCS path only.
@@ -74,8 +71,7 @@ def cloud_preprocess(input_csvs, labels_file, output_dir, checkpoint=None,
74
71
# TODO: Move this to a new process to avoid pickling issues
75
72
# TODO: Expose train/eval split ratio
76
73
# TODO: Consider exposing explicit train/eval datasets
77
- _cloud .Cloud (checkpoint = checkpoint ).preprocess (input_csvs , labels_file , output_dir ,
78
- pipeline_option )
74
+ _cloud .Cloud (checkpoint = checkpoint ).preprocess (input_csvs , output_dir , pipeline_option )
79
75
if (_util .is_in_IPython ()):
80
76
import IPython
81
77
@@ -87,11 +83,9 @@ def cloud_preprocess(input_csvs, labels_file, output_dir, checkpoint=None,
87
83
IPython .display .display_html (html , raw = True )
88
84
89
85
90
- def local_train (labels_file , input_dir , batch_size , max_steps , output_dir , checkpoint = None ):
86
+ def local_train (input_dir , batch_size , max_steps , output_dir , checkpoint = None ):
91
87
"""Train model locally. The output can be used for local prediction or for online deployment.
92
88
Args:
93
- labels_file: The path to the labels file which lists all labels, each in a separate line.
94
- It can be a local or a GCS path.
95
89
input_dir: A directory path containing preprocessed results. Can be local or GCS path.
96
90
batch_size: size of batch used for training.
97
91
max_steps: number of steps to train.
@@ -104,27 +98,25 @@ def local_train(labels_file, input_dir, batch_size, max_steps, output_dir, check
104
98
logger .setLevel (logging .INFO )
105
99
print 'Local training...'
106
100
try :
107
- _local .Local (checkpoint ).train (labels_file , input_dir , batch_size , max_steps , output_dir )
101
+ _local .Local (checkpoint ).train (input_dir , batch_size , max_steps , output_dir )
108
102
finally :
109
103
logger .setLevel (original_level )
110
104
print 'Done'
111
105
112
106
113
- def cloud_train (labels_file , input_dir , batch_size , max_steps , output_dir ,
107
+ def cloud_train (input_dir , batch_size , max_steps , output_dir ,
114
108
region , scale_tier = 'BASIC' , checkpoint = None ):
115
109
"""Train model in the cloud with CloudML trainer service.
116
110
The output can be used for local prediction or for online deployment.
117
111
Args:
118
- labels_file: The path to the labels file which lists all labels, each in a separate line.
119
- GCS path only.
120
112
input_dir: A directory path containing preprocessed results. GCS path only.
121
113
batch_size: size of batch used for training.
122
114
max_steps: number of steps to train.
123
115
output_dir: The output directory to use. GCS path only.
124
116
checkpoint: the Inception checkpoint to use.
125
117
"""
126
118
127
- job_info = _cloud .Cloud (checkpoint = checkpoint ).train (labels_file , input_dir , batch_size ,
119
+ job_info = _cloud .Cloud (checkpoint = checkpoint ).train (input_dir , batch_size ,
128
120
max_steps , output_dir , region , scale_tier )
129
121
if (_util .is_in_IPython ()):
130
122
import IPython
@@ -146,58 +138,55 @@ def _display_predict_results(results, show_image):
146
138
if show_image is True :
147
139
IPython .display .display_html ('<p style="font-size:28px">%s(%.5f)</p>' % label_and_score ,
148
140
raw = True )
149
- IPython .display .display (IPython .display .Image (filename = image_file ))
141
+ with ml .util ._file .open_local_or_gcs (image_file , mode = 'r' ) as f :
142
+ IPython .display .display (IPython .display .Image (data = f .read ()))
150
143
else :
151
144
IPython .display .display_html (
152
145
'<p>%s  %s(%.5f)</p>' % ((image_file ,) + label_and_score ), raw = True )
153
146
else :
154
147
print results
155
148
156
149
def local_predict(model_dir, image_files, show_image=True):
  """Run prediction against an offline model and display the results.

  Args:
    model_dir: The directory of a trained inception model. Can be local or GCS paths.
    image_files: The paths to the image files to predict labels. Can be local or GCS paths.
    show_image: Whether to show images in the results.
  """
  print('Predicting...')
  predictor = _local.Local()
  predictions = predictor.predict(model_dir, image_files)
  # Pair each input path with its prediction, in input order, before display.
  _display_predict_results(zip(image_files, predictions), show_image)
  print('Done')
170
162
171
163
def cloud_predict(model_id, image_files, show_image=True):
  """Predict using a deployed (online) model and display the results.

  Args:
    model_id: The deployed model id in the form of "model.version".
    image_files: The paths to the image files to predict labels. GCS paths only.
    show_image: Whether to show images in the results.
  """
  print('Predicting...')
  labels_and_scores = _cloud.Cloud().predict(model_id, image_files)
  # Pair each input path with its prediction, in input order, before display.
  results = zip(image_files, labels_and_scores)
  _display_predict_results(results, show_image)
  # Mirror the local prediction entry points, which report completion explicitly.
  print('Done')
185
175
186
176
def local_batch_predict(model_dir, input_csv, output_file, output_bq_table=None):
  """Run batch prediction with an offline model.

  Args:
    model_dir: The directory of a trained inception model. Can be local or GCS paths.
    input_csv: The input csv which include two columns only: image_gs_url, label.
        Can be local or GCS paths.
    output_file: The output csv file containing prediction results.
    output_bq_table: If provided, will also save the results to BigQuery table.
  """
  print('Predicting...')
  runner = _local.Local()
  runner.batch_predict(model_dir, input_csv, output_file, output_bq_table)
  print('Done')
200
189
def cloud_batch_predict(model_dir, image_files, show_image=True, output_file=None):
  """Batch predict in the cloud. Not implemented yet.

  The parameters are accepted so the signature stays stable, but none of them
  is used.

  Raises:
    NotImplementedError: Always. Failing loudly prevents callers from assuming
        that a batch prediction ran when nothing was done.
  """
  raise NotImplementedError('cloud_batch_predict is not implemented yet.')
0 commit comments