Skip to content
This repository was archived by the owner on Sep 3, 2022. It is now read-only.

Commit ecc57ab

Browse files
committed
A util function to repackage and copy the package to staging location. (#169)
* A util function to repackage and copy the package to staging location, so in packages we can use the staging URL as package URL in cloud training. * Follow up CR comments. * Follow up CR comments.
1 parent 6b61f15 commit ecc57ab

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

datalab/mlalpha/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from ._analysis import csv_to_dataframe
3030
from ._package_runner import PackageRunner
3131
from ._feature_slice_view import FeatureSliceView
32+
from ._util import *
33+
3234

3335
from plotly.offline import init_notebook_mode
3436

datalab/mlalpha/_util.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright 2017 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
import shutil
18+
import subprocess
19+
import tempfile
20+
21+
22+
def package_and_copy(package_root_dir, setup_py, output_tar_path):
    """Repackage a CloudML package and copy it to a staging dir.

    Runs "setup.py sdist" from package_root_dir into a temporary directory and
    uploads the resulting tarball with "gsutil cp". If setup_py is not already
    the "setup.py" inside package_root_dir, it is copied there for the duration
    of the build (backing up any existing file) and the directory is restored
    afterwards, even if packaging or the upload fails.

    Args:
      package_root_dir: the root dir to install package from. Usually you can get the path
          from inside your module using a relative path to __file__.
      setup_py: the path to setup.py.
      output_tar_path: the GCS path of the output tarball package.
    Raises:
      ValueError if output_tar_path is not a GCS path, or setup_py does not exist.
      subprocess.CalledProcessError if the sdist build or the gsutil copy fails.
    """
    # Local import: only this function needs it, and the module-level import
    # block is left untouched.
    import sys

    if not output_tar_path.startswith('gs://'):
        raise ValueError('output_tar_path needs to be a GCS path.')
    if not os.path.isfile(setup_py):
        raise ValueError('Supplied file "%s" does not exist.' % setup_py)

    # Work with absolute paths so nothing depends on the current working dir.
    package_root_dir = os.path.abspath(package_root_dir)
    setup_py = os.path.abspath(setup_py)
    dest_setup_py = os.path.join(package_root_dir, 'setup.py')
    backup_setup_py = dest_setup_py + '._bak_'

    # If the supplied setup.py already is <package_root_dir>/setup.py, there is
    # nothing to stage; backing it up first would move the copy source away.
    def _canonical(path):
        return os.path.normcase(os.path.realpath(path))

    needs_staging = _canonical(setup_py) != _canonical(dest_setup_py)

    tempdir = tempfile.mkdtemp()
    backed_up = False
    copy_attempted = False
    try:
        if needs_staging:
            # setuptools requires a "setup.py" in the package root, so copy
            # setup.py there. If one already exists, back it up first.
            if os.path.isfile(dest_setup_py):
                os.rename(dest_setup_py, backup_setup_py)
                backed_up = True
            copy_attempted = True
            shutil.copyfile(setup_py, dest_setup_py)

        # Repackage. Run the build with cwd set on the child process rather
        # than os.chdir(), so this process's working directory never changes.
        # Prefer the running interpreter; fall back to "python" on PATH if
        # sys.executable is unavailable.
        sdist = [sys.executable or 'python', dest_setup_py, 'sdist', '--format=gztar',
                 '-d', tempdir]
        subprocess.check_call(sdist, cwd=package_root_dir)

        # Copy to GCS. The wildcard is expanded by gsutil itself (no shell).
        source = os.path.join(tempdir, '*.tar.gz')
        gscopy = ['gsutil', 'cp', source, output_tar_path]
        subprocess.check_call(gscopy)
    finally:
        # Undo only what this call changed in the package directory.
        if copy_attempted and os.path.isfile(dest_setup_py):
            os.remove(dest_setup_py)
        if backed_up:
            os.rename(backup_setup_py, dest_setup_py)
        shutil.rmtree(tempdir, ignore_errors=True)

0 commit comments

Comments
 (0)