Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix distributed run on built dir #5810

Merged
merged 8 commits into from
Aug 9, 2021
16 changes: 8 additions & 8 deletions ci/test/distributed_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async def create_remote_workspace_dir(
f"ssh {remote_host} mkdir -p {workspace_dir}/{path}"
)
await spawn_shell_and_check(
f"rsync -azP --omit-dir-times --no-perms --no-group --copy-links --exclude='__pycache__' {path} {remote_host}:{workspace_dir}/{path}"
f"rsync -azPq --omit-dir-times --no-perms --no-group --copy-links --exclude='__pycache__' {path} {remote_host}:{workspace_dir}/{path}"
)
print("create_remote_workspace_dir done")

Expand All @@ -123,7 +123,7 @@ async def launch_remote_container(
if oneflow_wheel_path:
pythonpath_args = ""
elif oneflow_python_path:
pythonpath_args = f"--env PYTHONPATH={workspace_dir}/oneflow_python"
pythonpath_args = f"--env PYTHONPATH={workspace_dir}/python"
else:
raise ValueError("must have oneflow_wheel_path or oneflow_python_path")
docker_cmd = f"""docker run --privileged -d --network host --shm-size=8g --rm -v {workspace_dir}:{workspace_dir} -w {workspace_dir} -v /dataset:/dataset -v /model_zoo:/model_zoo --name {container_name} {pythonpath_args} {img_tag} sleep {survival_time}
Expand Down Expand Up @@ -180,7 +180,7 @@ def wait_for_env_proto_and_launch_workers(
f.write(env_proto_txt.encode())
f.flush()
subprocess.check_call(
f"rsync -azP --omit-dir-times --no-perms --no-group {f.name} {remote_host}:{workspace_dir}/env.prototxt",
f"rsync -azPq --omit-dir-times --no-perms --no-group {f.name} {remote_host}:{workspace_dir}/env.prototxt",
shell=True,
)
run_docker_cmd = f"ssh {remote_host} docker exec {container_name}"
Expand Down Expand Up @@ -342,7 +342,7 @@ def handle_lib(lib):
await asyncio.gather(
*[
spawn_shell_and_check(
f"ssh {remote_host} 'mkdir -p {workspace_dir}/oneflow_python/oneflow/libs'",
f"ssh {remote_host} 'mkdir -p {workspace_dir}/python/oneflow/libs'",
)
for remote_host in remote_hosts
]
Expand All @@ -351,7 +351,7 @@ def handle_lib(lib):
async def copy_file(path=None, remote_host=None):
relpath = os.path.relpath(path, tmp_dir.name)
await spawn_shell_and_check(
f"scp {path} {remote_host}:{workspace_dir}/oneflow_python/oneflow/{relpath}",
f"scp {path} {remote_host}:{workspace_dir}/python/oneflow/{relpath}",
)

files = [
Expand Down Expand Up @@ -525,7 +525,7 @@ def get_remote_hosts(args):
asyncio.gather(
*[
spawn_shell_and_check(
f"rsync -azP --omit-dir-times --no-perms --no-group --copy-links --include='*.py' --exclude='*.so' --exclude='__pycache__' --exclude='oneflow/include' --include='*/' --exclude='*' {args.oneflow_python_path} {remote_host}:{workspace_dir}"
f"rsync -azPq --omit-dir-times --no-perms --no-group --copy-links --include='*.py' --exclude='*.so' --exclude='__pycache__' --exclude='oneflow/include' --include='*/' --exclude='*' {args.oneflow_python_path} {remote_host}:{workspace_dir}"
)
for remote_host in remote_hosts
]
Expand All @@ -544,7 +544,7 @@ def get_remote_hosts(args):
asyncio.gather(
*[
spawn_shell_and_check(
f"rsync -azP --omit-dir-times --no-perms --no-group {oneflow_wheel_path} {remote_host}:{workspace_dir}"
f"rsync -azPq --omit-dir-times --no-perms --no-group {oneflow_wheel_path} {remote_host}:{workspace_dir}"
)
for remote_host in remote_hosts
]
Expand Down Expand Up @@ -605,7 +605,7 @@ def exit_handler():
asyncio.gather(
*[
spawn_shell(
f"rsync -azP --omit-dir-times --no-perms --no-group --exclude='*.whl' --exclude='oneflow_python' {extra_exclude_args} {remote_host}:{workspace_dir}/ {args.oneflow_test_tmp_dir}/{remote_host}"
f"rsync -azPq --omit-dir-times --no-perms --no-group --exclude='*.whl' --exclude='python' {extra_exclude_args} {remote_host}:{workspace_dir}/ {args.oneflow_test_tmp_dir}/{remote_host}"
)
for remote_host in remote_hosts
]
Expand Down