Skip to content

Commit

Permalink
chore(client): ignore conda env by default when building model (#2232)
Browse files Browse the repository at this point in the history
  • Loading branch information
jialeicui authored May 15, 2023
1 parent 2e8136a commit bf3565c
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 37 deletions.
29 changes: 13 additions & 16 deletions client/starwhale/base/blob/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from abc import ABC
from pathlib import Path

from starwhale.utils.venv import check_valid_venv_prefix, check_valid_conda_prefix
from starwhale.utils.config import SWCliConfigMixed
from starwhale.base.blob.file import PathLike, BlakeFile, OptionalPathLike

Expand Down Expand Up @@ -90,39 +91,35 @@ def _is_file_under_dir(file: Path, dir: Path) -> bool:
return file.is_relative_to(dir)

@staticmethod
def _check_if_is_venv(path: PathLike) -> bool:
def _check_if_is_py_env(path: PathLike) -> bool:
"""
Check if the path is a virtual environment.
Args:
path: the path to be checked
Returns: True if the path is a virtual environment
Returns: True if the path is a virtual environment or conda environment
"""
return (
Path(path).is_dir()
and Path(path).joinpath("bin/activate").exists()
and Path(path).joinpath("bin/python").exists()
)
return check_valid_venv_prefix(path) or check_valid_conda_prefix(path)

@classmethod
def _search_all_venvs(cls, path: PathLike) -> t.List[Path]:
def _search_all_py_envs(cls, path: PathLike) -> t.List[Path]:
"""
Search all virtual environments under the path.
Args:
path: the path to be searched
Returns: a list of virtual environments
Returns: a list of virtual environments or conda environments
"""
venvs = []
envs = []
for root, dirs, files in os.walk(path):
if cls._check_if_is_venv(root):
venvs.append(Path(root))
return venvs
if cls._check_if_is_py_env(root):
envs.append(Path(root))
return envs

def copy_dir(
self,
src_dir: PathLike,
dst_dir: PathLike,
excludes: t.List[str],
ignore_venv: bool = True,
ignore_venv_or_conda: bool = True,
) -> t.Tuple[int, t.List[Path]]:
"""
Copy a directory from src_dir to the dst_dir using the object store cache.
Expand All @@ -131,12 +128,12 @@ def copy_dir(
src_dir: the source directory
dst_dir: the destination directory
excludes: the files to be excluded
ignore_venv: whether to ignore virtual environments
ignore_venv_or_conda: whether to ignore virtual environments or conda environments
Returns: a tuple of (total file size in bytes, ignored directories or files)
"""
src_dir = Path(src_dir)
dst_dir = Path(dst_dir)
ignore_dirs = self._search_all_venvs(src_dir) if ignore_venv else []
ignore_dirs = self._search_all_py_envs(src_dir) if ignore_venv_or_conda else []

size = 0 # no record for soft link (inode not working in windows)
ignored = ignore_dirs
Expand Down
4 changes: 2 additions & 2 deletions client/starwhale/core/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def _copy_src(
Args:
workdir: source code dir
model_config: model config
add_all: copy all files, include python cache files(defined in BuiltinPyExcludes) and venv files
add_all: copy all files, include python cache files(defined in BuiltinPyExcludes) and venv or conda files
Returns: None
"""
console.print(
Expand All @@ -791,7 +791,7 @@ def _copy_src(
src_dir=workdir.resolve(),
dst_dir=self.store.src_dir.resolve(),
excludes=excludes,
ignore_venv=not add_all,
ignore_venv_or_conda=not add_all,
)
for i in ignored:
console.info(f"ignored : {str(i)}")
Expand Down
6 changes: 4 additions & 2 deletions client/starwhale/utils/venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,11 +744,13 @@ def get_user_runtime_python_bin(py_env: str) -> str:


def check_valid_venv_prefix(prefix: t.Union[str, Path]) -> bool:
return (Path(prefix) / "pyvenv.cfg").exists()
expect = Path(prefix) / "pyvenv.cfg"
return expect.exists() and expect.is_file()


def check_valid_conda_prefix(prefix: t.Union[str, Path]) -> bool:
return (Path(prefix) / "conda-meta").exists()
expect = Path(prefix) / "conda-meta"
return expect.exists() and expect.is_dir()


def guess_python_env_mode(prefix: t.Union[str, Path]) -> str:
Expand Down
50 changes: 34 additions & 16 deletions client/tests/base/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,32 +43,50 @@ def test_local_file_object_store(
workdir = tmp_path / "workdir"
venv = workdir / "venv"
# simulate a venv in the working dir
(venv / "bin").mkdir(parents=True)
(venv / "bin" / "python").touch()
(venv / "bin" / "activate").touch()
venv.mkdir(parents=True, exist_ok=True)
(venv / "pyvenv.cfg").touch()
# simulate a conda env in the working dir
conda = workdir / "conda"
conda.mkdir(parents=True, exist_ok=True)
(conda / "conda-meta").mkdir(parents=True, exist_ok=True)
# make a fake file in the conda meta dir, make sure the copy touches the dir
(conda / "conda-meta" / "fake").touch()

# user script
(workdir / "main.py").touch()

venv_files = {
dst / "venv" / "bin",
dst / "venv" / "bin" / "python",
dst / "venv" / "bin" / "activate",
env_files = {
dst / "venv",
dst / "venv" / "pyvenv.cfg",
dst / "conda",
dst / "conda" / "conda-meta",
dst / "conda" / "conda-meta" / "fake",
}

shutil.rmtree(dst)
# do not ignore venv
store.copy_dir(workdir, dst, [], ignore_venv=False)
# do not ignore env paths
sz, ignores = store.copy_dir(workdir, dst, [], ignore_venv_or_conda=False)
assert (dst / "main.py").exists()
assert all(f.exists() for f in venv_files)
assert all(f.exists() for f in env_files)
assert sz == (dst / "main.py").stat().st_size + sum(
f.stat().st_size for f in [venv / "pyvenv.cfg", conda / "conda-meta" / "fake"]
)
assert ignores == []

shutil.rmtree(dst)
# ignore venv
store.copy_dir(workdir, dst, [], ignore_venv=True)
# ignore env paths
sz, ignores = store.copy_dir(workdir, dst, [], ignore_venv_or_conda=True)
assert (dst / "main.py").exists()
assert not any(f.exists() for f in venv_files)
assert not any(f.exists() for f in env_files)
assert sz == (dst / "main.py").stat().st_size
assert set(ignores) == {venv, conda}

shutil.rmtree(dst)
# ignore venv with exclude
store.copy_dir(workdir, dst, ["venv/*"], ignore_venv=False)
# ignore env paths with exclude
sz, ignores = store.copy_dir(
workdir, dst, ["venv/*", "conda/*"], ignore_venv_or_conda=False
)
assert (dst / "main.py").exists()
assert not any(f.exists() for f in venv_files)
assert not any(f.exists() for f in env_files)
assert sz == (dst / "main.py").stat().st_size
assert set(ignores) == {venv / "pyvenv.cfg", conda / "conda-meta" / "fake"}
2 changes: 1 addition & 1 deletion client/tests/core/test_runtime_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_run_with_model_uri(

m_restore.reset_mock()
m_extract.reset_mock()
ensure_file(conda_dir / "conda-meta", "")
ensure_file(conda_dir / "conda-meta" / "fake", "", parents=True)

with patch.object(sys, "argv", run_argv):
p = Process(uri, force_restore=False)
Expand Down

0 comments on commit bf3565c

Please sign in to comment.