Skip to content

Commit

Permalink
enhance(client): support excludes for model build and finetune decora…
Browse files Browse the repository at this point in the history
…tor (#3072)
  • Loading branch information
tianweidut authored Dec 12, 2023
1 parent 381a2e1 commit bca9510
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 10 deletions.
3 changes: 3 additions & 0 deletions client/starwhale/api/_impl/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def finetune(*args: t.Any, **kw: t.Any) -> t.Any:
model_modules: [List[str|object], optional] The search models for model building. Default is None.
The search modules supports object(function, class or module) or str(example: "to.path.module", "to.path.module:object").
If the argument is not specified, the search modules are the imported modules.
excludes: [List[str], optional] The files or dirs in the workdir to exclude. The excluded files or dirs will be ignored when building the Starwhale Model Package.
Examples:
```python
Expand All @@ -59,6 +60,7 @@ def ft(train_datasets):
require_validation_datasets = kw.get("require_validation_datasets", False)
auto_build_model = kw.get("auto_build_model", True)
model_modules = kw.get("model_modules")
model_build_excludes = kw.get("excludes")
workdir = Path.cwd()

def _register_wrapper(func: t.Callable) -> t.Any:
Expand Down Expand Up @@ -104,6 +106,7 @@ def _run_wrapper(*args: t.Any, **kw: t.Any) -> t.Any:
modules=model_modules,
workdir=workdir,
tags=[tag],
excludes=model_build_excludes,
)

return ret
Expand Down
4 changes: 4 additions & 0 deletions client/starwhale/api/_impl/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def build(
remote_project_uri: t.Optional[str] = None,
add_all: bool = False,
tags: t.List[str] | None = None,
excludes: t.List[str] | None = None,
) -> None:
"""Build Starwhale Model Package.
Expand All @@ -46,6 +47,8 @@ def build(
add_all: (bool, optional) Add all files in the workdir to the Starwhale Model Package. If the argument is False, the python cache files and virtualenv files will be ignored.
the ".swignore" file in the workdir will always take effect.
tags: (list(str), optional) The tags for the model version. `latest` and `^v\d+$` tags are reserved tags.
excludes: (list(str), optional) The files or dirs in the workdir to exclude. The excluded files or dirs will be ignored when building the Starwhale Model Package.
The `.swignore` file in the workdir will always take effect.
Examples:
```python
Expand Down Expand Up @@ -120,6 +123,7 @@ def build(
),
add_all=add_all,
tags=tags,
excludes=excludes,
)

if remote_project_uri:
Expand Down
17 changes: 15 additions & 2 deletions client/starwhale/core/model/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,17 @@ def model_cmd(ctx: click.Context) -> None:
"--add-all",
is_flag=True,
default=False,
help="Add all files in the working directory to the model package"
"(excludes python cache files and virtual environment files when disabled)."
help="Add all files in the working directory to the model package."
"By default, python cache files and virtual environment files are ignored automatically."
"When the option is enabled, the auto ignored files will be added to the model package."
"The '.swignore' file and '--ignore' option still take effect.",
)
@click.option(
"excludes",
"-e",
"--exclude",
multiple=True,
help="Ignore files or directories. The option can be used multiple times."
"The '.swignore' file still takes effect.",
)
def _build(
Expand All @@ -93,6 +102,7 @@ def _build(
name: str,
desc: str,
add_all: bool,
excludes: t.List[str],
) -> None:
"""Build starwhale model package.
Only standalone instance supports model build.
Expand All @@ -112,6 +122,8 @@ def _build(
swcli model build . --module mnist.evaluate --runtime pytorch/version/v1 --no-package-runtime
# build model package with tags.
swcli model build . --tag tag1 --tag tag2
# build model package with excludes.
swcli model build . --exclude .git --exclude checkpoint/*
"""
if model_yaml is None:
yaml_path = Path(workdir) / DefaultYAMLName.MODEL
Expand All @@ -136,6 +148,7 @@ def _build(
package_runtime=package_runtime,
add_all=add_all,
tags=tags,
excludes=excludes,
)


Expand Down
24 changes: 17 additions & 7 deletions client/starwhale/core/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ def buildImpl(self, workdir: Path, **kw: t.Any) -> None: # type: ignore[overrid
workdir=workdir,
model_config=model_config,
add_all=kw.get("add_all", False),
excludes=kw.get("excludes"),
),
),
]
Expand Down Expand Up @@ -807,29 +808,38 @@ def _prepare_snapshot(self) -> None:
)

def _copy_src(
self, workdir: Path, model_config: ModelConfig, add_all: bool
self,
workdir: Path,
model_config: ModelConfig,
add_all: bool,
excludes: t.List[str] | None = None,
) -> None:
"""
Copy source code files to snapshot workdir
Args:
workdir: source code dir
model_config: model config
add_all: copy all files, including python cache files(defined in BuiltinPyExcludes) and venv or conda files
excludes: files or dirs to exclude, in addition to the entries read from the .swignore file in the workdir
Returns: None
"""
console.print(
f":peacock: copy source code files: {workdir} -> {self.store.src_dir}"
)

excludes = []
ignore = workdir / SW_IGNORE_FILE_NAME
if ignore.exists():
with open(ignore, "r") as f:
excludes = [line.strip() for line in f.readlines()]
excludes = excludes or []
swignore_path = workdir / SW_IGNORE_FILE_NAME
if swignore_path.exists():
for line in swignore_path.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#"):
continue
excludes.append(line)

if not add_all:
excludes += BuiltinPyExcludes

console.debug(
console.info(
f"copy dir: {workdir} -> {self.store.src_dir}, excludes: {excludes}"
)
total_size = self._object_store.copy_dir(
Expand Down
2 changes: 2 additions & 0 deletions client/starwhale/core/model/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def build(
runtime_uri: str = "",
package_runtime: bool = False,
tags: t.List[str] | None = None,
excludes: t.List[str] | None = None,
) -> None:
if runtime_uri:
RuntimeProcess(uri=Resource(runtime_uri, typ=ResourceType.runtime)).run()
Expand All @@ -380,6 +381,7 @@ def build(
packaging_runtime_uri=packaging_runtime_uri,
add_all=add_all,
tags=tags,
excludes=excludes,
)

@classmethod
Expand Down
10 changes: 10 additions & 0 deletions client/tests/core/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ def test_build_workflow(
Path(self.workdir) / DefaultYAMLName.MODEL
)

(Path(self.workdir) / ".swignore").write_text("\n\n#exclude1/*\n exclude2/*")

model_uri = Resource(
self.name,
typ=ResourceType.model,
Expand All @@ -159,6 +161,7 @@ def test_build_workflow(
workdir=Path(self.workdir),
model_config=model_config,
tags=["test01", "test02"],
excludes=["checkpoints/*"],
)

build_version = sm.uri.version
Expand Down Expand Up @@ -247,6 +250,13 @@ def test_build_workflow(
== "/home/starwhale/myproject"
)
assert str(m_copy_dir.call_args_list[0][1]["dst_dir"]).endswith("/src")
assert m_copy_dir.call_args_list[0][1]["excludes"] == [
"checkpoints/*",
"exclude2/*",
"__pycache__/",
"*.py[cod]",
"*$py.class",
]

assert bundle_path.exists()
tags = sm.tag.list()
Expand Down
3 changes: 2 additions & 1 deletion client/tests/sdk/test_job_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ def test_finetune_deco(self, mock_build: MagicMock, mock_exists: MagicMock) -> N
content = """
from starwhale import finetune, Dataset
@finetune(require_validation_datasets=True)
@finetune(require_validation_datasets=True, excludes=['.git'])
def ft1(train_datasets, val_datasets):
assert isinstance(train_datasets[0], Dataset)
assert isinstance(val_datasets[0], Dataset)
Expand Down Expand Up @@ -861,6 +861,7 @@ def ft2(): ...
assert mock_build.call_count == 1
assert mock_build.call_args[1]["name"] == "mock_model_name"
assert mock_build.call_args[1]["modules"] is None
assert mock_build.call_args[1]["excludes"] == [".git"]

assert {
"mock_user_module:ft1",
Expand Down
2 changes: 2 additions & 0 deletions client/tests/sdk/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,13 +246,15 @@ def test_build_with_workdir(
modules=[mock_handler],
workdir=workdir,
name="mnist",
excludes=[".git"],
)

kwargs = m_model_view.build.call_args[1]
assert kwargs["project"] == ""
assert kwargs["workdir"] == workdir
assert kwargs["model_config"].run.modules == ["evaluator"]
assert kwargs["model_config"].name == "mnist"
assert kwargs["excludes"] == [".git"]

sub_dir = workdir / "sub"
ensure_dir(sub_dir)
Expand Down

0 comments on commit bca9510

Please sign in to comment.