Skip to content

Commit

Permalink
chore(dataset): remove useless dataset render-fuse-json command (#954)
Browse files Browse the repository at this point in the history
remove useless dataset render-fuse-json command
  • Loading branch information
tianweidut authored Aug 23, 2022
1 parent ee3acba commit 007edb4
Show file tree
Hide file tree
Showing 6 changed files with 1 addition and 138 deletions.
3 changes: 0 additions & 3 deletions client/starwhale/consts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
DEFAULT_EVALUATION_JOBS_FNAME = "eval_jobs.yaml"
DEFAULT_EVALUATION_PIPELINE = "starwhale.core.model.default_handler"
DEFAULT_LOCAL_SW_CONTROLLER_ADDR = "localhost:7827"
LOCAL_FUSE_JSON_NAME = "local_fuse.json"
DEFAULT_INPUT_JSON_FNAME = "input.json"
LOCAL_CONFIG_VERSION = "2.0"

# used by the versions before 2.0
Expand Down Expand Up @@ -113,7 +111,6 @@ class SWDSSubFileType:


SWDS_DATA_FNAME_FMT = "data_ubyte_{index}.%s" % SWDSSubFileType.BIN
SWDS_LABEL_FNAME_FMT = "label_ubyte_{index}.%s" % SWDSSubFileType.BIN
ARCHIVED_SWDS_META_FNAME = "archive.%s" % SWDSSubFileType.META
DUMPED_SWDS_META_FNAME = "_meta.jsonl"

Expand Down
26 changes: 1 addition & 25 deletions client/starwhale/core/dataset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,7 @@

import click

from starwhale.consts import (
DefaultYAMLName,
DEFAULT_PAGE_IDX,
DEFAULT_PAGE_SIZE,
LOCAL_FUSE_JSON_NAME,
)
from starwhale.consts import DefaultYAMLName, DEFAULT_PAGE_IDX, DEFAULT_PAGE_SIZE

from .view import get_term_view, DatasetTermView

Expand Down Expand Up @@ -114,25 +109,6 @@ def _summary(view: t.Type[DatasetTermView], dataset: str) -> None:
view(dataset).summary()


@dataset_cmd.command("render-fuse")
@click.argument("target")
@click.option(
"-f",
"--force",
is_flag=True,
help=f"Force to render, if {LOCAL_FUSE_JSON_NAME} was already existed",
)
@click.pass_obj
def _render_fuse(view: t.Type[DatasetTermView], target: str, force: bool) -> None:
"""
[ONLY Standalone]Render Dataset fuse input.json for standalone ppl
TARGET: dataset uri or dataset workdir path
"""

view.render_fuse_json(target, force)


@dataset_cmd.command("copy", help="Copy dataset, standalone <--> cloud")
@click.argument("src")
@click.argument("dest")
Expand Down
65 changes: 0 additions & 65 deletions client/starwhale/core/dataset/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import json
import typing as t
import tarfile
from abc import ABCMeta, abstractmethod
Expand All @@ -15,18 +14,10 @@
from starwhale.utils import console, load_yaml
from starwhale.consts import (
HTTPMethod,
JSON_INDENT,
DataLoaderKind,
DefaultYAMLName,
SWDSBackendType,
SWDSSubFileType,
DEFAULT_PAGE_IDX,
DEFAULT_PAGE_SIZE,
VERSION_PREFIX_CNT,
SWDS_DATA_FNAME_FMT,
DEFAULT_COPY_WORKERS,
LOCAL_FUSE_JSON_NAME,
SWDS_LABEL_FNAME_FMT,
DEFAULT_MANIFEST_NAME,
ARCHIVED_SWDS_META_FNAME,
)
Expand All @@ -35,7 +26,6 @@
from starwhale.utils.fs import (
move_dir,
ensure_dir,
ensure_file,
blake2b_file,
BLAKE2B_SIGNATURE_ALGO,
)
Expand All @@ -61,10 +51,6 @@ def __str__(self) -> str:
def summary(self) -> DatasetSummary:
raise NotImplementedError

@classmethod
def render_fuse_json(cls, workdir: Path, force: bool = False) -> str:
return StandaloneDataset.render_fuse_json(workdir, force)

@classmethod
def get_dataset(cls, uri: URI) -> Dataset:
_cls = cls._get_cls(uri)
Expand Down Expand Up @@ -116,57 +102,6 @@ def add_tags(self, tags: t.List[str], quiet: bool = False) -> None:
def remove_tags(self, tags: t.List[str], quiet: bool = False) -> None:
self.tag.remove(tags, quiet)

@classmethod
def render_fuse_json(cls, workdir: Path, force: bool = False) -> str:
_mf = workdir / DEFAULT_MANIFEST_NAME
if not _mf.exists():
raise Exception(f"need {DEFAULT_MANIFEST_NAME} @ {workdir}")

_manifest = load_yaml(_mf)
_fuse = dict(
backend=SWDSBackendType.FUSE,
kind=DataLoaderKind.SWDS,
swds=[],
)

ds_name = _manifest["name"]
ds_version = _manifest["version"]
swds_bins = [
_k
for _k in _manifest["signature"].keys()
if _k.startswith("data_") and _k.endswith(SWDSSubFileType.BIN)
]

bucket = workdir.parent.parent.parent
path_prefix = f"{ds_name}/{ds_version[:VERSION_PREFIX_CNT]}/{ds_version}{BundleType.DATASET}/data"
for idx in range(0, len(swds_bins)):
_fuse["swds"].append( # type: ignore
dict(
bucket=str(bucket.resolve()),
key=dict(
data=f"{path_prefix}/{SWDS_DATA_FNAME_FMT.format(index=idx)}",
label=f"{path_prefix}/{SWDS_LABEL_FNAME_FMT.format(index=idx)}",
# TODO: add extra_attr ds_name, ds_version
),
ext_attr=dict(
ds_name=ds_name,
ds_version=ds_version,
),
)
)

_f = workdir / LOCAL_FUSE_JSON_NAME
if _f.exists() and not force:
console.print(f":joy_cat: {LOCAL_FUSE_JSON_NAME} existed, skip render")
else:
ensure_file(_f, json.dumps(_fuse, indent=JSON_INDENT))
console.print(
f":clap: render swds {ds_name}:{ds_version} {LOCAL_FUSE_JSON_NAME}"
)

console.print(f":mag: {_f}")
return str(_f.resolve())

def history(
self,
page: int = DEFAULT_PAGE_IDX,
Expand Down
14 changes: 0 additions & 14 deletions client/starwhale/core/dataset/view.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import typing as t
from pathlib import Path

Expand All @@ -9,7 +8,6 @@
from starwhale.base.uri import URI
from starwhale.base.type import URIType, InstanceType
from starwhale.base.view import BaseTermView
from starwhale.core.dataset.store import DatasetStorage

from .model import Dataset

Expand Down Expand Up @@ -79,18 +77,6 @@ def copy(cls, src_uri: str, dest_uri: str, force: bool = False) -> None:
Dataset.copy(src_uri, dest_uri, force)
console.print(":clap: copy done")

@classmethod
def render_fuse_json(cls, target: str, force: bool = False) -> None:
if os.path.exists(target) and os.path.isdir(target):
workdir = Path(target)
else:
uri = URI(target, URIType.DATASET)
store = DatasetStorage(uri)
workdir = store.loc

console.print(f":crown: try to render fuse json@{workdir}...")
Dataset.render_fuse_json(workdir, force)

@BaseTermView._header
def tag(self, tags: t.List[str], remove: bool = False, quiet: bool = False) -> None:
if remove:
Expand Down
7 changes: 0 additions & 7 deletions client/tests/core/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import json
from pathlib import Path
from unittest.mock import patch, MagicMock

Expand Down Expand Up @@ -127,11 +126,6 @@ def test_build_workflow(self, m_import: MagicMock, m_copy_fs: MagicMock) -> None
_list, _ = StandaloneDataset.list(URI(""))
assert not _list[name][0]["is_removed"]

_fuse_json_path = StandaloneDataset.render_fuse_json(snapshot_workdir, True)
_fuse_json = json.load(open(_fuse_json_path))
assert _fuse_json["backend"] == "fuse"
assert _fuse_json["kind"] == "swds"

DatasetTermView(name).info()
DatasetTermView(name).history()
fname = f"{name}/version/{build_version}"
Expand All @@ -141,7 +135,6 @@ def test_build_workflow(self, m_import: MagicMock, m_copy_fs: MagicMock) -> None
DatasetTermView(fname).recover()
DatasetTermView.list()

DatasetTermView.render_fuse_json(fname, force=True)
DatasetTermView.build(workdir, "self")

# make sure tmp dir is empty
Expand Down
24 changes: 0 additions & 24 deletions docs/docs/reference/cli/dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ swcli [GLOBAL OPTIONS] dataset [OPTIONS] COMMAND [ARGS]...
|list|||
|recover|||
|remove|||
|render-fuse|||
|tag|||

## Build a dataset
Expand Down Expand Up @@ -180,29 +179,6 @@ swcli dataset tag [OPTIONS] DATASET TAGS
❯ swcli dataset tag mnist/version/hfsdmyrtgzst v1 test
```
## Render dataset's fuse input.json

```bash
swcli dataset render-fuse [OPTIONS] TARGET
```

- This command renders an `input.json` that uses the fuse storage backend; the rendered file can be passed to the `swcli model ppl --input-json` option for debugging.
- The `TARGET` argument is required; it accepts either a `Dataset URI` or a dataset working directory.
- Options:

|Option|Alias Option|Required|Type|Default|Description|
|------|--------|-------|-----------|-----|-----------|
|`--force`|`-f`||Boolean|False|Force rendering even if the output JSON file already exists|

- Example:

```bash
❯ swcli dataset render-fuse mnist/version/latest
👑 try to render fuse json@/home/liutianwei/.cache/starwhale/self/dataset/mnist/gv/gvsgemdbhazwknrtmftdgyjzoaygynq.swds...
👏 render swds mnist:gvsgemdbhazwknrtmftdgyjzoaygynq local_fuse.json
🔍 /home/liutianwei/.cache/starwhale/self/dataset/mnist/gv/gvsgemdbhazwknrtmftdgyjzoaygynq.swds/local_fuse.json
```

## Copy a dataset
```bash
Expand Down

0 comments on commit 007edb4

Please sign in to comment.