From e105bc6f8fce254d82400aef6fe5451ea05ded20 Mon Sep 17 00:00:00 2001 From: tianwei Date: Sat, 6 May 2023 18:28:49 +0800 Subject: [PATCH] PipelineHandler support predict and evaluate function interface --- .github/workflows/client.yaml | 1 + .github/workflows/e2e-test.yml | 2 + client/starwhale/api/_impl/evaluation.py | 83 +++++++++++------- client/starwhale/api/_impl/job.py | 13 +-- client/starwhale/base/scheduler/task.py | 24 +++--- client/starwhale/core/job/view.py | 4 +- client/starwhale/core/runtime/model.py | 11 +-- client/starwhale/utils/progress.py | 2 + client/starwhale/utils/venv.py | 7 +- client/tests/sdk/test_evaluation.py | 8 +- client/tests/sdk/test_job.py | 103 +++++++++++++++-------- example/cifar10/cifar/evaluator.py | 6 +- example/mnist/mnist/evaluator.py | 6 +- example/ucf101/ucf101/evaluator.py | 2 +- scripts/client_test/cli_test.py | 3 + scripts/client_test/cli_test.sh | 15 +++- scripts/example/runtime_conda.yaml | 2 + 17 files changed, 187 insertions(+), 105 deletions(-) diff --git a/.github/workflows/client.yaml b/.github/workflows/client.yaml index ff8f6bc97d..848f18641c 100644 --- a/.github/workflows/client.yaml +++ b/.github/workflows/client.yaml @@ -194,4 +194,5 @@ jobs: env: GITHUB_ACTION: 1 PYTHON_VERSION: ${{matrix.python-version}} + SKIP_UI_BUILD: 1 run: bash scripts/client_test/cli_test.sh sdk simple diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 7f81204ba3..5368e57624 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -169,6 +169,8 @@ jobs: - name: Test by client side working-directory: ./scripts/e2e_test run: bash start_test.sh client_test + env: + SKIP_UI_BUILD: 1 - name: Post output client-side test logs if: failure() diff --git a/client/starwhale/api/_impl/evaluation.py b/client/starwhale/api/_impl/evaluation.py index b523f4eb0a..384059c2bb 100644 --- a/client/starwhale/api/_impl/evaluation.py +++ b/client/starwhale/api/_impl/evaluation.py @@ -1,9 +1,9 @@ -from __future__ import annotations +from __future__ import annotations import time import typing as t import threading -from abc import ABCMeta, abstractmethod +from abc import ABCMeta from types import TracebackType from pathlib import Path from functools import wraps @@ -31,14 +31,14 @@ class PipelineHandler(metaclass=ABCMeta): def __init__( self, - ppl_batch_size: int = 1, + predict_batch_size: int = 1, ignore_dataset_data: bool = False, ignore_error: bool = False, flush_result: bool = False, - ppl_auto_log: bool = True, + predict_auto_log: bool = True, dataset_uris: t.Optional[t.List[str]] = None, ) -> None: - self.ppl_batch_size = ppl_batch_size + self.predict_batch_size = predict_batch_size self.svc = Service() self.context = Context.get_runtime_context() @@ -48,7 +48,7 @@ def __init__( self.ignore_dataset_data = ignore_dataset_data self.ignore_error = ignore_error self.flush_result = flush_result - self.ppl_auto_log = ppl_auto_log + self.predict_auto_log = predict_auto_log _logdir = JobStorage.local_run_dir(self.context.project, self.context.version) _run_dir = ( @@ -84,15 +84,6 @@ def __exit__( self._timeline_writer.close() - @abstractmethod - def ppl(self, data: t.Any, **kw: t.Any) -> t.Any: - # TODO: how to handle each element is not equal. 
- raise NotImplementedError - - @abstractmethod - def cmp(self, *args: t.Any, **kw: t.Any) -> t.Any: - raise NotImplementedError - def _record_status(func): # type: ignore @wraps(func) # type: ignore def _wrapper(*args: t.Any, **kwargs: t.Any) -> None: @@ -113,15 +104,15 @@ def _wrapper(*args: t.Any, **kwargs: t.Any) -> None: return _wrapper @_record_status # type: ignore - def _starwhale_internal_run_cmp(self) -> None: + def _starwhale_internal_run_evaluate(self) -> None: now = now_str() try: - if self.ppl_auto_log: - self.cmp(self.evaluation_store.get_results(deserialize=True)) + if self.predict_auto_log: + self._do_evaluate(self.evaluation_store.get_results(deserialize=True)) else: - self.cmp() + self._do_evaluate() except Exception as e: - console.exception(f"cmp exception: {e}") + console.exception(f"evaluate exception: {e}") self._timeline_writer.write( {"time": now, "status": False, "exception": str(e)} ) @@ -129,11 +120,39 @@ def _starwhale_internal_run_cmp(self) -> None: else: self._timeline_writer.write({"time": now, "status": True, "exception": ""}) - def _is_ppl_batch(self) -> bool: - return self.ppl_batch_size > 1 + def _do_predict(self, *args: t.Any, **kw: t.Any) -> t.Any: + predict_func = getattr(self, "predict", None) + ppl_func = getattr(self, "ppl", None) + + if predict_func and ppl_func: + raise ParameterError("predict and ppl cannot be defined at the same time") + + if predict_func: + return predict_func(*args, **kw) + elif ppl_func: + return ppl_func(*args, **kw) + else: + raise ParameterError( + "predict or ppl must be defined, predict function is recommended" + ) + + def _do_evaluate(self, *args: t.Any, **kw: t.Any) -> t.Any: + evaluate_func = getattr(self, "evaluate", None) + cmp_func = getattr(self, "cmp", None) + if evaluate_func and cmp_func: + raise ParameterError("evaluate and cmp cannot be defined at the same time") + + if evaluate_func: + return evaluate_func(*args, **kw) + elif cmp_func: + return cmp_func(*args, **kw) + else: + raise ParameterError( + "evaluate or cmp must be defined, evaluate function is recommended" + ) @_record_status # type: ignore - def _starwhale_internal_run_ppl(self) -> None: + def _starwhale_internal_run_predict(self) -> None: if not self.dataset_uris: raise FieldTypeOrValueError("context.dataset_uris is empty") join_str = "_#@#_" @@ -146,13 +165,13 @@ def _starwhale_internal_run_ppl(self) -> None: dataset_info = ds.info cnt = 0 idx_prefix = f"{_uri.typ}-{_uri.name}-{_uri.version}" - for rows in ds.batch_iter(self.ppl_batch_size): + for rows in ds.batch_iter(self.predict_batch_size): _start = time.time() _exception = None _results: t.Any = b"" try: - if self._is_ppl_batch(): - _results = self.ppl( + if self.predict_batch_size > 1: + _results = self._do_predict( [row.features for row in rows], index=[row.index for row in rows], index_with_dataset=[ @@ -162,7 +181,7 @@ def _starwhale_internal_run_ppl(self) -> None: ) else: _results = [ - self.ppl( + self._do_predict( rows[0].features, index=rows[0].index, index_with_dataset=f"{idx_prefix}{join_str}{rows[0].index}", @@ -198,7 +217,7 @@ def _starwhale_internal_run_ppl(self) -> None: } ) - if self.ppl_auto_log: + if self.predict_auto_log: if not self.ignore_dataset_data: for artifact in TabularDatasetRow.artifacts_of(_features): if artifact.link: @@ -213,7 +232,7 @@ def _starwhale_internal_run_ppl(self) -> None: serialize=True, ) - if self.flush_result and self.ppl_auto_log: + if self.flush_result and self.predict_auto_log: self.evaluation_store.flush_result() console.info( @@ -446,9 
+465,9 @@ def _register_predict( needs=needs, replicas=replicas, extra_kwargs=dict( - ppl_batch_size=batch_size, + predict_batch_size=batch_size, ignore_error=not fail_on_error, - ppl_auto_log=auto_log, + predict_auto_log=auto_log, ignore_dataset_data=not auto_log, dataset_uris=datasets, ), @@ -514,6 +533,6 @@ def _register_evaluate( replicas=1, needs=needs, extra_kwargs=dict( - ppl_auto_log=use_predict_auto_log, + predict_auto_log=use_predict_auto_log, ), )(func) diff --git a/client/starwhale/api/_impl/job.py b/client/starwhale/api/_impl/job.py index 4471853b57..5bbb82677a 100644 --- a/client/starwhale/api/_impl/job.py +++ b/client/starwhale/api/_impl/job.py @@ -263,14 +263,15 @@ def _preload_registering_handlers( and issubclass(v, PipelineHandler) and v != PipelineHandler ): - ppl_func = getattr(v, "ppl") - cmp_func = getattr(v, "cmp") - Handler.register(replicas=2, name="ppl")(ppl_func) + # compatible with old version: ppl and cmp function are renamed to predict and evaluate + predict_func = getattr(v, "predict", None) or getattr(v, "ppl") + evaluate_func = getattr(v, "evaluate", None) or getattr(v, "cmp") + Handler.register(replicas=2, name="predict")(predict_func) Handler.register( replicas=1, - needs=[ppl_func], - name="cmp", - )(cmp_func) + needs=[predict_func], + name="evaluate", + )(evaluate_func) def generate_jobs_yaml( diff --git a/client/starwhale/base/scheduler/task.py b/client/starwhale/base/scheduler/task.py index 2051cf884a..4d69e27686 100644 --- a/client/starwhale/base/scheduler/task.py +++ b/client/starwhale/base/scheduler/task.py @@ -51,10 +51,10 @@ def status(self) -> str: return self.__status def _get_internal_func_name(self, func_name: str) -> str: - if func_name == "ppl": - return "_starwhale_internal_run_ppl" - elif func_name == "cmp": - return "_starwhale_internal_run_cmp" + if func_name in ("ppl", "predict"): + return "_starwhale_internal_run_predict" + elif func_name in ("cmp", "evaluate"): + return "_starwhale_internal_run_evaluate" else: raise RuntimeError( f"failed to map func name({func_name}) into PipelineHandler internal func name" @@ -68,8 +68,8 @@ def _run_in_pipeline_handler_cls( from starwhale.api._impl.evaluation import PipelineHandler patch_func_map = { - "ppl": lambda *args, **kwargs: ..., - "cmp": lambda *args, **kwargs: ..., + "predict": lambda *args, **kwargs: ..., + "evaluate": lambda *args, **kwargs: ..., } if func_name not in patch_func_map: @@ -107,9 +107,9 @@ def _do_execute(self) -> None: if cls_ is None: func = getattr(module, self.step.func_name) if getattr(func, DecoratorInjectAttr.Evaluate, False): - self._run_in_pipeline_handler_cls(func, "cmp") + self._run_in_pipeline_handler_cls(func, "evaluate") elif getattr(func, DecoratorInjectAttr.Predict, False): - self._run_in_pipeline_handler_cls(func, "ppl") + self._run_in_pipeline_handler_cls(func, "predict") elif getattr(func, DecoratorInjectAttr.Step, False): func() else: @@ -127,17 +127,17 @@ def _do_execute(self) -> None: with cls_() as instance: func = getattr(instance, func_name) if getattr(func, DecoratorInjectAttr.Evaluate, False): - self._run_in_pipeline_handler_cls(func, "cmp") + self._run_in_pipeline_handler_cls(func, "evaluate") elif getattr(func, DecoratorInjectAttr.Predict, False): - self._run_in_pipeline_handler_cls(func, "ppl") + self._run_in_pipeline_handler_cls(func, "predict") else: func() else: func = getattr(cls_(), func_name) if getattr(func, DecoratorInjectAttr.Evaluate, False): - self._run_in_pipeline_handler_cls(func, "cmp") + self._run_in_pipeline_handler_cls(func, 
"evaluate") elif getattr(func, DecoratorInjectAttr.Predict, False): - self._run_in_pipeline_handler_cls(func, "ppl") + self._run_in_pipeline_handler_cls(func, "predict") else: func() diff --git a/client/starwhale/core/job/view.py b/client/starwhale/core/job/view.py index 53707d7c3f..51c5c70276 100644 --- a/client/starwhale/core/job/view.py +++ b/client/starwhale/core/job/view.py @@ -107,8 +107,8 @@ def info( if "location" in _rt: console.rule("Process dirs") - console.print(f":cactus: ppl: {_rt['location']['ppl']}") - console.print(f":camel: cmp: {_rt['location']['cmp']}") + console.print(f":cactus: predict: {_rt['location']['predict']}") + console.print(f":camel: evaluate: {_rt['location']['evaluate']}") if "tasks" in _rt: self._print_tasks(_rt["tasks"][0]) diff --git a/client/starwhale/core/runtime/model.py b/client/starwhale/core/runtime/model.py index 50d233fb1e..79639aea3d 100644 --- a/client/starwhale/core/runtime/model.py +++ b/client/starwhale/core/runtime/model.py @@ -1868,11 +1868,6 @@ def _install_dependencies_within_restore( # We assume the equation in the runtime auto-lock build mode: # the lock files = pip_pkg + pip_req_file + conda_pkg + conda_env_file raw_deps = [] - for dep in deps["raw_deps"]: - kind = DependencyType(dep["kind"]) - if kind in (DependencyType.NATIVE_FILE, DependencyType.WHEEL): - raw_deps.append(dep) - for lf in lock_files: if lf.endswith(RuntimeLockFileType.CONDA): raw_deps.append({"deps": lf, "kind": DependencyType.CONDA_ENV_FILE}) @@ -1882,6 +1877,12 @@ def _install_dependencies_within_restore( raise NoSupportError( f"lock file({lf}) cannot be converted into raw_deps" ) + + # NATIVE_FILE and WHEEL must be installed after CONDA_ENV_FILE or PIP_REQ_FILE installation. + for dep in deps["raw_deps"]: + kind = DependencyType(dep["kind"]) + if kind in (DependencyType.NATIVE_FILE, DependencyType.WHEEL): + raw_deps.append(dep) else: raw_deps = deps["raw_deps"] diff --git a/client/starwhale/utils/progress.py b/client/starwhale/utils/progress.py index cdc98af269..8d20d90e6d 100644 --- a/client/starwhale/utils/progress.py +++ b/client/starwhale/utils/progress.py @@ -3,6 +3,7 @@ from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn +from starwhale.utils import console from starwhale.consts import ENV_DISABLE_PROGRESS_BAR @@ -23,6 +24,7 @@ def run_with_progress_bar( *Progress.get_default_columns(), TimeElapsedColumn(), refresh_per_second=1, + console=console.rich_console, ) as progress: task = progress.add_task( f"[red]{title}", total=sum([o[1] for o in operations]) diff --git a/client/starwhale/utils/venv.py b/client/starwhale/utils/venv.py index 4f3b06b68f..d5b8ed9f62 100644 --- a/client/starwhale/utils/venv.py +++ b/client/starwhale/utils/venv.py @@ -72,7 +72,12 @@ def conda_install_req( return configs = configs or {} - prefix_cmd = [get_conda_bin(), "run" if use_pip_install else "install"] + prefix_cmd = [get_conda_bin()] + + if use_pip_install: + prefix_cmd += ["run", "--live-stream"] + else: + prefix_cmd += ["install"] if env_name: prefix_cmd += ["--name", env_name] diff --git a/client/tests/sdk/test_evaluation.py b/client/tests/sdk/test_evaluation.py index 84a6c083e7..1fcf7d9bb4 100644 --- a/client/tests/sdk/test_evaluation.py +++ b/client/tests/sdk/test_evaluation.py @@ -139,7 +139,7 @@ def test_cmp( ) Context.set_runtime_context(context) with SimpleHandler() as _handler: - _handler._starwhale_internal_run_cmp() + _handler._starwhale_internal_run_evaluate() status_file_path = os.path.join(_status_dir, "current") assert 
os.path.exists(status_file_path) @@ -202,7 +202,7 @@ def test_ppl( ) Context.set_runtime_context(context) with SimpleHandler() as _handler: - _handler._starwhale_internal_run_ppl() + _handler._starwhale_internal_run_predict() m_eval_log.assert_called_once() status_file_path = os.path.join(_status_dir, "current") @@ -300,7 +300,7 @@ def cmp(self, _data_loader: t.Any) -> t.Any: Context.set_runtime_context(context) # mock with Dummy(flush_result=True) as _handler: - _handler._starwhale_internal_run_ppl() + _handler._starwhale_internal_run_predict() context = Context( workdir=Path(), @@ -312,7 +312,7 @@ def cmp(self, _data_loader: t.Any) -> t.Any: ) Context.set_runtime_context(context) with Dummy() as _handler: - _handler._starwhale_internal_run_cmp() + _handler._starwhale_internal_run_evaluate() class TestEvaluationLogStore(BaseTestCase): diff --git a/client/tests/sdk/test_job.py b/client/tests/sdk/test_job.py index 32c2052886..cd39d7eaee 100644 --- a/client/tests/sdk/test_job.py +++ b/client/tests/sdk/test_job.py @@ -210,8 +210,8 @@ def video_evaluate_handler(*args, **kwargs): ... "dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "img_predict_handler", "module_name": "mock_user_module", @@ -225,7 +225,7 @@ def video_evaluate_handler(*args, **kwargs): ... "cls_name": "", "concurrency": 1, "extra_args": [], - "extra_kwargs": {"ppl_auto_log": True}, + "extra_kwargs": {"predict_auto_log": True}, "func_name": "img_evaluate_handler", "module_name": "mock_user_module", "name": "mock_user_module:img_evaluate_handler", @@ -245,8 +245,8 @@ def video_evaluate_handler(*args, **kwargs): ... "dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "video_predict_handler", "module_name": "mock_user_module", @@ -260,7 +260,7 @@ def video_evaluate_handler(*args, **kwargs): ... "cls_name": "", "concurrency": 1, "extra_args": [], - "extra_kwargs": {"ppl_auto_log": True}, + "extra_kwargs": {"predict_auto_log": True}, "func_name": "video_evaluate_handler", "module_name": "mock_user_module", "name": "mock_user_module:video_evaluate_handler", @@ -271,7 +271,9 @@ def video_evaluate_handler(*args, **kwargs): ... }, ] - @patch("starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_ppl") + @patch( + "starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_predict" + ) def test_predict_deco_on_function(self, mock_ppl: MagicMock) -> None: content = """ from starwhale import evaluation @@ -302,8 +304,8 @@ def evaluate_handler(*args, **kwargs): ... "dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "predict_handler", "module_name": "mock_user_module", @@ -317,7 +319,7 @@ def evaluate_handler(*args, **kwargs): ... "cls_name": "", "concurrency": 1, "extra_args": [], - "extra_kwargs": {"ppl_auto_log": True}, + "extra_kwargs": {"predict_auto_log": True}, "func_name": "evaluate_handler", "module_name": "mock_user_module", "name": "mock_user_module:evaluate_handler", @@ -336,8 +338,8 @@ def evaluate_handler(*args, **kwargs): ... 
"dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "predict_handler", "module_name": "mock_user_module", @@ -385,15 +387,40 @@ def evaluate_handler(*args, **kwargs): ... assert result.status == "success" assert mock_ppl.call_count == 1 - @patch("starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_cmp") - def test_pipeline_handler(self, mock_cmp: MagicMock) -> None: + @patch( + "starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_evaluate" + ) + def test_pipeline_handler_with_ppl_cmp(self, mock_cmp: MagicMock) -> None: content = """ from starwhale import PipelineHandler -class MockHandler(PipelineHandler): +class MockPPLHandler(PipelineHandler): def ppl(self, *args, **kwargs): ... def cmp(self, *args, **kwargs): ... """ + self._ensure_py_script(content) + yaml_path = self.workdir / "job.yaml" + generate_jobs_yaml( + [f"{self.module_name}:MockPPLHandler"], self.workdir, yaml_path + ) + jobs_info = load_yaml(yaml_path) + assert "mock_user_module:MockPPLHandler.cmp" in jobs_info + assert "mock_user_module:MockPPLHandler.ppl" in jobs_info + assert jobs_info["mock_user_module:MockPPLHandler.cmp"][1]["needs"] == [ + "mock_user_module:MockPPLHandler.ppl" + ] + + @patch( + "starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_evaluate" + ) + def test_pipeline_handler(self, mock_cmp: MagicMock) -> None: + content = """ +from starwhale import PipelineHandler + +class MockHandler(PipelineHandler): + def predict(self, *args, **kwargs): ... + def evaluate(self, *args, **kwargs): ... + """ self._ensure_py_script(content) yaml_path = self.workdir / "job.yaml" @@ -403,50 +430,52 @@ def cmp(self, *args, **kwargs): ... 
assert yaml_path.exists() jobs_info = load_yaml(yaml_path) - assert jobs_info["mock_user_module:MockHandler.cmp"] == [ + assert jobs_info["mock_user_module:MockHandler.evaluate"] == [ { "cls_name": "MockHandler", "concurrency": 1, "extra_args": [], "extra_kwargs": {}, - "func_name": "ppl", + "func_name": "predict", "module_name": "mock_user_module", - "name": "mock_user_module:MockHandler.ppl", + "name": "mock_user_module:MockHandler.predict", "needs": [], "replicas": 2, "resources": [], - "show_name": "ppl", + "show_name": "predict", }, { "cls_name": "MockHandler", "concurrency": 1, "extra_args": [], "extra_kwargs": {}, - "func_name": "cmp", + "func_name": "evaluate", "module_name": "mock_user_module", - "name": "mock_user_module:MockHandler.cmp", - "needs": ["mock_user_module:MockHandler.ppl"], + "name": "mock_user_module:MockHandler.evaluate", + "needs": ["mock_user_module:MockHandler.predict"], "replicas": 1, "resources": [], - "show_name": "cmp", + "show_name": "evaluate", }, ] - assert jobs_info["mock_user_module:MockHandler.ppl"] == [ + assert jobs_info["mock_user_module:MockHandler.predict"] == [ { "cls_name": "MockHandler", "concurrency": 1, "extra_args": [], "extra_kwargs": {}, - "func_name": "ppl", + "func_name": "predict", "module_name": "mock_user_module", - "name": "mock_user_module:MockHandler.ppl", + "name": "mock_user_module:MockHandler.predict", "needs": [], "replicas": 2, "resources": [], - "show_name": "ppl", + "show_name": "predict", } ] - steps = Step.get_steps_from_yaml("mock_user_module:MockHandler.cmp", yaml_path) + steps = Step.get_steps_from_yaml( + "mock_user_module:MockHandler.evaluate", yaml_path + ) context = Context( workdir=self.workdir, project="test", @@ -459,8 +488,12 @@ def cmp(self, *args, **kwargs): ... assert result.status == "success" assert mock_cmp.call_count == 1 - @patch("starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_cmp") - @patch("starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_ppl") + @patch( + "starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_evaluate" + ) + @patch( + "starwhale.api._impl.evaluation.PipelineHandler._starwhale_internal_run_predict" + ) def test_predict_deco_on_cls_method( self, mock_ppl: MagicMock, mock_cmp: MagicMock ) -> None: @@ -493,8 +526,8 @@ def evaluate_handler(self, *args, **kwargs): ... "dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "predict_handler", "module_name": "mock_user_module", @@ -515,8 +548,8 @@ def evaluate_handler(self, *args, **kwargs): ... "dataset_uris": None, "ignore_dataset_data": False, "ignore_error": False, - "ppl_auto_log": True, - "ppl_batch_size": 1, + "predict_auto_log": True, + "predict_batch_size": 1, }, "func_name": "predict_handler", "module_name": "mock_user_module", @@ -530,7 +563,7 @@ def evaluate_handler(self, *args, **kwargs): ... 
"cls_name": "MockHandler", "concurrency": 1, "extra_args": [], - "extra_kwargs": {"ppl_auto_log": True}, + "extra_kwargs": {"predict_auto_log": True}, "func_name": "evaluate_handler", "module_name": "mock_user_module", "name": "mock_user_module:MockHandler.evaluate_handler", diff --git a/example/cifar10/cifar/evaluator.py b/example/cifar10/cifar/evaluator.py index 7c2ca17c36..55ce2cdb01 100644 --- a/example/cifar10/cifar/evaluator.py +++ b/example/cifar10/cifar/evaluator.py @@ -21,7 +21,7 @@ def __init__(self) -> None: self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = self._load_model(self.device) - def ppl(self, data: dict, **kw): + def predict(self, data: dict, **kw): data_tensor = self._pre(data["image"]) output = self.model(data_tensor) return self._post(output) @@ -33,7 +33,7 @@ def ppl(self, data: dict, **kw): show_roc_auc=True, all_labels=[i for i in range(0, 10)], ) - def cmp(self, ppl_result): + def evaluate(self, ppl_result): result, label, pr = [], [], [] for _data in ppl_result: label.append(_data["ds_data"]["label"]) @@ -85,5 +85,5 @@ def online_eval(self, img: PILImage.Image): "ship", "truck", ) - _, prob = self.ppl(Image(fp=buf.getvalue())) + _, prob = self.predict(Image(fp=buf.getvalue())) return {classes[i]: p for i, p in enumerate(prob[0])} diff --git a/example/mnist/mnist/evaluator.py b/example/mnist/mnist/evaluator.py index 4a4cefc889..c43eeb58ab 100644 --- a/example/mnist/mnist/evaluator.py +++ b/example/mnist/mnist/evaluator.py @@ -24,7 +24,7 @@ def __init__(self) -> None: self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = self._load_model(self.device) - def ppl(self, data: t.Dict[str, t.Any], **kw: t.Any) -> t.Tuple[float, t.List[float]]: # type: ignore + def predict(self, data: t.Dict[str, t.Any], **kw: t.Any) -> t.Tuple[float, t.List[float]]: # type: ignore data_tensor = self._pre(data["img"]) output = self.model(data_tensor) return self._post(output) @@ -33,7 +33,7 @@ def ppl(self, data: t.Dict[str, t.Any], **kw: t.Any) -> t.Tuple[float, t.List[fl def upload_bin_file(self, file: t.Any) -> t.Any: with open(file.name, "rb") as f: data = Image(f.read(), shape=(28, 28, 1)) - _, prob = self.ppl({"img": data}) + _, prob = self.predict({"img": data}) return {i: p for i, p in enumerate(prob)} @multi_classification( @@ -43,7 +43,7 @@ def upload_bin_file(self, file: t.Any) -> t.Any: show_roc_auc=True, all_labels=[i for i in range(0, 10)], ) - def cmp( + def evaluate( self, ppl_result: t.Iterator ) -> t.Tuple[t.List[int], t.List[int], t.List[t.List[float]]]: result, label, pr = [], [], [] diff --git a/example/ucf101/ucf101/evaluator.py b/example/ucf101/ucf101/evaluator.py index e7068fd5ac..119a41c3af 100644 --- a/example/ucf101/ucf101/evaluator.py +++ b/example/ucf101/ucf101/evaluator.py @@ -193,7 +193,7 @@ def ppl_pre(videos: t.List[Video], sampler, transforms) -> torch.Tensor: class UCF101PipelineHandler(PipelineHandler): def __init__(self): - super().__init__(ignore_error=False, ppl_batch_size=5) + super().__init__(ignore_error=False, predict_batch_size=5) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = load_model(self.device) self.sampler = RandomSampling() diff --git a/scripts/client_test/cli_test.py b/scripts/client_test/cli_test.py index 0e7e627fe7..5831af8ba9 100644 --- a/scripts/client_test/cli_test.py +++ b/scripts/client_test/cli_test.py @@ -22,6 +22,7 @@ from starwhale import URI from starwhale.utils import config from starwhale.base.type import 
DatasetChangeMode +from starwhale.utils.debug import init_logger CURRENT_DIR = os.path.dirname(__file__) SCRIPT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir)) @@ -35,6 +36,8 @@ logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) +init_logger(3) + CPU_EXAMPLES: t.Dict[str, t.Dict[str, t.Any]] = { "mnist": { "run_handler": "mnist.evaluator:MNISTInference.cmp", diff --git a/scripts/client_test/cli_test.sh b/scripts/client_test/cli_test.sh index 2e4fe5cfed..c11a71b1c6 100644 --- a/scripts/client_test/cli_test.sh +++ b/scripts/client_test/cli_test.sh @@ -26,9 +26,22 @@ else python3 -m pip install -e client fi swcli --version + +ls -lah ${REPO_PATH}/client/dist +ls -lah ${WORK_DIR}/client/dist + +if [ -f "${REPO_PATH}/client/dist/starwhale-100.0.0-py3-none-any.whl" ]; then + SRC_PATH=${REPO_PATH}/client/dist/starwhale-100.0.0-py3-none-any.whl +else + python3 -m pip install -r client/requirements-install.txt + make -C client build-wheel + SRC_PATH=${WORK_DIR}/client/dist/starwhale-${PYPI_RELEASE_VERSION:=0.0.0.dev0}-py3-none-any.whl +fi +cp ${SRC_PATH} ${WORK_DIR}/scripts/example/starwhale-0.0.0.dev0-py3-none-any.whl + popd bash "$SCRIPT_DIR"/update_controller_setting.sh for i in $@; do python3 "$SCRIPT_DIR"/cli_test.py $i || exit 1 -done \ No newline at end of file +done diff --git a/scripts/example/runtime_conda.yaml b/scripts/example/runtime_conda.yaml index f5964020d2..3412eb1f7e 100644 --- a/scripts/example/runtime_conda.yaml +++ b/scripts/example/runtime_conda.yaml @@ -4,6 +4,8 @@ dependencies: - numpy - pip: - git+https://github.com/star-whale/starwhale.git@94eeff7863b61480ffb83d055d79eeb6ceb1bd21#subdirectory=example/runtime/pytorch/dummy + - wheels: + - starwhale-0.0.0.dev0-py3-none-any.whl # generate by make build-wheel in the cli_test.sh script environment: arch: noarch os: ubuntu:20.04
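
Usage sketch: with this patch a PipelineHandler subclass implements predict() and evaluate() instead of ppl() and cmp(); the old names remain accepted for backward compatibility via _do_predict/_do_evaluate above. The class below is a minimal, hypothetical illustration — MyEvaluator and its use of a "label" feature are assumptions for the example, not code from this repository:

    import typing as t

    from starwhale import PipelineHandler


    class MyEvaluator(PipelineHandler):
        def __init__(self) -> None:
            # predict_batch_size / predict_auto_log replace the old
            # ppl_batch_size / ppl_auto_log constructor arguments
            super().__init__(predict_batch_size=1, predict_auto_log=True)

        def predict(self, data: t.Dict[str, t.Any], **kw: t.Any) -> t.Any:
            # called per dataset row (or per batch when predict_batch_size > 1);
            # "label" is a hypothetical feature name used only for illustration
            return data["label"]

        def evaluate(self, predict_results: t.Iterator) -> None:
            # with predict_auto_log=True the framework passes in the logged
            # predict results, the same value the old cmp() hook received
            for result in predict_results:
                ...

As with the previous ppl/cmp layout, such a handler is registered as two steps: "predict" (replicas=2 by default) followed by "evaluate" (replicas=1, needs=[predict]), per the registration changes in client/starwhale/api/_impl/job.py above.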