Skip to content

Commit

Permalink
hotfix(e2e): tune e2e for the useless check (#2172)
Browse files (browse the repository at this point in the history)
tune e2e for the useless check
  • Loading branch information
tianweidut authored Apr 29, 2023
1 parent 86ee5c4 commit 94eeff7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 21 deletions.
46 changes: 27 additions & 19 deletions scripts/client_test/cli_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import logging
import subprocess
from time import sleep
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures._base import Future
from concurrent.futures import as_completed, ThreadPoolExecutor

from cmds import DatasetExpl
from tenacity import retry
from cmds.job_cmd import Job
from tenacity.stop import stop_after_attempt
from tenacity.wait import wait_random
from cmds.base.invoke import invoke
from cmds.project_cmd import Project
from cmds.instance_cmd import Instance
Expand Down Expand Up @@ -210,7 +210,7 @@ def run_model_in_standalone(
model_uri: URI,
run_handler: str,
runtime_uris: t.Optional[t.List[URI | None]] = None,
) -> t.Any:
) -> t.List[str]:
logger.info("running evaluation at local...")
self.select_local_instance()

Expand All @@ -237,27 +237,27 @@ def run_model_in_server(
model_uri: URI,
runtime_uris: t.List[URI],
run_handler: str,
) -> t.List[Future]:
) -> t.List[str]:
self.instance_api.select(instance="server")
self.project_api.select(project=self.server_project)

job_status_checkers = []
remote_job_ids = []
for _rt_uri in runtime_uris:
logger.info("running evaluation at server...")
ok, remote_jid = self.model_api.run_in_server(
ok, jid = self.model_api.run_in_server(
model_uri=model_uri.object.version,
dataset_uris=[_ds_uri.object.version for _ds_uri in dataset_uris],
runtime_uri=_rt_uri.object.version,
project=f"{self.server_url}/project/{self.server_project}",
run_handler=run_handler,
)
assert ok
job_status_checkers.append(
self.executor.submit(self.get_remote_job_status, remote_jid)
)
return job_status_checkers
assert (
ok
), f"submit evaluation to server failed, model: {model_uri}, dataset: {dataset_uris}, runtime: {_rt_uri}"
remote_job_ids.append(jid)
return remote_job_ids

@retry(stop=stop_after_attempt(10))
@retry(stop=stop_after_attempt(20), wait=wait_random(min=2, max=20))
def get_remote_job_status(self, job_id: str) -> t.Tuple[str, str]:
while True:
_remote_job = self.job_api.info(
Expand Down Expand Up @@ -293,9 +293,9 @@ def test_simple(self) -> None:
mode=DatasetChangeMode.OVERWRITE,
)

remote_future_jobs = []
remote_job_ids = []
if self.server_url:
remote_future_jobs = self.run_model_in_server(
remote_job_ids = self.run_model_in_server(
dataset_uris=[dataset_uri],
model_uri=model_uri,
runtime_uris=[venv_runtime_uri],
Expand All @@ -309,8 +309,12 @@ def test_simple(self) -> None:
runtime_uris=[conda_runtime_uri],
)

for job in remote_future_jobs:
_, status = job.result()
futures = [
self.executor.submit(self.get_remote_job_status, jid)
for jid in remote_job_ids
]
for f in as_completed(futures):
_, status = f.result()
assert status in STATUS_SUCCESS

def test_all(self) -> None:
Expand Down Expand Up @@ -341,15 +345,19 @@ def test_all(self) -> None:
self.run_example(name, example["run_handler"], in_standalone=False)
for name, example in ALL_EXAMPLES.items()
]
status_checkers: t.List[Future] = sum(res, [])
remote_job_ids: t.List[str] = sum(res, [])

# model run on standalone
for name, example in CPU_EXAMPLES.items():
self.run_example(name, example["run_handler"], in_standalone=True)

failed_jobs = []
for _js in status_checkers:
jid, status = _js.result()
futures = [
self.executor.submit(self.get_remote_job_status, jid)
for jid in remote_job_ids
]
for f in as_completed(futures):
jid, status = f.result()
if status not in STATUS_SUCCESS:
failed_jobs.append((jid, status))

Expand Down
5 changes: 3 additions & 2 deletions scripts/client_test/cmds/job_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ def info(self, version: str) -> t.Any:
try:
return json.loads(_res) if _ret_code == 0 else {}
except Exception as e:
print(f"failed to get job info: {e}, ret-code:{_ret_code}, res:{_res}")
print(
f"failed to get job info[{version}]: {e}, ret-code:{_ret_code}, res:{_res}"
)
raise

def list(
Expand Down Expand Up @@ -43,7 +45,6 @@ def list(
_args.append("--show-removed")

_ret_code, _res = invoke(_args)
print(f"job info: code={_ret_code}, res={_res}")
return json.loads(_res.strip()) if _ret_code == 0 else []

def cancel(self, uri: str, force: bool = False) -> bool:
Expand Down
1 change: 1 addition & 0 deletions scripts/e2e_test/start_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ main() {
if ! in_github_action; then
trap exit_hook EXIT
publish_to_k8s
sleep 120
else
publish_to_mini_k8s
fi
Expand Down

0 comments on commit 94eeff7

Please sign in to comment.