diff --git a/python/ray/serve/BUILD b/python/ray/serve/BUILD index 915956c81fc63..897da97636c63 100644 --- a/python/ray/serve/BUILD +++ b/python/ray/serve/BUILD @@ -3,6 +3,7 @@ py_library( name = "serve_lib", srcs = glob(["**/*.py"], exclude=["tests/**/*.py"]), + visibility = ["//python/ray/serve:__subpackages__", "//release:__pkg__"], ) serve_tests_srcs = glob(["tests/**/*.py"]) diff --git a/release/serve_tests/workloads/multi_deployment_1k_noop_replica.py b/release/serve_tests/workloads/multi_deployment_1k_noop_replica.py index 37cde2e81466c..32c5468c60680 100644 --- a/release/serve_tests/workloads/multi_deployment_1k_noop_replica.py +++ b/release/serve_tests/workloads/multi_deployment_1k_noop_replica.py @@ -29,8 +29,8 @@ import math import os import random -import ray +import ray from ray import serve from ray.serve.utils import logger from serve_test_utils import ( @@ -48,8 +48,8 @@ from typing import Optional # Experiment configs -DEFAULT_SMOKE_TEST_NUM_REPLICA = 8 -DEFAULT_SMOKE_TEST_NUM_DEPLOYMENTS = 4 # 2 replicas each +DEFAULT_SMOKE_TEST_NUM_REPLICA = 4 +DEFAULT_SMOKE_TEST_NUM_DEPLOYMENTS = 4 # 1 replica each # TODO:(jiaodong) We should investigate and change this back to 1k # for now, we won't get valid latency numbers from wrk at 1k replica @@ -144,7 +144,8 @@ def main(num_replicas: Optional[int], num_deployments: Optional[int], logger.info("Warming up cluster ....\n") rst_ray_refs = [] - for endpoint in serve.list_endpoints().keys(): + all_endpoints = list(serve.list_deployments().keys()) + for endpoint in all_endpoints: rst_ray_refs.append( warm_up_one_cluster.options(num_cpus=0.1).remote( 10, http_host, http_port, endpoint)) @@ -154,7 +155,6 @@ def main(num_replicas: Optional[int], num_deployments: Optional[int], logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n") # For detailed discussion, see https://github.com/wg/wrk/issues/205 # TODO:(jiaodong) What's the best number to use here ? 
- all_endpoints = list(serve.list_endpoints().keys()) all_metrics, all_wrk_stdout = run_wrk_on_all_nodes( trial_length, NUM_CONNECTIONS, @@ -176,3 +176,6 @@ def main(num_replicas: Optional[int], num_deployments: Optional[int], if __name__ == "__main__": main() + import pytest + import sys + sys.exit(pytest.main(["-v", "-s", __file__])) diff --git a/release/serve_tests/workloads/single_deployment_1k_noop_replica.py b/release/serve_tests/workloads/single_deployment_1k_noop_replica.py index 4aedd3fb7d631..e18d864335db2 100644 --- a/release/serve_tests/workloads/single_deployment_1k_noop_replica.py +++ b/release/serve_tests/workloads/single_deployment_1k_noop_replica.py @@ -46,7 +46,7 @@ from typing import Optional # Experiment configs -DEFAULT_SMOKE_TEST_NUM_REPLICA = 8 +DEFAULT_SMOKE_TEST_NUM_REPLICA = 4 DEFAULT_FULL_TEST_NUM_REPLICA = 1000 # Deployment configs @@ -120,7 +120,7 @@ def main(num_replicas: Optional[int], trial_length: Optional[str], logger.info(f"Starting wrk trial on all nodes for {trial_length} ....\n") # For detailed discussion, see https://github.com/wg/wrk/issues/205 # TODO:(jiaodong) What's the best number to use here ? - all_endpoints = list(serve.list_endpoints().keys()) + all_endpoints = list(serve.list_deployments().keys()) all_metrics, all_wrk_stdout = run_wrk_on_all_nodes( trial_length, NUM_CONNECTIONS, @@ -142,3 +142,6 @@ def main(num_replicas: Optional[int], trial_length: Optional[str], if __name__ == "__main__": main() + import pytest + import sys + sys.exit(pytest.main(["-v", "-s", __file__]))