forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Dashboard] Add API support for the logical view and machine view in …
…new backend (ray-project#11012) * Add API support for the logical view and machine view, which lean on datacenter in common. * Update dashboard/datacenter.py Co-authored-by: fyrestone <fyrestone@outlook.com> * Update dashboard/modules/logical_view/logical_view_head.py Co-authored-by: fyrestone <fyrestone@outlook.com> * Address PR comments * lint * Add dashboard tests to CI build * Fix integration issues * lint Co-authored-by: Max Fitton <max@semprehealth.com> Co-authored-by: fyrestone <fyrestone@outlook.com>
- Loading branch information
1 parent
ee71fec
commit ff6d412
Showing
10 changed files
with
460 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import logging | ||
import aiohttp.web | ||
import ray.utils | ||
import ray.new_dashboard.utils as dashboard_utils | ||
import ray.new_dashboard.actor_utils as actor_utils | ||
from ray.new_dashboard.utils import rest_response | ||
from ray.new_dashboard.datacenter import DataOrganizer | ||
from ray.core.generated import core_worker_pb2 | ||
from ray.core.generated import core_worker_pb2_grpc | ||
|
||
from grpc.experimental import aio as aiogrpc | ||
|
||
# Module-level logger named after this module.
logger = logging.getLogger(__name__) | ||
# Route table whose `get` decorator registers the HTTP handlers defined on
# LogicalViewHead in this module.
routes = dashboard_utils.ClassMethodRouteTable | ||
|
||
|
||
class LogicalViewHead(dashboard_utils.DashboardHeadModule):
    """Dashboard head module serving the logical-view HTTP endpoints.

    Two GET routes are registered through the module-level ``routes`` table:
    ``/logical/actor_groups`` and ``/logical/kill_actor``.
    """

    @routes.get("/logical/actor_groups")
    async def get_actor_groups(self, req) -> aiohttp.web.Response:
        """Return all actors and actor creation tasks as actor groups.

        Args:
            req: The incoming HTTP request; it is not inspected.

        Returns:
            The ``rest_response`` carrying the grouped actors under the
            ``actor_groups`` keyword.
        """
        actors = await DataOrganizer.get_all_actors()
        actor_creation_tasks = await DataOrganizer.get_actor_creation_tasks()
        # actor_creation_tasks have some common interface with actors,
        # and they get processed and shown in tandem in the logical view
        # hence we merge them together before constructing actor groups.
        # NOTE(review): update() mutates the dict returned by
        # get_all_actors(); confirm DataOrganizer hands out a fresh dict.
        actors.update(actor_creation_tasks)
        actor_groups = actor_utils.construct_actor_groups(actors)
        return await rest_response(
            success=True,
            message="Fetched actor groups.",
            actor_groups=actor_groups)

    @routes.get("/logical/kill_actor")
    async def kill_actor(self, req) -> aiohttp.web.Response:
        """Ask the core worker hosting an actor to kill that actor.

        Expected query parameters: ``actorId`` (hex string), ``ipAddress``
        and ``port`` (address of the worker's gRPC server).

        Args:
            req: The incoming HTTP request carrying the query parameters.

        Returns:
            A failure ``rest_response`` ("Bad Request") when any query
            parameter is missing; otherwise a success response, including
            when the RPC ends with ``AioRpcError`` (see below).
        """
        try:
            actor_id = req.query["actorId"]
            ip_address = req.query["ipAddress"]
            port = req.query["port"]
        except KeyError:
            return await rest_response(success=False, message="Bad Request")

        channel = aiogrpc.insecure_channel(f"{ip_address}:{port}")
        try:
            stub = core_worker_pb2_grpc.CoreWorkerServiceStub(channel)
            await stub.KillActor(
                core_worker_pb2.KillActorRequest(
                    intended_actor_id=ray.utils.hex_to_binary(actor_id)))
        except aiogrpc.AioRpcError:
            # This always throws an exception because the worker
            # is killed and the channel is closed on the worker side
            # before this handler, however it deletes the actor correctly.
            pass
        finally:
            # A channel is opened per request, so release it explicitly
            # instead of leaking one channel for every kill request.
            await channel.close()

        return await rest_response(
            success=True, message=f"Killed actor with id {actor_id}")

    async def run(self, server):
        """No-op; this module does no work beyond serving its routes."""
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import os | ||
import sys | ||
import logging | ||
import requests | ||
import time | ||
import traceback | ||
import pytest | ||
import ray | ||
from ray.new_dashboard.tests.conftest import * # noqa | ||
from ray.test_utils import ( | ||
format_web_url, | ||
wait_until_server_available, | ||
) | ||
|
||
# Set at import time, i.e. before any test in this module runs.
# Presumably opts Ray into the new dashboard backend -- confirm against
# the code that reads RAY_USE_NEW_DASHBOARD.
os.environ["RAY_USE_NEW_DASHBOARD"] = "1" | ||
|
||
# Module-level logger named after this module.
logger = logging.getLogger(__name__) | ||
|
||
|
||
def test_actor_groups(ray_start_with_dashboard):
    """Check the ``/logical/actor_groups`` endpoint of the dashboard.

    Starts two ``Foo`` actors (each running one ``do_task``) plus one actor
    requesting a GPU, then polls the endpoint once per second until the
    response matches expectations or ``timeout_seconds`` has elapsed.

    Args:
        ray_start_with_dashboard: Fixture result; its ``"webui_url"`` entry
            is the dashboard address.

    Raises:
        Exception: If a polling attempt fails after the deadline; the
            message embeds the traceback of that failed attempt.
    """

    @ray.remote
    class Foo:
        def __init__(self, num):
            self.num = num

        def do_task(self):
            return self.num

    # Presumably cannot be scheduled because the test cluster has no GPU
    # (going by the class name) -- confirm against the fixture.
    @ray.remote(num_gpus=1)
    class InfeasibleActor:
        pass

    foo_actors = [Foo.remote(4), Foo.remote(5)]
    # The two names below are intentionally unused (hence the noqa);
    # presumably they keep the handles/results referenced for the test.
    infeasible_actor = InfeasibleActor.remote()  # noqa
    results = [actor.do_task.remote() for actor in foo_actors]  # noqa
    assert (wait_until_server_available(ray_start_with_dashboard["webui_url"])
            is True)
    webui_url = ray_start_with_dashboard["webui_url"]
    webui_url = format_web_url(webui_url)

    timeout_seconds = 5
    start_time = time.time()
    while True:
        time.sleep(1)
        try:
            response = requests.get(webui_url + "/logical/actor_groups")
            response.raise_for_status()
            actor_groups_resp = response.json()
            assert actor_groups_resp["result"] is True, actor_groups_resp[
                "msg"]
            actor_groups = actor_groups_resp["data"]["actorGroups"]
            assert "Foo" in actor_groups
            summary = actor_groups["Foo"]["summary"]
            # 2 __init__ tasks and 2 do_task tasks
            assert summary["numExecutedTasks"] == 4
            assert summary["stateToCount"]["ALIVE"] == 2

            entries = actor_groups["Foo"]["entries"]
            assert len(entries) == 2
            assert "InfeasibleActor" in actor_groups

            entries = actor_groups["InfeasibleActor"]["entries"]
            assert "requiredResources" in entries[0]
            assert "GPU" in entries[0]["requiredResources"]
            break
        except Exception as ex:
            # Only a failed attempt may time out. Checking the deadline in a
            # `finally` would also run on the successful `break` and turn a
            # late success into a spurious timeout failure.
            if time.time() > start_time + timeout_seconds:
                ex_stack = "".join(
                    traceback.format_exception(
                        type(ex), ex, ex.__traceback__))
                raise Exception(
                    f"Timed out while testing, {ex_stack}") from ex
|
||
|
||
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's status to the shell.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# Dashboard tuning constants. The *_SECONDS values are presumably durations
# in seconds (per the name suffix); their consumers are not visible here --
# verify usage before changing any of them.
NODE_STATS_UPDATE_INTERVAL_SECONDS = 1 | ||
RETRY_GET_ALL_ACTOR_INFO_INTERVAL_SECONDS = 1 | ||
# Presumably the name of the channel carrying actor updates -- confirm
# against the subscriber code.
ACTOR_CHANNEL = "ACTOR" | ||
ERROR_INFO_UPDATE_INTERVAL_SECONDS = 5 | ||
LOG_INFO_UPDATE_INTERVAL_SECONDS = 5 |
Oops, something went wrong.