Skip to content

Commit

Permalink
Soufianej util 1.0 (#64)
Browse files Browse the repository at this point in the history
* Chi.Container 1.0 and added device res to Lease1.0

Implemented redesign of chi.container and added device reservations to
the lease module

* Fixed Backwards compat

* Chi.magic (incomplete)

* Fixed more backwards compatibility issues and finished implementation of chi.util

* Added next_free_timeslot implementation

* reformatting comments

* Improvements across the board, and finalized features

* Improvements to stability and error reporting

---------

Co-authored-by: Mark Powers <markpowers@uchicago.edu>
  • Loading branch information
JOUNAIDSoufiane and Mark-Powers committed Sep 26, 2024
1 parent bd761cc commit c7a1b8f
Show file tree
Hide file tree
Showing 8 changed files with 484 additions and 91 deletions.
23 changes: 16 additions & 7 deletions chi/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
from typing import List, Tuple, Optional
from IPython.display import display, HTML

from zunclient.exceptions import NotFound

from .clients import zun
from .exception import ResourceError
from .exception import CHIValueError, ResourceError
from .network import bind_floating_ip, get_free_floating_ip, get_network_id

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -81,7 +83,7 @@ def __init__(self,
exposed_ports: List[str],
reservation_id: str = None,
start: bool = True,
start_timeout: int = None,
start_timeout: int = 0,
runtime: str = None):
self.name = name
self.image_ref = image_ref
Expand Down Expand Up @@ -133,8 +135,11 @@ def submit(self, wait_for_active: bool = True, wait_timeout: int = None,
if idempotent:
existing = get_container(self.name)
if existing:
self.__dict__.update(existing.__dict__)
return
if wait_for_active:
existing.wait(status="Running", timeout=wait_timeout)
if show:
existing.show(type=show, wait_for_active=wait_for_active)
return existing

container = create_container(
name=self.name,
Expand All @@ -152,7 +157,7 @@ def submit(self, wait_for_active: bool = True, wait_timeout: int = None,
else:
raise ResourceError("could not create container")

if wait_for_active:
if wait_for_active and self.status != "Running":
self.wait(status="Running", timeout=wait_timeout)

if show:
Expand Down Expand Up @@ -198,7 +203,7 @@ def show(self, type: str = "text", wait_for_active: bool = False):
type (str, optional): The type of display. Can be "text" or "widget". Defaults to "text".
wait_for_active (bool, optional): Whether to wait for the container to be in the "Running" state before displaying information. Defaults to False.
"""
if wait_for_active:
if wait_for_active and self.status != "Running":
self.wait(status="Running")

zun_container = get_container(self.id)
Expand Down Expand Up @@ -403,7 +408,10 @@ def get_container(name: str) -> Optional[Container]:
Returns:
Optional[Container]: The retrieved container object, or None if the container does not exist.
"""
zun_container = zun().containers.get(name)
try:
zun_container = zun().containers.get(name)
except NotFound:
return None
return Container.from_zun_container(zun_container)


Expand Down Expand Up @@ -519,6 +527,7 @@ def wait_for_active(container_ref: "str", timeout: int = (60 * 2)) -> "Container
def _wait_for_status(
container_ref: "str", status: "str", timeout: int = (60 * 2)
) -> "Container":
print(f"Waiting for container {container_ref} status to turn to Running. This can take a while depending on the image")
start_time = time.perf_counter()

while True:
Expand Down
52 changes: 45 additions & 7 deletions chi/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
DEFAULT_IMAGE_NAME = "CC-Ubuntu22.04"
DEFAULT_NODE_TYPE = "compute_skylake"
DEFAULT_AUTH_TYPE = "v3token"
DEFAULT_NETWORK = "sharednet1"
CONF_GROUP = "chi"
RESOURCE_API_URL = os.getenv("CHI_RESOURCE_API_URL", "https://api.chameleoncloud.org")

Expand Down Expand Up @@ -408,11 +409,27 @@ def choose_site() -> None:
global _sites
if not _sites:
_sites = list_sites()

use_site(list(_sites.keys())[0])

print("Please choose a site in the dropdown below")
site_dropdown = widgets.Dropdown(options=_sites.keys(), description="Select Site")
display(site_dropdown)
site_dropdown.observe(lambda change: use_site(change['new']), names='value')

site_dropdown = widgets.Dropdown(
options=_sites.keys(),
description="Select Site"
)

output = widgets.Output()

def on_change(change):
with output:
output.clear_output()
print(f"Selected site: {change['new']}")
use_site(change['new'])

site_dropdown.observe(on_change, names='value')

display(widgets.VBox([site_dropdown, output]))
else:
print("Choose site feature is only available in an ipynb environment.")

Expand Down Expand Up @@ -479,10 +496,31 @@ def choose_project() -> None:
Only works if running in a Ipynb notebook environment.
"""
if _is_ipynb():
project_dropdown = widgets.Dropdown(options=list_projects(), description="Select Project")
display(project_dropdown)
use_project(list_projects()[0])
project_dropdown.observe(lambda change: (use_project(change['new'])), names='value')
projects = list_projects()

project_dropdown = widgets.Dropdown(
options=projects,
description="Select Project"
)

output = widgets.Output()

def on_change(change):
with output:
output.clear_output()
print(f"Selected project: {change['new']}")
use_project(change['new'])

project_dropdown.observe(on_change, names='value')

# Use the first project as the default
use_project(projects[0])

# Display the initial selection
with output:
print(f"Initial project: {projects[0]}")

display(widgets.VBox([project_dropdown, output]))
else:
print("Choose project feature is only available in Jupyter notebook environment.")

Expand Down
52 changes: 36 additions & 16 deletions chi/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

LOG = logging.getLogger(__name__)

node_types = []

@dataclass
class Node:
"""
Expand Down Expand Up @@ -38,36 +40,41 @@ def next_free_timeslot(self) -> Tuple[datetime, Optional[datetime]]:
A tuple containing the start and end datetime of the next available timeslot.
If no timeslot is available, returns (end_datetime_of_last_allocation, None).
"""
raise NotImplementedError
def get_host_id(items, target_uid):
for item in items:
if item.get('uid') == target_uid:
return item['id']
return None

blazarclient = blazar()

# Get allocations for this specific host
allocations = blazarclient.allocation.get(resource_id=self.uid)
# Get allocation for this specific host
host_id = get_host_id(blazarclient.host.list(), self.uid)

# Sort allocations by start time
allocations.sort(key=lambda x: x['start_date'])
allocation = blazarclient.host.get_allocation(host_id)

now = datetime.now(timezone.utc)

if not allocations:
if not allocation:
return (now, None)

# Check if there's a free slot now
if datetime.fromisoformat(allocations[0]['start_date']) > now:
return (now, datetime.fromisoformat(allocations[0]['start_date']))
reservations = sorted(allocation['reservations'], key=lambda x: x['start_date'])

# Find the next free slot
for i in range(len(allocations) - 1):
current_end = datetime.fromisoformat(allocations[i]['end_date'])
next_start = datetime.fromisoformat(allocations[i+1]['start_date'])
def parse_datetime(dt_str: str) -> datetime:
dt = datetime.fromisoformat(dt_str)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

if parse_datetime(reservations[0]['start_date']) > now:
return (now, parse_datetime(reservations[0]['start_date']))

for i in range(len(reservations) - 1):
current_end = parse_datetime(reservations[i]['end_date'])
next_start = parse_datetime(reservations[i+1]['start_date'])

if current_end < next_start:
return (current_end, next_start)

# If no free slot found, return the end of the last allocation
last_end = datetime.fromisoformat(allocations[-1]['end_date'])
last_end = parse_datetime(reservations[-1]['end_date'])
return (last_end, None)


Expand Down Expand Up @@ -135,6 +142,8 @@ def get_nodes(
uid=node_data.get("uid"),
version=node_data.get("version"),
)
if node.type not in node_types:
node_types.append(node.type)

if isinstance(node.gpu, list):
gpu_filter = gpu is None or (node.gpu and gpu == bool(node.gpu[0]['gpu']))
Expand All @@ -146,4 +155,15 @@ def get_nodes(
if gpu_filter and cpu_filter:
nodes.append(node)

return nodes
return nodes

def get_node_types() -> List[str]:
    """
    Retrieve a list of unique node types.

    The module-level ``node_types`` cache is filled as a side effect of
    ``get_nodes()``; when the cache is still empty, ``get_nodes()`` is called
    once to populate it before the result is built.

    Returns:
        List[str]: A new list of unique node types, in the order they were
            first recorded in the cache.
    """
    if not node_types:
        get_nodes()
    # dict.fromkeys de-duplicates while keeping insertion order, so the result
    # is stable between runs (a set of strings is ordered by randomized hash).
    return list(dict.fromkeys(node_types))
108 changes: 89 additions & 19 deletions chi/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,105 @@
'list_images',
]

from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional

def get_image(ref):
"""Get an image by its ID or name.
from .clients import glance
from .exception import CHIValueError, ResourceError
from glanceclient.exc import NotFound

__all__ = [
'get_image',
'get_image_id',
'list_images',
]

@dataclass
class Image:
    """Plain record describing a single Glance image."""

    # Copied from ``glance_image.id``.
    uuid: str
    # Copied from ``glance_image.created_at`` without conversion.
    # NOTE(review): glance may hand this over as an ISO-8601 string rather
    # than a datetime - confirm against the client before relying on the type.
    created_at: datetime
    # True only when the image's 'build-repo' property equals the
    # ChameleonCloud cc-images repository URL.
    is_chameleon_supported: bool
    # Copied from ``glance_image.name``.
    name: str

    @staticmethod
    def from_glance_image(glance_image) -> 'Image':
        """Convert a glance image object to an Image object.

        Args:
            glance_image: The glance image object. It must expose ``id``,
                ``created_at`` and ``name`` as attributes and support
                ``in`` / item lookup for the optional 'build-repo' property.

        Returns:
            Image: The Image object. ``is_chameleon_supported`` is False
                when the image has no 'build-repo' property.
        """
        supported_repo = 'https://github.com/ChameleonCloud/cc-images'
        # Only look the property up when it is present; item access on a
        # missing key would raise instead of yielding "unsupported".
        if 'build-repo' in glance_image:
            build_repo = glance_image['build-repo']
        else:
            build_repo = None
        return Image(
            uuid=glance_image.id,
            created_at=glance_image.created_at,
            is_chameleon_supported=(build_repo == supported_repo),
            name=glance_image.name,
        )

def list_images(is_chameleon_supported: Optional[bool] = False) -> List[Image]:
    """List images available at the current site, optionally filtered by support status.

    Args:
        is_chameleon_supported (bool, optional): When truthy, only images
            whose 'build-repo' property is the ChameleonCloud cc-images
            repository are requested. When falsy (including None), the
            listing is unfiltered. Defaults to False.

    Returns:
        List[Image]: A list of Image objects, one per glance image returned.
    """
    list_kwargs = {}
    if is_chameleon_supported:
        # Ask glance to filter on the image's 'build-repo' property.
        list_kwargs['filters'] = {
            'build-repo': 'https://github.com/ChameleonCloud/cc-images'
        }
    found = glance().images.list(**list_kwargs)
    return [Image.from_glance_image(entry) for entry in found]

def get_image(name: str) -> Image:
    """Get an image by its name.

    Args:
        name (str): The name of the image.

    Returns:
        Image: The Image object built from the single glance image that
            matches the name.

    Raises:
        CHIValueError: If no image is found with the given name.
        ResourceError: If multiple images are found with the same name.
    """
    matches = list(glance().images.list(filters={'name': name}))

    # Exactly one match is required; anything else is reported to the caller.
    if len(matches) == 0:
        raise CHIValueError(f'No images found matching name "{name}"')
    if len(matches) > 1:
        raise ResourceError(f'Multiple images found matching name "{name}"')

    (only_match,) = matches
    return Image.from_glance_image(only_match)

def get_image_name(id: str) -> str:
    """Look up an image's name from its ID.

    Args:
        id (str): The ID of the image.

    Returns:
        str: The name of the found image.

    Raises:
        CHIValueError: If the image could not be found. The glance
            ``NotFound`` error is attached as the cause.
    """
    # Keep the try body down to the one call that can raise NotFound.
    try:
        image = glance().images.get(id)
    except NotFound as err:
        raise CHIValueError(f'No image found with ID "{id}"') from err
    return image.name

def get_image_id(name):
"""Look up an image's ID from its name.
Expand All @@ -44,15 +125,4 @@ def get_image_id(name):
images = list(glance().images.list(filters={'name': name}))
if not images:
raise CHIValueError(f'No images found matching name "{name}"')
elif len(images) > 1:
raise ResourceError(f'Multiple images found matching name "{name}"')
return images[0].id


def list_images():
"""List all images under the current project.
Returns:
All images associated with the current project.
"""
return list(glance().images.list())
Loading

0 comments on commit c7a1b8f

Please sign in to comment.