Skip to content
This repository was archived by the owner on Aug 3, 2025. It is now read-only.

GPU status endpoints #16

Open
wants to merge 19 commits into
base: gpu_cluster_grows_up
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Created new branch off of multi-node and copy-pasted code from hackillinois branches
  • Loading branch information
JeffreyZh4ng committed Feb 24, 2019
commit 8150eef00a90fff3136e5b0c5f03e38bd040a2e3
37 changes: 27 additions & 10 deletions gpu_cluster/controllers/cpu_container_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,60 @@
from .container_controller import ContainerController
import docker


class CPUContainerController(ContainerController):
    """Launch and stop CPU-only containers through the local Docker daemon."""

    def __init__(self, config):
        """Initialise the base controller and connect to Docker.

        Args:
            config: Forwarded unchanged to ``ContainerController.__init__``.
        """
        super().__init__(config)
        self.client = docker.from_env(version='auto')

    def create_container(self, image, user="", token_required=False, budget=-1):
        """Start a detached container from *image* and record it in the database.

        Container port 8888/tcp (UI) and 6006/tcp (monitor) are each mapped
        to a distinct host port obtained from ``self.get_port()``.

        Args:
            image: Image reference to run; pulled first if not present locally.
            user: Owner recorded on the ``Instance`` row.
            token_required: When true, run the token script inside the
                container and append the token to the UI URL.
            budget: Value recorded on the ``Instance`` row.

        Returns:
            A ``(container_id, ui_url, monitor_url)`` tuple.
        """
        uport, mport = self._reserve_ports()
        ports = {'8888/tcp': uport,
                 '6006/tcp': mport}

        c_id = self.client.containers.run(
            self._resolve_image(image), "",
            auto_remove=True, detach=True, ports=ports,
        ).id

        # ``token`` must exist on both paths: it is always handed to Instance.
        token = ""
        uurl = "http://localhost:" + str(uport)
        murl = "http://localhost:" + str(mport)
        if token_required:
            token = self._read_token(c_id)
            uurl = "http://localhost:{}/?token={}".format(uport, token)

        # TODO insert budget
        db_session.add(Instance(c_id, uport, mport, uurl, murl, user, budget, token))
        db_session.commit()
        return c_id, uurl, murl

    def kill_container(self, c_id):
        """Stop the container identified by *c_id*."""
        self.client.containers.get(c_id).stop()

    def _reserve_ports(self):
        """Return two distinct host ports as ``(ui_port, monitor_port)``."""
        uport = self.get_port()
        mport = self.get_port()
        while mport == uport:
            mport = self.get_port()
        return uport, mport

    def _resolve_image(self, image):
        """Return a runnable reference for *image*, pulling it if it is not local."""
        if self.client.images.list(name=image):
            return image

        # TODO: use client.images.search to fail early when the image does
        # not exist on Docker Hub.
        repository, tag = image, 'latest'
        head, sep, tail = image.rpartition(':')
        if '@' in image:
            # Digest reference: already pinned, so no tag applies.
            tag = None
        elif sep and '/' not in tail:
            # A '/' after the last ':' means the colon belongs to a
            # registry port (host:5000/repo), not to a tag.
            repository, tag = head, tail

        pulled = self.client.images.pull(repository, tag=tag)
        # Some SDK versions return a list of images; take the first one.
        if isinstance(pulled, (list, tuple)) and pulled:
            pulled = pulled[0]
        return pulled

    def _read_token(self, c_id):
        """Run the token script inside container *c_id* and return its output as text."""
        container = self.client.containers.get(c_id)
        result = container.exec_run('python3 /opt/cluster-container/jupyter_get.py')
        # Newer SDKs return an (exit_code, output) result; older ones return bytes.
        output = getattr(result, 'output', result)
        return output.decode("utf-8")

66 changes: 40 additions & 26 deletions gpu_cluster/controllers/gpu_container_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,33 @@
from .container_controller import ContainerController
from nvdocker import NVDockerClient


class GPUContainerController(ContainerController):

def __init__(self, config):
    """Initialise the base controller, then create the GPU Docker client.

    Args:
        config: Forwarded unchanged to ``ContainerController.__init__``.
    """
    super().__init__(config)
    # Stored on the instance; the other methods reach it as self.docker_client.
    self.docker_client = NVDockerClient()

# NOTE(review): this span is a rendered diff without +/- markers. Pre- and
# post-commit lines appear interleaved, indentation is lost, and the lines
# between the two hunks (see the "Expand All" marker below) are not visible.
# The notes below describe only what the visible lines show.
def create_container(self, image, user="", token_required=False, budget=-1, num_gpus=1):
if NVDockerClient.least_used_gpu() == None :
#TODO Add handle multi node functionality here
pass


# NOTE(review): this second signature has no `self` parameter, yet the body
# below calls self.get_port() -- looks like a bug in the revised version; confirm.
def create_container(image, user="", token_required=False, budget=-1, num_gpus=1):
# Get 2 open ports for UI and Monitor
uport = super().get_port()
mport = super().get_port()
uport = self.get_port()
mport = self.get_port()
while uport == mport:
mport = super().get_port()
mport = self.get_port()

# Get select a gpu(s) that are least in use
num_available_gpus = len(NVDockerClient.gpu_info())
# NOTE(review): `docker_client` is referenced bare here and further down, while
# __init__ stores the client as self.docker_client -- presumably a NameError
# at runtime unless a module-level `docker_client` exists; verify.
num_available_gpus = len(docker_client.list_gpus())
if num_gpus > num_available_gpus:
num_gpus = num_available_gpus

gpus = []
for g in range(num_gpus):
if NVDockerClient.gpu_memory_usage(g)["free_mb"] > 0:
gpus.append(g)

# Assemble config for container
memory_usage = docker_client.gpu_memory_usage()
# NOTE(review): num_gpus is compared against a len() result above, so it is
# an int; `for g in num_gpus` would raise TypeError, and `g` is unused.
for g in num_gpus:
for gpu, used in memory_usage.items():
# NOTE(review): memory_usage[gpu[-1]] looks the dict up by the last
# element/character of its own key -- intent unclear; TODO confirm.
if used < memory_usage[gpu[-1]]:
gpus.append(gpu)

# Assemble config for container
container_config = {
"ports": {
'8888/tcp': uport,
Expand All @@ -42,26 +41,41 @@ def create_container(self, image, user="", token_required=False, budget=-1, num_
"auto_remove": True
}

#create container
c_id = self.docker_client.create_container(image, **container_config).id
# create container
container_list = docker_client.docker_image_list(filters={'name': image})
if container_list:
# NOTE(review): container_config is passed positionally here, whereas the
# call above unpacks it as keyword arguments -- confirm which form the
# NVDockerClient methods expect.
c_id = docker_client.run(image, '', container_config).id

else:
# Add a client.images.search to check if the path to the container exists on docker hub. If not, error out
docker_response = docker_client.docker_image_search(image)
print(docker_response)
docker_image = docker_client.docker_image_pull(image)

# If pull returns more than one image, get the first one in the list
if hasattr(docker_image, '__len__'):
docker_image = docker_image[0]

#assemble endpoints for UI, monitor and get the access token if needed
# Do you have to build the image after you pull it from Docker Hub?
c_id = docker_client.create_container(docker_image, '', container_config).id

# assemble endpoints for UI, monitor and get the access token if needed
uurl = ""
murl = ""
token = ""
if token_required:
token = self.docker_client.exec_run(c_id, 'python3 /opt/cluster-container/jupyter_get.py')
uurl = "http://vault.acm.illinois.edu:{}/?token={}".format(uport, token.decode("utf-8") )
# NOTE(review): the `if token_required:` below repeats the one above; it
# appears to be the post-commit copy of the same statement.
if token_required:
token = docker_client.exec_run(c_id, 'python3 /opt/cluster-container/jupyter_get.py')
uurl = "http://vault.acm.illinois.edu:{}/?token={}".format(uport, token.decode("utf-8"))
murl = "http://vault.acm.illinois.edu:" + str(mport)
else:
uurl = "http://vault.acm.illinois.edu:" + str(uport)
murl = "http://vault.acm.illinois.edu:" + str(mport)
#TODO insert budget
budget = -1
db_session.add(Instance(c_id, uport, mport, uurl, murl, user, budget, token))

# TODO insert budget
# NOTE(review): `budget = -1` overwrites the caller's budget argument before
# it is stored on the Instance row.
budget = -1
db_session.add(Instance(c_id, uport, mport, uurl, murl, user, budget, token))
db_session.commit()
return c_id, uurl, murl

def kill_container(self, c_id):
    """Stop the container identified by *c_id*.

    Args:
        c_id: Container identifier, passed straight to
            ``self.docker_client.stop_container``.
    """
    # The rendered diff listed this call twice (old and new copy of the
    # same line); a single stop request is what the method should issue.
    self.docker_client.stop_container(c_id)