Skip to content

Commit a1855aa

Browse files
authored
Merge pull request #15 from acm-uiuc/nvdocker_2
Nvdocker 2
2 parents fe737c6 + 8f0156b commit a1855aa

File tree

5 files changed

+281
-43
lines changed

5 files changed

+281
-43
lines changed

README.md

Lines changed: 0 additions & 4 deletions
This file was deleted.

README.rst

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
nvdocker
2+
========
3+
4+
nvdocker is a library built on top of the docker-py python sdk to build
5+
and run docker containers using nvidia-docker.
6+
7+
Targets nvidia-docker2
8+
9+
Installation
10+
------------
11+
12+
- Install nvidia-docker
13+
14+
https://github.com/NVIDIA/nvidia-docker#quickstart
15+
16+
- Install nvdocker
17+
18+
::
19+
20+
pip install nvdocker
21+
22+
Usage
23+
-----
24+
25+
These variables are already set in NVIDIA's `official CUDA
26+
images <https://hub.docker.com/r/nvidia/cuda/>`__.
27+
28+
``visible_devices``
29+
~~~~~~~~~~~~~~~~~~~
30+
31+
This variable controls which GPUs will be made accessible inside the
32+
container.
33+
34+
- Possible values:
35+
36+
- ``0,1,2``, ``GPU-fef8089b`` …: a comma-separated list of GPU
37+
UUID(s) or index(es),
38+
- ``all``: all GPUs will be accessible, this is the default value in
39+
our container images,
40+
- ``none``: no GPU will be accessible, but driver capabilities will
41+
be enabled.
42+
- ``void`` or *empty* or *unset*: ``nvidia-container-runtime`` will
43+
have the same behavior as ``runc``.
44+
45+
``driver_capabilities``
46+
~~~~~~~~~~~~~~~~~~~~~~~
47+
48+
This option controls which driver libraries/binaries will be mounted
49+
inside the container.
50+
51+
- Possible values
52+
53+
- ``compute,video,graphics,utility`` …: a comma-separated list of
54+
driver features the container needs,
55+
- ``all``: enable all available driver capabilities.
56+
- *empty* or *unset*: use default driver capability: ``utility``.
57+
58+
- Supported driver capabilities
59+
60+
- ``compute``: required for CUDA and OpenCL applications,
61+
- ``compat32``: required for running 32-bit applications,
62+
- ``graphics``: required for running OpenGL and Vulkan applications,
63+
- ``utility``: required for using ``nvidia-smi`` and NVML,
64+
- ``video``: required for using the Video Codec SDK.
65+
66+
``require``
67+
~~~~~~~~~~~~~
68+
69+
A logical expression to define constraints on the configurations
70+
supported by the container.
71+
72+
- Supported constraints
73+
74+
- ``cuda``: constraint on the CUDA driver version,
75+
- ``driver``: constraint on the driver version,
76+
- ``arch``: constraint on the compute architectures of the selected
77+
GPUs.
78+
79+
Expressions
80+
^^^^^^^^^^^
81+
82+
| Multiple constraints can be expressed in a single environment
83+
variable: space-separated constraints are ORed, comma-separated
84+
constraints are ANDed.
85+
| Multiple environment variables of the form ``NVIDIA_REQUIRE_*`` are ANDed together.
86+
87+
88+
``cuda``
89+
^^^^^^^^^^^^^^^^^^^^^^^
90+
91+
The version of the CUDA toolkit used by the container. If the version of the NVIDIA driver is insufficient to run this
92+
version of CUDA, the container will not be started.
93+
94+
Possible values
95+
'''''''''''''''
96+
97+
- ``cuda>=7.5``, ``cuda>=8.0``, ``cuda>=9.0`` …: any valid CUDA version
98+
in the form ``major.minor``.
99+
100+
101+
``cuda_version``
102+
~~~~~~~~~~~~~~~~
103+
104+
| Similar to ``NVIDIA_REQUIRE_CUDA``, for legacy CUDA images.
105+
| In addition, if ``NVIDIA_REQUIRE_CUDA`` is not set,
106+
``NVIDIA_VISIBLE_DEVICES`` and ``NVIDIA_DRIVER_CAPABILITIES`` will
107+
default to ``all``.
108+
109+
``disable_require``
110+
^^^^^^^^^^^^^^^^^^^^^^^^^^
111+
112+
Single switch to disable all the constraints of the form ``NVIDIA_REQUIRE_*``.
113+
114+
Copyright and License
115+
---------------------
116+
117+
This project is released under the `UIUC/NCSA
118+
License <https://github.com/acm-uiuc/nvdocker/blob/master/LICENSE>`__.
119+
120+
121+
``docker-py`` is licensed under the `Apache License
122+
2.0 <https://github.com/docker/docker-py/blob/master/LICENSE>`__.
123+
124+
125+
nvidia-docker and nvidia-container-runtime are licensed under the `BSD
126+
3-clause
127+
license <https://github.com/NVIDIA/nvidia-container-runtime/blob/master/LICENSE>`__.

nvdocker/nvdocker.py

Lines changed: 148 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,178 @@
1-
#Just testing the docker-py SDK
1+
2+
import os
3+
from subprocess import check_output
4+
import re
25
import docker
36

47
class NVDockerClient:
    """Thin wrapper around the docker-py SDK that starts containers under the
    nvidia-docker2 ("nvidia") runtime by translating GPU-related options into
    the NVIDIA_* environment variables the runtime understands."""

    def __init__(self):
        # "auto" lets docker-py negotiate the API version with the daemon.
        self.docker_client = docker.from_env(version="auto")

    @staticmethod
    def _build_config(kwargs):
        """Split nvdocker keyword arguments into a docker-py ``containers.run``
        configuration dict.

        Recognized keys (all optional):
          - driver_capabilities: value for NVIDIA_DRIVER_CAPABILITIES
          - visible_devices: int, str, or list/tuple -> NVIDIA_VISIBLE_DEVICES
          - disable_require: value for NVIDIA_DISABLE_REQUIRE
          - require: dict with optional "cuda"/"driver"/"arch" constraints
          - cuda_version: legacy CUDA_VERSION variable (prints a warning)
          - environment: extra variables, as a dict or a list of "KEY=value"
        Any other key is forwarded to docker-py unchanged. The returned dict
        always carries ``environment`` and ``runtime="nvidia"``.

        Raises ValueError when an ``environment`` list entry has no "=".
        """
        config = {}
        environment = {}
        for arg, value in kwargs.items():
            if arg == "driver_capabilities":
                environment["NVIDIA_DRIVER_CAPABILITIES"] = value
            elif arg == "visible_devices":
                # The runtime expects a comma-separated string; accept a
                # single index/UUID or a sequence of them.
                # (Fixes the original's kwargs["visible_device"] typo.)
                if isinstance(value, (list, tuple)):
                    vis_devices = ",".join(str(dev) for dev in value)
                else:
                    vis_devices = str(value)
                environment["NVIDIA_VISIBLE_DEVICES"] = vis_devices
            elif arg == "disable_require":
                environment["NVIDIA_DISABLE_REQUIRE"] = value
            elif arg == "require":
                if "cuda" in value:
                    environment["NVIDIA_REQUIRE_CUDA"] = value["cuda"]
                if "driver" in value:
                    environment["NVIDIA_REQUIRE_DRIVER"] = value["driver"]
                if "arch" in value:
                    environment["NVIDIA_REQUIRE_ARCH"] = value["arch"]
            elif arg == "cuda_version":
                print("WARNING: the CUDA_VERSION environment variable is a legacy variable, consider moving to NVIDIA_REQUIRE_CUDA")
                environment["CUDA_VERSION"] = value
            elif arg == "environment":
                if isinstance(value, dict):
                    # The original iterated the dict directly (keys only) and
                    # crashed unpacking; update() does what was intended.
                    environment.update(value)
                elif isinstance(value, list):
                    for entry in value:
                        key, sep, val = entry.partition("=")
                        if not sep:
                            raise ValueError("Does not follow the format SOMEVAR=xxx")
                        environment[key] = val
            else:
                # Unknown keys pass straight through to docker-py.
                config[arg] = value
        config["environment"] = environment
        config["runtime"] = "nvidia"
        return config

    #TODO: Testing on MultiGPU
    def create_container(self, image, **kwargs):
        """Create (and start, detached) a container for ``image``.

        GPU-related kwargs are mapped to NVIDIA_* environment variables via
        ``_build_config``; all other kwargs pass through to docker-py.
        Defaults to ``auto_remove=False, detach=True`` unless overridden.
        Returns the docker-py Container object.
        """
        config = {"auto_remove": False, "detach": True}
        config.update(self._build_config(kwargs))
        return self.docker_client.containers.run(image, "", **config)

    def run(self, image, cmd="", **kwargs):
        """Run ``cmd`` in a new container for ``image``.

        Same kwarg translation as ``create_container`` but with no extra
        defaults. Historical behavior preserved: with an empty ``cmd`` the
        container id is returned, otherwise the ``containers.run`` result.
        """
        config = self._build_config(kwargs)
        c = self.docker_client.containers.run(image, cmd, **config)
        if cmd == "":
            return c.id
        return c

    def build_image(self, path):
        """Build a docker image from the Dockerfile directory at ``path``."""
        return self.docker_client.images.build(path)

    def get_container_logs(self, cid):
        """Return the log output of the container with id ``cid``."""
        return self.docker_client.containers.get(cid).logs()

    def get_all_container_ids(self):
        """Return the daemon's running containers (docker-py Container objects)."""
        return self.docker_client.containers.list()

    def stop_container(self, cid):
        """Stop the container with id ``cid``."""
        self.docker_client.containers.get(cid).stop()

    def start_container(self, cid):
        """Start the container with id ``cid``."""
        self.docker_client.containers.get(cid).start()

    def start_all_containers(self):
        """Start every container in the daemon's container list."""
        for c in self.docker_client.containers.list():
            c.start()

    def stop_all_containers(self):
        """Stop every container in the daemon's container list."""
        for c in self.docker_client.containers.list():
            c.stop()

    def exec_run(self, cid, cmd):
        """Execute ``cmd`` inside the running container ``cid`` and return its output."""
        return self.docker_client.containers.get(cid).exec_run(cmd)

    @staticmethod
    def list_gpus():
        """Return the integer indices of the GPUs reported by ``nvidia-smi -L``."""
        output = check_output(["nvidia-smi", "-L"]).decode("utf-8")
        pattern = re.compile(r"GPU (?P<id>\d+):")
        gpus = []
        for line in output.strip().split("\n"):
            m = pattern.match(line)
            assert m, "unable to parse " + line
            gpus.append(int(m.group("id")))
        return gpus

    @staticmethod
    def gpu_memory_usage():
        """Return {gpu_index: total MiB used by listed processes} parsed from ``nvidia-smi``."""
        output = check_output(["nvidia-smi"]).decode("utf-8")
        # Only the process table below the "GPU Memory" header is relevant.
        smi_output = output[output.find("GPU Memory"):]
        pattern = re.compile(r"[|]\s+?(?P<id>\d+)\D+?(?P<pid>\d+).+[ ](?P<usage>\d+)MiB")
        usage = {gpu_id: 0 for gpu_id in NVDockerClient.list_gpus()}
        for row in smi_output.split("\n"):
            m = pattern.search(row)
            if not m:
                continue
            usage[int(m.group("id"))] += int(m.group("usage"))
        return usage

setup.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[metadata]
22
name = nvdocker
3-
version = 0.0.1
3+
version = 0.0.2a3
44
author = ACM@UIUC
55
author-email = acm@illinois.edu
66
summary = Python interface for NVIDIA Docker
77
description-file =
8-
README.md
8+
README.rst
99
home-page = https://github.com/acm-uiuc/nvdocker
1010
requires-dist = setuptools
1111
classifiers =

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44

55
here = path.abspath(path.dirname(__file__))
66

7-
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
7+
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
88
long_description = f.read()
99

1010
# Arguments marked as "Required" below must be included for upload to PyPI.
1111
# Fields marked as "Optional" may be commented out.
1212

1313
setup(
1414
name='nvdocker',
15-
version='0.0.1',
16-
description='nvdocker is library built on top of the docker-py python sdk to build and run docker containers using nvidia-docker.',
15+
version='0.0.2a4',
16+
description='nvdocker is library built on top of the docker-py python sdk to build and run docker containers using nvidia-docker. Targets nvidia-docker2',
1717
long_description=long_description,
1818
url='https://github.com/acm-uiuc/nvdocker',
1919
author='ACM@UIUC',
@@ -35,4 +35,4 @@
3535
keywords='docker nvidia-docker development containers frameworks',
3636
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
3737
install_requires=['docker'],
38-
)
38+
)

0 commit comments

Comments
 (0)