-#Just testing the docker-py SDK
+from subprocess import check_output
+import re
 import docker
 
 class NVDockerClient:
-    client = None
-
-    gpu_devices = ['/dev/nvidiactl', '/dev/nvidia-uvm', '/dev/nvidia1', '/dev/nvidia0']
-    nvidia_driver = 'nvidia-docker'
-    volumes = {'nvidia_driver_387.12': {'bind': '/usr/local/nvidia', 'mode': 'ro'},
-               '/vault': {'bind': '/vault', 'mode': 'rw'}}
-    ports = {'8888/tcp': 8890,
-             '6006/tcp': 6969}
+
     def __init__(self):
-        self.client = docker.from_env(version='auto')
+        self.docker_client = docker.from_env(version="auto")
+
+    #TODO: test on multi-GPU setups
+    def create_container(self, image, **kwargs):
+        #defaults
+        config = {
+            "auto_remove": False,
+            "detach": True
+        }
+        environment = {}
+        #map GPU-specific kwargs onto the NVIDIA_* environment variables read
+        #by the nvidia runtime; pass everything else straight through to docker-py
+        for arg in kwargs:
+            if arg == "driver_capabilities":
+                environment["NVIDIA_DRIVER_CAPABILITIES"] = kwargs["driver_capabilities"]
+            elif arg == "visible_devices":
+                vis_devices = ""
+                if type(kwargs["visible_devices"]) is list:
+                    #accept ints or strings; join into a comma-separated list
+                    vis_devices = ",".join(str(dev) for dev in kwargs["visible_devices"])
+                elif type(kwargs["visible_devices"]) is str:
+                    vis_devices = kwargs["visible_devices"]
+                elif type(kwargs["visible_devices"]) is int:
+                    vis_devices = str(kwargs["visible_devices"])
+                environment["NVIDIA_VISIBLE_DEVICES"] = vis_devices
+            elif arg == "disable_require":
+                environment["NVIDIA_DISABLE_REQUIRE"] = kwargs["disable_require"]
+            elif arg == "require":
+                if "cuda" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_CUDA"] = kwargs["require"]["cuda"]
+                if "driver" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_DRIVER"] = kwargs["require"]["driver"]
+                if "arch" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_ARCH"] = kwargs["require"]["arch"]
+            elif arg == "cuda_version":
+                print("WARNING: the CUDA_VERSION environment variable is legacy; consider moving to NVIDIA_REQUIRE_CUDA")
+                environment["CUDA_VERSION"] = kwargs["cuda_version"]
+            elif arg == "environment":
+                if type(kwargs["environment"]) is dict:
+                    for k, v in kwargs["environment"].items():
+                        environment[k] = v
+                elif type(kwargs["environment"]) is list:
+                    for e in kwargs["environment"]:
+                        kv = e.split("=", 1)
+                        assert len(kv) == 2, "environment entries must follow the format SOMEVAR=xxx"
+                        environment[kv[0]] = kv[1]
+            else:
+                config[arg] = kwargs[arg]
+        config["environment"] = environment
+        config["runtime"] = "nvidia"
 
-    def create_container(self, cmd, image=None, is_gpu=False, ports=None, user=""):
-        home_dir = "/vault/"
-        if user != "":
-            home_dir = home_dir + user
+        c = self.docker_client.containers.run(image, "", **config)
+
+        return c
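+    #Usage sketch (illustrative only: the image tag and device ids below are
+    #hypothetical, and the host is assumed to have the "nvidia" Docker runtime):
+    #  client = NVDockerClient()
+    #  c = client.create_container("nvidia/cuda:9.0-base",
+    #                              visible_devices=[0, 1],
+    #                              driver_capabilities="compute,utility")
+    #  print(c.id)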
 
-        if ports is not None:
-            self.ports['8888/tcp'] = ports[0]
-            self.ports['6006/tcp'] = ports[1]
 
-        if is_gpu:
-            c = self.client.containers.run(image, cmd, auto_remove=True, ports=self.ports, devices=self.gpu_devices, volume_driver=self.nvidia_driver, volumes=self.volumes, detach=True, working_dir=home_dir)
+    def run(self, image, cmd="", **kwargs):
+        #defaults
+        config = {}
+        environment = {}
+        #same NVIDIA_* kwarg mapping as create_container
+        for arg in kwargs:
+            if arg == "driver_capabilities":
+                environment["NVIDIA_DRIVER_CAPABILITIES"] = kwargs["driver_capabilities"]
+            elif arg == "visible_devices":
+                vis_devices = ""
+                if type(kwargs["visible_devices"]) is list:
+                    vis_devices = ",".join(str(dev) for dev in kwargs["visible_devices"])
+                elif type(kwargs["visible_devices"]) is str:
+                    vis_devices = kwargs["visible_devices"]
+                elif type(kwargs["visible_devices"]) is int:
+                    vis_devices = str(kwargs["visible_devices"])
+                environment["NVIDIA_VISIBLE_DEVICES"] = vis_devices
+            elif arg == "disable_require":
+                environment["NVIDIA_DISABLE_REQUIRE"] = kwargs["disable_require"]
+            elif arg == "require":
+                if "cuda" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_CUDA"] = kwargs["require"]["cuda"]
+                if "driver" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_DRIVER"] = kwargs["require"]["driver"]
+                if "arch" in kwargs["require"]:
+                    environment["NVIDIA_REQUIRE_ARCH"] = kwargs["require"]["arch"]
+            elif arg == "cuda_version":
+                print("WARNING: the CUDA_VERSION environment variable is legacy; consider moving to NVIDIA_REQUIRE_CUDA")
+                environment["CUDA_VERSION"] = kwargs["cuda_version"]
+            elif arg == "environment":
+                if type(kwargs["environment"]) is dict:
+                    for k, v in kwargs["environment"].items():
+                        environment[k] = v
+                elif type(kwargs["environment"]) is list:
+                    for e in kwargs["environment"]:
+                        kv = e.split("=", 1)
+                        assert len(kv) == 2, "environment entries must follow the format SOMEVAR=xxx"
+                        environment[kv[0]] = kv[1]
+            else:
+                config[arg] = kwargs[arg]
+        config["environment"] = environment
+        config["runtime"] = "nvidia"
+
+        c = self.docker_client.containers.run(image, cmd, **config)
+
+        if cmd == "":
+            return c.id
         else:
-            c = self.client.containers.run(image, cmd, auto_remove=True, detach=True, working_dir=home_dir)
-
-        return c.id
+            return c
 
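+    #Sketch of run()'s two return paths (assumes detach=True is passed, so
+    #containers.run returns a Container instead of blocking for output):
+    #  cid = client.run("nvidia/cuda:9.0-base", detach=True)              #no cmd -> id
+    #  c = client.run("nvidia/cuda:9.0-base", "nvidia-smi", detach=True)  #cmd -> Container
+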
 
     def build_image(self, path):
-        img = self.client.images.build(path);
+        #docker-py's images.build expects keyword arguments
+        img = self.docker_client.images.build(path=path)
         return img
 
     def get_container_logs(self, cid):
-        c = self.client.containers.get(cid)
+        c = self.docker_client.containers.get(cid)
         return c.logs()
 
     def get_all_container_ids(self):
-        return self.client.containers.list()
+        #return ids, as the name promises, rather than Container objects
+        return [c.id for c in self.docker_client.containers.list()]
 
     def stop_container(self, cid):
-        c = self.client.containers.get(cid)
+        c = self.docker_client.containers.get(cid)
         c.stop()
 
     def start_container(self, cid):
-        c = self.client.containers.get(cid)
+        c = self.docker_client.containers.get(cid)
         c.start()
 
     def start_all_containers(self):
-        for c in self.client.containers.list():
+        #list(all=True) is needed here: the default only lists running containers
+        for c in self.docker_client.containers.list(all=True):
             c.start()
 
     def stop_all_containers(self):
-        for c in self.client.containers.list():
+        for c in self.docker_client.containers.list():
             c.stop()
 
-    def run_cmd(self, cid, cmd):
-        c = self.client.containers.get(cid)
+    def exec_run(self, cid, cmd):
+        c = self.docker_client.containers.get(cid)
         return c.exec_run(cmd)
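+    #Sketch: exec into a running container (cid may be an id or name; on
+    #docker-py >= 3.0 exec_run returns an (exit_code, output) tuple,
+    #earlier versions return the output only):
+    #  exit_code, output = client.exec_run(cid, "nvidia-smi")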
-
+
+    @staticmethod
+    def list_gpus():
+        #parse "nvidia-smi -L" lines of the form "GPU 0: ..." into integer ids
+        output = check_output(["nvidia-smi", "-L"]).decode("utf-8")
+        regex = re.compile(r"GPU (?P<id>\d+):")
+        gpus = []
+        for line in output.strip().split("\n"):
+            m = regex.match(line)
+            assert m, "unable to parse " + line
+            gpus.append(int(m.group("id")))
+        return gpus
+
+    @staticmethod
+    def gpu_memory_usage():
+        #sum per-process memory (MiB) from the nvidia-smi processes table, keyed by GPU id
+        output = check_output(["nvidia-smi"]).decode("utf-8")
+        smi_output = output[output.find("GPU Memory"):]
+        regex = re.compile(r"[|]\s+?(?P<id>\d+)\D+?(?P<pid>\d+).+[ ](?P<usage>\d+)MiB")
+        usage = {gpu_id: 0 for gpu_id in NVDockerClient.list_gpus()}
+        for row in smi_output.split("\n"):
+            gpu = regex.search(row)
+            if not gpu:
+                continue
+            gpu_id = int(gpu.group("id"))
+            memory = int(gpu.group("usage"))
+            usage[gpu_id] += memory
+        return usage
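+
+
+if __name__ == "__main__":
+    #smoke-test sketch: assumes nvidia-smi is on PATH and at least one GPU is visible
+    print(NVDockerClient.list_gpus())         #e.g. [0, 1]
+    print(NVDockerClient.gpu_memory_usage())  #e.g. {0: 0, 1: 2487}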