1
- #Just testing the docker-py SDK
1
+
2
+ import os
3
+ from subprocess import check_output
4
+ import re
2
5
import docker
3
6
4
7
class NVDockerClient :
5
8
6
def __init__(self):
    """Create the client wrapper and connect to Docker.

    The docker-py client is built from the process environment with
    ``version="auto"`` and stored on ``self.docker_client``.
    """
    client = docker.from_env(version="auto")
    self.docker_client = client
11
11
12
12
#TODO: Testing on MultiGPU
13
def create_container(self, image, **kwargs):
    """Start ``image`` with an empty command under the ``nvidia`` runtime.

    Recognised keyword arguments are translated into container environment
    variables; every other keyword argument is forwarded unchanged to
    ``self.docker_client.containers.run``.

    Args:
        image: Image reference handed to ``containers.run``.
        driver_capabilities: Value for ``NVIDIA_DRIVER_CAPABILITIES``.
        visible_devices: Value for ``NVIDIA_VISIBLE_DEVICES``.
        disable_require: Value for ``NVIDIA_DISABLE_REQUIRE``.
        require: Mapping whose optional ``cuda`` / ``driver`` / ``arch``
            entries become ``NVIDIA_REQUIRE_CUDA`` / ``NVIDIA_REQUIRE_DRIVER``
            / ``NVIDIA_REQUIRE_ARCH``.
        cuda_version: Value for the legacy ``CUDA_VERSION`` variable (a
            warning is printed).
        environment: Extra variables, either a dict or a list/tuple of
            ``"NAME=value"`` strings. Any other type is ignored.
        **kwargs: Anything else overrides the defaults ``auto_remove=False``
            and ``detach=True`` or is passed straight through.

    Returns:
        Whatever ``containers.run`` returns.

    Raises:
        ValueError: If an ``environment`` list entry contains no ``=``.
    """
    # Keyword arguments that map one-to-one onto an environment variable.
    direct_env = {
        "driver_capabilities": "NVIDIA_DRIVER_CAPABILITIES",
        "visible_devices": "NVIDIA_VISIBLE_DEVICES",
        "disable_require": "NVIDIA_DISABLE_REQUIRE",
        "cuda_version": "CUDA_VERSION",
    }
    # Keys of the ``require`` mapping and the variable each one sets.
    require_env = {
        "cuda": "NVIDIA_REQUIRE_CUDA",
        "driver": "NVIDIA_REQUIRE_DRIVER",
        "arch": "NVIDIA_REQUIRE_ARCH",
    }

    # defaults
    config = {
        "auto_remove": False,
        "detach": True,
    }
    environment = {}

    # Walk kwargs in call order so later arguments still win over earlier
    # ones when they set the same variable.
    for arg, value in kwargs.items():
        if arg in direct_env:
            if arg == "cuda_version":
                print("WARNING: the CUDA_VERSION enviorment variable is a legacy variable, consider moving to NVIDIA_REQUIRE_CUDA")
            environment[direct_env[arg]] = value
        elif arg == "require":
            for key, env_name in require_env.items():
                if key in value:
                    environment[env_name] = value[key]
        elif arg == "environment":
            if isinstance(value, dict):
                environment.update(value)
            elif isinstance(value, (list, tuple)):
                for entry in value:
                    # Split on the first "=" only so values may contain "=".
                    name, sep, val = entry.partition("=")
                    if not sep:
                        raise ValueError("Does not follow the format SOMEVAR=xxx")
                    environment[name] = val
        else:
            config[arg] = value

    config["environment"] = environment
    config["runtime"] = "nvidia"

    return self.docker_client.containers.run(image, "", **config)
36
55
37
def run(self, image, cmd="", **kwargs):
    """Run ``cmd`` in ``image`` under the ``nvidia`` runtime.

    Recognised keyword arguments are translated into container environment
    variables; every other keyword argument is forwarded unchanged to
    ``self.docker_client.containers.run``.

    Args:
        image: Image reference handed to ``containers.run``.
        cmd: Command handed to ``containers.run``; defaults to ``""``.
        driver_capabilities: Value for ``NVIDIA_DRIVER_CAPABILITIES``.
        visible_devices: Value for ``NVIDIA_VISIBLE_DEVICES``.
        disable_require: Value for ``NVIDIA_DISABLE_REQUIRE``.
        require: Mapping whose optional ``cuda`` / ``driver`` / ``arch``
            entries become ``NVIDIA_REQUIRE_CUDA`` / ``NVIDIA_REQUIRE_DRIVER``
            / ``NVIDIA_REQUIRE_ARCH``.
        cuda_version: Value for the legacy ``CUDA_VERSION`` variable (a
            warning is printed).
        environment: Extra variables, either a dict or a list/tuple of
            ``"NAME=value"`` strings. Any other type is ignored.
        **kwargs: Anything else is passed straight through.

    Returns:
        ``c.id`` when ``cmd`` is the empty string, otherwise the object
        ``c`` returned by ``containers.run``.

    Raises:
        ValueError: If an ``environment`` list entry contains no ``=``.
    """
    # Keyword arguments that map one-to-one onto an environment variable.
    direct_env = {
        "driver_capabilities": "NVIDIA_DRIVER_CAPABILITIES",
        "visible_devices": "NVIDIA_VISIBLE_DEVICES",
        "disable_require": "NVIDIA_DISABLE_REQUIRE",
        "cuda_version": "CUDA_VERSION",
    }
    # Keys of the ``require`` mapping and the variable each one sets.
    require_env = {
        "cuda": "NVIDIA_REQUIRE_CUDA",
        "driver": "NVIDIA_REQUIRE_DRIVER",
        "arch": "NVIDIA_REQUIRE_ARCH",
    }

    config = {}
    environment = {}

    # Walk kwargs in call order so later arguments still win over earlier
    # ones when they set the same variable.
    for arg, value in kwargs.items():
        if arg in direct_env:
            if arg == "cuda_version":
                print("WARNING: the CUDA_VERSION enviorment variable is a legacy variable, consider moving to NVIDIA_REQUIRE_CUDA")
            environment[direct_env[arg]] = value
        elif arg == "require":
            for key, env_name in require_env.items():
                if key in value:
                    environment[env_name] = value[key]
        elif arg == "environment":
            if isinstance(value, dict):
                environment.update(value)
            elif isinstance(value, (list, tuple)):
                for entry in value:
                    # Split on the first "=" only so values may contain "=".
                    name, sep, val = entry.partition("=")
                    if not sep:
                        raise ValueError("Does not follow the format SOMEVAR=xxx")
                    environment[name] = val
        else:
            config[arg] = value

    config["environment"] = environment
    config["runtime"] = "nvidia"

    c = self.docker_client.containers.run(image, cmd, **config)

    # NOTE(review): docker-py documents that a non-detached run returns log
    # output rather than a container, in which case ``.id`` would not exist.
    # Confirm callers pass detach=True when cmd is empty.
    if cmd == "":
        return c.id
    return c
@@ -90,3 +126,30 @@ def stop_all_containers(self):
90
126
def exec_run(self, cid, cmd):
    """Execute ``cmd`` in the container identified by ``cid``.

    The container is looked up through ``self.docker_client.containers.get``
    and the result of its ``exec_run(cmd)`` call is returned as-is.
    """
    return self.docker_client.containers.get(cid).exec_run(cmd)
129
+
130
@staticmethod
def list_gpus():
    """Return the integer ids of the GPUs reported by ``nvidia-smi -L``.

    Each non-blank output line must start with ``GPU <n>:``; the ``<n>``
    values are returned in output order.

    Returns:
        A list of ints, one per parsed line.

    Raises:
        ValueError: If a non-blank line does not start with ``GPU <n>:``.
    """
    output = check_output(["nvidia-smi", "-L"]).decode("utf-8")
    pattern = re.compile(r"GPU (?P<id>\d+):")
    gpus = []
    # splitlines() copes with both "\n" and "\r\n" line endings.
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue
        match = pattern.match(line)
        if match is None:
            raise ValueError("unable to parse " + line)
        gpus.append(int(match.group("id")))
    return gpus
140
+
141
@staticmethod
def gpu_memory_usage():
    """Return the summed MiB usage per GPU id parsed from ``nvidia-smi``.

    Only the part of the ``nvidia-smi`` output from the "GPU Memory" heading
    onwards is scanned (presumably the per-process table -- confirm against
    the installed nvidia-smi version). Every row matching the pattern adds
    its ``<n>MiB`` figure to the total for that row's GPU id.

    Returns:
        A dict mapping GPU id (int) to summed MiB (int). Every id returned
        by ``NVDockerClient.list_gpus()`` is present, starting at 0.
    """
    output = check_output(["nvidia-smi"]).decode("utf-8")
    start = output.find("GPU Memory")
    # find() returns -1 when the heading is absent; scan nothing then,
    # rather than slicing from the end of the output.
    table = output[start:] if start != -1 else ""
    pattern = re.compile(r"[|]\s+?(?P<id>\d+)\D+?(?P<pid>\d+).+[ ](?P<usage>\d+)MiB")
    usage = {gpu_id: 0 for gpu_id in NVDockerClient.list_gpus()}
    for row in table.splitlines():
        match = pattern.search(row)
        if match is None:
            continue
        gpu_id = int(match.group("id"))
        memory = int(match.group("usage"))
        # Tolerate an id that list_gpus() did not report instead of raising.
        usage[gpu_id] = usage.get(gpu_id, 0) + memory
    return usage
0 commit comments