Skip to content

Add GPU process definition and function to get running processes #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 82 additions & 27 deletions GPUtil/GPUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,30 @@ def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver,
self.display_active = display_active
self.temperature = temp_gpu

def __str__(self):
    """Return this instance's attribute dictionary rendered as a string."""
    attributes = vars(self)
    return str(attributes)


class GPUProcess:
    """Plain record describing one process that is using a GPU.

    The constructor stores every argument unchanged as an attribute of the
    same name:

    pid         -- id of the process
    processName -- name of the process
    gpuId       -- id of the GPU the process is running on
    gpuUuid     -- UUID of that GPU
    gpuName     -- name of that GPU
    usedMemory  -- GPU memory used by the process
    """

    def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory):
        self.pid = pid
        self.processName = processName
        self.gpuId = gpuId
        self.gpuUuid = gpuUuid
        self.gpuName = gpuName
        self.usedMemory = usedMemory

    def __str__(self):
        """Return the attribute dictionary rendered as a string."""
        return str(self.__dict__)

    def __repr__(self):
        """Return a constructor-style representation.

        Without this, printing a list of processes shows only opaque
        ``<... object at 0x...>`` entries.
        """
        return '%s(pid=%r, processName=%r, gpuId=%r, gpuUuid=%r, gpuName=%r, usedMemory=%r)' % (
            self.__class__.__name__,
            self.pid,
            self.processName,
            self.gpuId,
            self.gpuUuid,
            self.gpuName,
            self.usedMemory,
        )

def safeFloatCast(strNumber):
    """Convert ``strNumber`` to a float, returning NaN if it cannot be converted.

    Args:
        strNumber: value to convert, normally a numeric string.

    Returns:
        ``float(strNumber)`` when the conversion succeeds, otherwise
        ``float('nan')``.
    """
    try:
        return float(strNumber)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number.
        # TypeError: a value float() does not accept at all, such as None.
        return float('nan')

def getGPUs():
def getNvidiaSmiCmd():
if platform.system() == "Windows":
# If the platform is Windows and nvidia-smi
# could not be found from the environment path,
Expand All @@ -75,8 +91,12 @@ def getGPUs():
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
else:
nvidia_smi = "nvidia-smi"
return nvidia_smi


def getGPUs():
# Get ID, processing and memory utilization for all GPUs
nvidia_smi = getNvidiaSmiCmd()
try:
p = Popen([nvidia_smi,"--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu", "--format=csv,noheader,nounits"], stdout=PIPE)
stdout, stderror = p.communicate()
Expand All @@ -96,36 +116,59 @@ def getGPUs():
#print(line)
vals = line.split(', ')
#print(vals)
for i in range(12):
# print(vals[i])
if (i == 0):
deviceIds = int(vals[i])
elif (i == 1):
uuid = vals[i]
elif (i == 2):
gpuUtil = safeFloatCast(vals[i])/100
elif (i == 3):
memTotal = safeFloatCast(vals[i])
elif (i == 4):
memUsed = safeFloatCast(vals[i])
elif (i == 5):
memFree = safeFloatCast(vals[i])
elif (i == 6):
driver = vals[i]
elif (i == 7):
gpu_name = vals[i]
elif (i == 8):
serial = vals[i]
elif (i == 9):
display_active = vals[i]
elif (i == 10):
display_mode = vals[i]
elif (i == 11):
temp_gpu = safeFloatCast(vals[i]);
deviceIds = int(vals[0])
uuid = vals[1]
gpuUtil = safeFloatCast(vals[2]) / 100
memTotal = safeFloatCast(vals[3])
memUsed = safeFloatCast(vals[4])
memFree = safeFloatCast(vals[5])
driver = vals[6]
gpu_name = vals[7]
serial = vals[8]
display_active = vals[9]
display_mode = vals[10]
temp_gpu = safeFloatCast(vals[11]);
GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree, driver, gpu_name, serial, display_mode, display_active, temp_gpu))
return GPUs # (deviceIds, gpuUtil, memUtil)


def getGPUProcesses():
    """Get all gpu compute processes.

    Runs ``nvidia-smi --query-compute-apps`` and turns every output record
    into a GPUProcess.

    Returns:
        A list of GPUProcess objects, one per parsed output line. The list is
        empty when nvidia-smi cannot be launched. A process whose GPU UUID is
        not present in the module-level ``gpuUuidToIdMap`` gets ``gpuId`` -1.
    """
    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = Popen([nvidia_smi,"--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory", "--format=csv,noheader,nounits"], stdout=PIPE)
        stdout, _ = p.communicate()
    except (OSError, ValueError):
        # nvidia-smi is missing or could not be started: report no processes.
        return []
    output = stdout.decode('UTF-8', 'replace')

    processes = []
    # splitlines() copes with any newline convention and with output that
    # does not end in a line break.
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            # The pid is the first field and the last three fields have a
            # fixed meaning, so whatever lies in between is the process name,
            # even if that name itself contains ", ".
            pidField, rest = line.split(', ', 1)
            processName, gpuUuid, gpuName, memoryField = rest.rsplit(', ', 3)
            pid = int(pidField)
        except ValueError:
            # Not a process record (e.g. a message printed by nvidia-smi).
            continue
        # Look the UUID up without raising for GPUs missing from the map.
        gpuId = gpuUuidToIdMap.get(gpuUuid)
        if gpuId is None:
            gpuId = -1
        usedMemory = safeFloatCast(memoryField)
        processes.append(GPUProcess(pid, processName, gpuId, gpuUuid, gpuName, usedMemory))
    return processes


def getAvailable(order = 'first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]):
# order = first | last | random | load | memory
# first --> select the GPU with the lowest ID (DEFAULT)
Expand Down Expand Up @@ -309,3 +352,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
print(headerSpacingString)
for GPUstring in GPUstrings:
print(GPUstring)


# Generate gpu uuid to id map
# The map is filled once, when the module is imported, from the GPUs that
# getGPUs() reports; getGPUProcesses() reads it to turn a GPU UUID into an id.
gpuUuidToIdMap = {}
try:
    # A generator keeps the temporary names out of the module namespace.
    gpuUuidToIdMap.update((gpu.uuid, gpu.id) for gpu in getGPUs())
except Exception:
    # Best effort: a failure to query the GPUs must not break importing the
    # module, so the map is simply left with whatever was collected so far.
    # KeyboardInterrupt and SystemExit are deliberately not swallowed.
    pass

2 changes: 1 addition & 1 deletion GPUtil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from .GPUtil import GPU, getGPUs, getAvailable, getAvailability, getFirstAvailable, showUtilization, __version__
from .GPUtil import GPU, getGPUs, getGPUProcesses, getAvailable, getAvailability, getFirstAvailable, showUtilization, __version__
15 changes: 15 additions & 0 deletions GPUtil/demo_GPUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@
# Print package name and version number
print(GPU.__name__ + ' ' + GPU.__version__)

# Get all GPUs
gpus = GPU.getGPUs()
print("All gpus:")
for gpu in gpus:
print(gpu)
print()

# Get all GPU processes
gpuProcesses = GPU.getGPUProcesses()
print("All gpu processes:")
for gpuProcess in gpuProcesses:
print(gpuProcess)
print()

# Show the utilization of all GPUs in a nice table
GPU.showUtilization()

Expand Down Expand Up @@ -60,3 +74,4 @@
# NOTE: If all your GPUs currently have a memory consumption larger than 1%,
# this step will fail. It's not a bug! It is intended to do so, if it does not
# find an available GPU.

10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ Once included all functions are available. The functions along with a short desc

### Main functions

```python
gpus = GPUtil.getGPUs()
```
Returns a list of `GPU` objects, one per detected GPU, with each GPU's information, memory usage and utilization.

```python
gpuProcesses = GPUtil.getGPUProcesses()
```
Returns a list of `GPUProcess` objects, one for each compute process running on any GPU.

```python
deviceIDs = GPUtil.getAvailable(order = 'first', limit = 1, maxLoad = 0.5, maxMemory = 0.5, includeNan=False, excludeID=[], excludeUUID=[])
```
Expand Down