Skip to content

Add uid and username to relative GPU pid #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 105 additions & 34 deletions GPUtil/GPUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from subprocess import Popen, PIPE
from distutils import spawn
import os
import math
import random
import time
import sys
import platform
import subprocess


__version__ = '1.4.0'
Expand All @@ -58,14 +58,33 @@ def __init__(self, ID, uuid, load, memoryTotal, memoryUsed, memoryFree, driver,
self.display_active = display_active
self.temperature = temp_gpu

def __str__(self):
return str(self.__dict__)


class GPUProcess:
    """Plain record describing one compute process running on a GPU.

    Attributes:
        pid: Process id.
        processName: Name of the process.
        gpuId: Index of the GPU the process runs on.
        gpuUuid: UUID of that GPU.
        gpuName: Name of that GPU.
        usedMemory: GPU memory used by the process.
        uid: User id of the process owner.
        uname: User name of the process owner.
    """

    def __init__(self, pid, processName, gpuId, gpuUuid, gpuName, usedMemory,
                 uid, uname):
        # Assignment order is kept stable because __str__ prints the
        # instance dict in insertion order.
        self.pid = pid
        self.processName = processName
        self.gpuId = gpuId
        self.gpuUuid = gpuUuid
        self.gpuName = gpuName
        self.usedMemory = usedMemory
        self.uid = uid
        self.uname = uname

    def __str__(self):
        """Return the attribute dictionary rendered as a string."""
        return str(self.__dict__)

    def __repr__(self):
        """Return a ``GPUProcess(name=value, ...)`` style representation.

        Without this, printing a list of processes shows only object
        addresses.
        """
        fields = ', '.join(f'{key}={value!r}'
                           for key, value in self.__dict__.items())
        return f'{type(self).__name__}({fields})'

def safeFloatCast(strNumber):
    """Convert *strNumber* to a float, falling back to NaN.

    Args:
        strNumber: Value to convert, normally a string.

    Returns:
        ``float(strNumber)`` when the conversion succeeds, otherwise
        ``float('nan')``.
    """
    try:
        return float(strNumber)
    except (ValueError, TypeError):
        # ValueError: text that is not a number.
        # TypeError: input that float() does not accept at all, e.g. None.
        return float('nan')

def getGPUs():
def getNvidiaSmiCmd():
if platform.system() == "Windows":
# If the platform is Windows and nvidia-smi
# could not be found from the environment path,
Expand All @@ -75,57 +94,97 @@ def getGPUs():
nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
else:
nvidia_smi = "nvidia-smi"
return nvidia_smi


def getGPUs():
    """Query nvidia-smi and return one ``GPU`` object per reported device.

    The command asks for index, uuid, utilization, total/used/free memory,
    driver version, name, serial, display_active, display_mode and
    temperature, as comma-separated values without header or units.

    Returns:
        list: ``GPU`` instances in the order nvidia-smi printed them, or an
        empty list when the command could not be run.
    """
    # Get ID, processing and memory utilization for all GPUs
    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = subprocess.run([
            nvidia_smi,
            "--query-gpu=index,uuid,utilization.gpu,memory.total,memory.used,memory.free,driver_version,name,gpu_serial,display_active,display_mode,temperature.gpu",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
    except Exception:
        # Best effort: a missing or unusable nvidia-smi means "no GPUs".
        return []

    GPUs = []
    # The output is decoded in text mode, where line endings may already be
    # normalised to '\n'; splitlines() handles every convention, whereas
    # splitting on os.linesep would find no separator on Windows.
    for line in p.stdout.splitlines():
        if not line.strip():
            continue
        vals = line.split(', ')
        deviceIds = int(vals[0])
        uuid = vals[1]
        # Utilization is divided by 100 to turn it into a fraction.
        gpuUtil = safeFloatCast(vals[2]) / 100
        memTotal = safeFloatCast(vals[3])
        memUsed = safeFloatCast(vals[4])
        memFree = safeFloatCast(vals[5])
        driver = vals[6]
        gpu_name = vals[7]
        serial = vals[8]
        display_active = vals[9]
        display_mode = vals[10]
        temp_gpu = safeFloatCast(vals[11])
        # display_mode is passed before display_active, unlike the query order.
        GPUs.append(GPU(deviceIds, uuid, gpuUtil, memTotal, memUsed, memFree,
                        driver, gpu_name, serial, display_mode,
                        display_active, temp_gpu))
    return GPUs


def getGPUProcesses():
    """Get all gpu compute processes.

    Runs nvidia-smi with ``--query-compute-apps`` and, for every reported
    process, looks up the owning user with ``ps``.

    Returns:
        list: One ``GPUProcess`` per output line, or an empty list when
        nvidia-smi could not be run. ``gpuId`` is -1 when the GPU UUID is
        not present in ``gpuUuidToIdMap``; ``uid``/``uname`` are ``-1``/``''``
        when the owner lookup fails.
    """
    nvidia_smi = getNvidiaSmiCmd()
    try:
        p = subprocess.run([
            nvidia_smi,
            "--query-compute-apps=pid,process_name,gpu_uuid,gpu_name,used_memory",
            "--format=csv,noheader,nounits"
        ], stdout=subprocess.PIPE, encoding='utf8')
    except Exception:
        # Best effort: a missing or unusable nvidia-smi means "no processes".
        return []

    processes = []
    # Text-mode output may already use '\n' line endings, so splitlines() is
    # used instead of splitting on os.linesep.
    for line in p.stdout.splitlines():
        if not line.strip():
            continue
        vals = line.split(', ')
        pid = int(vals[0])
        processName = vals[1]
        gpuUuid = vals[2]
        gpuName = vals[3]
        usedMemory = safeFloatCast(vals[4])

        # .get() instead of indexing: an unknown UUID must fall back to -1
        # rather than raise KeyError.
        gpuId = gpuUuidToIdMap.get(gpuUuid)
        if gpuId is None:
            gpuId = -1

        # get uid and uname owner of the pid
        try:
            owner = subprocess.run(['ps', f'-p{pid}', '-oruid=,ruser='],
                                   stdout=subprocess.PIPE, encoding='utf8')
            uid, uname = owner.stdout.split()
            uid = int(uid)
        except Exception:
            # ps unavailable, or its output did not have exactly two fields.
            uid, uname = -1, ''

        processes.append(GPUProcess(pid, processName, gpuId, gpuUuid,
                                    gpuName, usedMemory, uid, uname))
    return processes


def getAvailable(order = 'first', limit=1, maxLoad=0.5, maxMemory=0.5, memoryFree=0, includeNan=False, excludeID=[], excludeUUID=[]):
# order = first | last | random | load | memory
# first --> select the GPU with the lowest ID (DEFAULT)
Expand Down Expand Up @@ -309,3 +368,15 @@ def showUtilization(all=False, attrList=None, useOldCode=False):
print(headerSpacingString)
for GPUstring in GPUstrings:
print(GPUstring)


# Generate gpu uuid to id map once, at import time.
gpuUuidToIdMap = {}
try:
    # A generator keeps the loop variable out of the module namespace.
    gpuUuidToIdMap.update((gpu.uuid, gpu.id) for gpu in getGPUs())
except Exception:
    # Best effort: importing the module must not fail because the GPUs
    # could not be enumerated. Exception (not a bare except) lets
    # KeyboardInterrupt and SystemExit propagate.
    pass

2 changes: 1 addition & 1 deletion GPUtil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from .GPUtil import GPU, getGPUs, getAvailable, getAvailability, getFirstAvailable, showUtilization, __version__
from .GPUtil import GPU, getGPUs, getGPUProcesses, getAvailable, getAvailability, getFirstAvailable, showUtilization, __version__
15 changes: 15 additions & 0 deletions GPUtil/demo_GPUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@
# Print package name and version number
print(GPU.__name__ + ' ' + GPU.__version__)

# Get all GPUs
gpus = GPU.getGPUs()
print("All gpus:")
for gpu in gpus:
print(gpu)
print()

# Get all GPU processes
gpuProcesses = GPU.getGPUProcesses()
print("All gpu processes:")
for gpuProcess in gpuProcesses:
print(gpuProcess)
print()

# Show the utilization of all GPUs in a nice table
GPU.showUtilization()

Expand Down Expand Up @@ -60,3 +74,4 @@
# NOTE: If all your GPUs currently have a memory consumption larger than 1%,
# this step will fail. It's not a bug! It is intended to do so, if it does not
# find an available GPU.

10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ Once included all functions are available. The functions along with a short desc

### Main functions

```python
gpus = GPUtil.getGPUs()
```
Get all GPUs' information, memory usage and utilization.

```python
gpuProcesses = GPUtil.getGPUProcesses()
```
Get all compute processes running on all GPUs.

```python
deviceIDs = GPUtil.getAvailable(order = 'first', limit = 1, maxLoad = 0.5, maxMemory = 0.5, includeNan=False, excludeID=[], excludeUUID=[])
```
Expand Down