Skip to content

Commit

Permalink
[Bugfix] Mapping physical device indices for e2e test utils (vllm-pro…
Browse files Browse the repository at this point in the history
  • Loading branch information
ShangmingCai authored and MengqingCao committed Sep 30, 2024
1 parent 89bbce3 commit ae15dae
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,23 @@ def error_on_warning():
yield


def get_physical_device_indices(devices: List[int]) -> List[int]:
    """Translate logical device indices into physical device indices.

    The translation is driven by the ``CUDA_VISIBLE_DEVICES`` environment
    variable: the n-th comma-separated entry is taken as the physical index
    of logical device ``n``.

    Args:
        devices: Logical device indices to translate.

    Returns:
        ``devices`` itself (unchanged) when ``CUDA_VISIBLE_DEVICES`` is not
        set. Otherwise a new list with the physical index of every logical
        index that has an entry in ``CUDA_VISIBLE_DEVICES``; logical indices
        without an entry are silently dropped.

    Raises:
        ValueError: If a non-blank entry of ``CUDA_VISIBLE_DEVICES`` is not
            an integer (for example a ``GPU-<uuid>`` style identifier).
    """
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible_devices is None:
        return devices

    # Skip blank entries so that an empty value (``CUDA_VISIBLE_DEVICES=""``)
    # or a trailing comma yields an empty/shorter mapping instead of crashing
    # in ``int("")``.
    visible_indices = [
        int(entry) for entry in visible_devices.split(",") if entry.strip()
    ]
    # logical index (position in the env var) -> physical index
    index_mapping = dict(enumerate(visible_indices))
    return [index_mapping[i] for i in devices if i in index_mapping]


@_nvml()
def wait_for_gpu_memory_to_clear(devices: List[int],
threshold_bytes: int,
timeout_s: float = 120) -> None:
# Use nvml instead of pytorch to reduce measurement error from torch cuda
# context.
devices = get_physical_device_indices(devices)
start_time = time.time()
while True:
output: Dict[int, str] = {}
Expand Down

0 comments on commit ae15dae

Please sign in to comment.