Use local rank for CUDA device

ray-project · krfricke · Jul 19, 2022 · Jul 13, 2022 · Jul 13, 2022 · Jul 13, 2022
commit 72108b792f88336664dd3cfdef4a618ec8c64413
diff --git a/python/ray/train/torch/train_loop_utils.py b/python/ray/train/torch/train_loop_utils.py
@@ -467,7 +467,9 @@ def get_device(self) -> torch.device:
         if torch.cuda.is_available():
             gpu_ids = ray.get_gpu_ids()
             if len(gpu_ids) > 0:
-                device_id = gpu_ids[0]
+                gpu_id = gpu_ids[0]
+                cuda_visible_list = list(map(int, ray._private.utils.get_cuda_visible_devices()))
+                device_id = cuda_visible_list.index(gpu_id)
             else:
                 # If called on the driver or outside of Ray Train, return the
                 # 0th device.