1 parent c5eb858 · commit f081a00
src/torchrunx/environment.py
@@ -39,6 +39,8 @@ def slurm_workers() -> int:
     if "SLURM_JOB_GPUS" in os.environ:
         # TODO: is it possible to allocate uneven GPUs across nodes?
         return len(os.environ["SLURM_JOB_GPUS"].split(","))
+    elif "SLURM_GPUS_PER_NODE" in os.environ:
+        return int(os.environ['SLURM_GPUS_PER_NODE'])
     else:
         # TODO: should we assume that we plan to do one worker per CPU?
         return int(os.environ["SLURM_CPUS_ON_NODE"])
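For context, slurm_workers() now resolves the per-node worker count from three environment variables in order: an explicit GPU list, the new per-node GPU count, and finally the CPU count. A minimal sketch of the function as it reads after this commit (assuming only that the module imports os; the docstring is omitted):

import os

def slurm_workers() -> int:
    if "SLURM_JOB_GPUS" in os.environ:
        # Slurm exported a comma-separated list of allocated GPU IDs:
        # one worker per GPU.
        return len(os.environ["SLURM_JOB_GPUS"].split(","))
    elif "SLURM_GPUS_PER_NODE" in os.environ:
        # New fallback: allocations made with --gpus-per-node export a
        # per-node GPU count rather than a GPU list.
        return int(os.environ["SLURM_GPUS_PER_NODE"])
    else:
        # CPU-only allocation: one worker per CPU on the node.
        return int(os.environ["SLURM_CPUS_ON_NODE"])

Note that --gpus-per-node also accepts typed requests such as a100:2; if Slurm exports that form verbatim, the int() call would raise, so the new branch effectively assumes a bare integer count.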
@@ -52,7 +54,7 @@ def auto_hosts() -> list[str]:
     :rtype: list[str]
     """
     if in_slurm_job():
-        slurm_hosts()
+        return slurm_hosts()
 
     return ["localhost"]
 
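This hunk fixes a plain bug: auto_hosts() called slurm_hosts() but discarded the result, so it always fell through to ["localhost"] even inside a Slurm allocation; the added return propagates the Slurm node list. A small usage sketch (assuming the module is importable as torchrunx.environment, per the src/torchrunx/environment.py path above):

from torchrunx.environment import auto_hosts

# Outside Slurm this still yields ["localhost"]; inside a Slurm job it
# now returns the allocation's hostnames instead of silently dropping
# them as it did before this commit.
hosts = auto_hosts()
print(hosts)  # e.g. ["node001", "node002"] in a two-node allocation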