@@ -33,17 +33,20 @@ def is_env_present(cls) -> bool:
3333 return _ORTE_URI in os .environ
3434
3535 @classmethod
36- def get_coordinator_address (cls , timeout_secs : int | None ) -> str :
36+ def get_coordinator_address (cls , timeout_secs : int | None , override_coordinator_port : str | None ) -> str :
3737 # Examples of orte_uri:
3838 # 1531576320.0;tcp://10.96.0.1,10.148.0.1,10.108.0.1:34911
3939 # 1314521088.0;tcp6://[fe80::b9b:ac5d:9cf0:b858,2620:10d:c083:150e::3000:2]:43370
4040 orte_uri = os .environ [_ORTE_URI ]
41- job_id_str = orte_uri .split ('.' , maxsplit = 1 )[0 ]
42- # The jobid is always a multiple of 2^12, let's divide it by 2^12
43- # to reduce likelihood of port conflict between jobs
44- job_id = int (job_id_str ) // 2 ** 12
45- # Pick port in ephemeral range [(65535 - 2^12 + 1), 65535]
46- port = job_id % 2 ** 12 + (65535 - 2 ** 12 + 1 )
41+ if override_coordinator_port :
42+ port = override_coordinator_port
43+ else :
44+ job_id_str = orte_uri .split ('.' , maxsplit = 1 )[0 ]
45+ # The jobid is always a multiple of 2^12, let's divide it by 2^12
46+ # to reduce likelihood of port conflict between jobs
47+ job_id = int (job_id_str ) // 2 ** 12
48+ # Pick port in ephemeral range [(65535 - 2^12 + 1), 65535]
49+ port = str (job_id % 2 ** 12 + (65535 - 2 ** 12 + 1 ))
4750 launcher_ip_match = re .search (r"tcp://(.+?)[,:]|tcp6://\[(.+?)[,\]]" , orte_uri )
4851 if launcher_ip_match is None :
4952 raise RuntimeError ('Could not parse coordinator IP address from Open MPI environment.' )
0 commit comments