Skip to content

Commit d8023cb

Browse files
authored
Fixed config check issue with multi-node spawn method (#2424)
* Fixed config check issue with multi-node spawn method
* [skip ci] updated clearml docs link
* Update test_launcher.py
1 parent 7ed1c7e commit d8023cb

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

ignite/contrib/handlers/clearml_logger.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ class ClearMLLogger(BaseLogger):
5151
Args:
5252
kwargs: Keyword arguments accepted from
5353
`clearml.Task
54-
<https://clear.ml/docs/latest/docs/references/sdk/task#taskinit>`_.
54+
<https://clear.ml/docs/latest/docs/references/sdk/task/#taskinit>`_.
5555
All arguments are optional. If a ClearML Task has already been created,
5656
kwargs will be ignored and the current ClearML Task will be used.
5757

ignite/distributed/launcher.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -265,9 +265,9 @@ def _setup_spawn_params(
265265
node_rank = 0
266266
if node_rank >= nnodes or node_rank < 0:
267267
raise ValueError(f"Argument node_rank should be between 0 and {nnodes - 1}, but given {node_rank}")
268-
if nnodes > 1 and (master_addr is None or master_port is None or init_method is None):
268+
if nnodes > 1 and (master_addr is None or master_port is None) and init_method is None:
269269
raise ValueError(
270-
"If number of nodes larger than one, arguments master_addr and master_port or init_method"
270+
"If number of nodes larger than one, arguments master_addr and master_port or init_method "
271271
f"should be specified, but given master_addr={master_addr}, master_port={master_port} and "
272272
f"init_method={init_method}."
273273
)

tests/ignite/distributed/test_launcher.py

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,11 @@ def exec_filepath():
4040
yield fp.as_posix()
4141

4242

43-
def execute(cmd):
43+
def execute(cmd, env=None):
4444

4545
import ignite
4646

47-
env = dict(os.environ)
47+
env = dict(os.environ) if env is None else env
4848
env["PYTHONPATH"] = f"{os.path.dirname(ignite.__path__[0])}"
4949
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
5050
process.wait()
@@ -153,6 +153,38 @@ def test_check_idist_parallel_spawn_n_procs_gloo(exec_filepath):
153153
_test_check_idist_parallel_spawn(exec_filepath, "gloo", np)
154154

155155

156+
@pytest.mark.distributed
157+
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
158+
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
159+
def test_smoke_test_check_idist_parallel_spawn_multinode_n_procs_gloo(exec_filepath):
160+
# Just a smoke test from check_idist_parallel.py for an emulated multi-node configuration
161+
cmd1 = "export CUDA_VISIBLE_DEVICES= && "
162+
cmd1 += 'bash -c "python tests/ignite/distributed/check_idist_parallel.py --backend=gloo --nproc_per_node=2 '
163+
cmd1 += '--nnodes=2 --node_rank=0 --master_addr=localhost --master_port=3344 &"'
164+
os.system(cmd1)
165+
166+
cmd2 = [
167+
sys.executable,
168+
exec_filepath,
169+
"--backend=gloo",
170+
"--nproc_per_node=2",
171+
"--nnodes=2",
172+
"--node_rank=1",
173+
"--master_addr=localhost",
174+
"--master_port=3344",
175+
]
176+
env = dict(os.environ)
177+
env["CUDA_VISIBLE_DEVICES"] = ""
178+
out = execute(cmd2, env=env)
179+
180+
assert "backend=gloo" in out
181+
assert "nproc_per_node: 2" in out
182+
assert "nnodes: 2" in out
183+
assert "master_addr: localhost" in out
184+
assert "master_port: 3344" in out
185+
assert "End of run" in out
186+
187+
156188
@pytest.mark.distributed
157189
@pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support")
158190
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")

0 commit comments

Comments
 (0)