Skip to content

Commit

Permalink
fix ips offset (#45219)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kuizhiqing authored Aug 18, 2022
1 parent b6a4db1 commit 041ef22
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions python/paddle/distributed/launch/controllers/collective.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,7 +52,8 @@ def _build_pod_with_args(self):
self.ctx.logger.debug("job endpoints: {}".format(job_endpoints))

rank_offset = ips.index(
- self.ctx.node.ip) if self.ctx.node.ip in ips else 0
+ self.ctx.node.ip
+ ) * self.pod.replicas if self.ctx.node.ip in ips else 0

self.save_pod_log(job_endpoints)

Expand All @@ -66,7 +67,7 @@ def _build_pod_with_args(self):
"PADDLE_LOCAL_SIZE": "{}".format(self.pod.replicas),
"PADDLE_GLOBAL_RANK": "{}".format(i + rank_offset),
"PADDLE_LOCAL_RANK": "{}".format(i),
- "PADDLE_NNODES": "{}".format(self.job.replicas),
+ "PADDLE_NNODES": "{}".format(len(ips)),
## compatible env
"PADDLE_TRAINER_ENDPOINTS": ",".join(job_endpoints),
"PADDLE_CURRENT_ENDPOINT": job_endpoints[i + rank_offset],
Expand Down

0 comments on commit 041ef22

Please sign in to comment.