Fix ManagedProcessGroup when used in c10d APIs (#191)

H-Huang · web-flow · commit 4f5837bf2225 · 2025-05-12T15:41:04.000-07:00
diff --git a/torchft/process_group.py b/torchft/process_group.py
@@ -1026,10 +1026,11 @@ def __init__(self, manager: "Manager", work: Work, default_result: object) -> No
 
     def wait(self, timeout: Optional[timedelta] = None) -> bool:
         try:
-            if timeout is not None:
-                self._work.wait(timeout)
-            else:
-                self._work.wait()
+            if self._work is not None:
+                if timeout is not None:
+                    self._work.wait(timeout)
+                else:
+                    self._work.wait()
         except Exception as e:
             self._manager.report_error(e)
 
@@ -1064,7 +1065,6 @@ def allreduce(self, tensors: List[torch.Tensor], opts: object) -> Work:
 
         if self._manager.errored() is not None:
             return _DummyWork(tensors)
-
         try:
             work = super().allreduce(tensors, opts)
         except Exception as e: