
Grad norm tensor fix (huggingface#938)
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com>
2 people authored and ccrhx4 committed May 11, 2024
1 parent d8a4128 commit 023b292
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions optimum/habana/transformers/trainer.py
@@ -1183,13 +1183,12 @@ def _maybe_log_save_evaluate(self, tr_loss, _grad_norm, model, trial, epoch, ign
             if is_accelerate_available() and self.accelerator.distributed_type == GaudiDistributedType.DEEPSPEED:
                 grad_norm = model.get_global_grad_norm()
             else:
-                grad_norm = (
-                    _grad_norm.item()
-                    if (_grad_norm is not None and self.accelerator.distributed_type != GaudiDistributedType.FSDP)
-                    else None
-                )
+                if _grad_norm is not None and self.accelerator.distributed_type != GaudiDistributedType.FSDP:
+                    grad_norm = _grad_norm.item() if _grad_norm.size() == torch.Size([1]) else _grad_norm.tolist()
+                else:
+                    grad_norm = None
 
-            if grad_norm is not None:
+            if grad_norm is not None and not isinstance(grad_norm, list):
                 logs["grad_norm"] = grad_norm
             logs["learning_rate"] = self._get_learning_rate()
 
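For context, a minimal sketch (not part of the commit) of the behavior this change accounts for: .item() only works on a single-element tensor, so a multi-element gradient-norm tensor is converted with .tolist() instead, and list-valued norms are then skipped when logging. The helper name normalize_grad_norm and the tensor values below are made up for illustration.

import torch

def normalize_grad_norm(_grad_norm):
    # Hypothetical helper mirroring the logic added in this commit:
    # a single-element tensor becomes a Python float via .item(),
    # anything larger becomes a plain list via .tolist().
    if _grad_norm is None:
        return None
    return _grad_norm.item() if _grad_norm.size() == torch.Size([1]) else _grad_norm.tolist()

scalar_norm = torch.tensor([0.25])           # typical single-element grad norm
per_group_norm = torch.tensor([0.25, 0.75])  # hypothetical multi-element norm

print(normalize_grad_norm(scalar_norm))      # 0.25 (float, safe to log)
print(normalize_grad_norm(per_group_norm))   # [0.25, 0.75] (list, skipped when logging)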
