
Grad norm tensor fix (huggingface#938)
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com>
2 people authored and ccrhx4 committed May 11, 2024
1 parent d8a4128 commit 023b292
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions optimum/habana/transformers/trainer.py
@@ -1183,13 +1183,12 @@ def _maybe_log_save_evaluate(self, tr_loss, _grad_norm, model, trial, epoch, ign
             if is_accelerate_available() and self.accelerator.distributed_type == GaudiDistributedType.DEEPSPEED:
                 grad_norm = model.get_global_grad_norm()
             else:
-                grad_norm = (
-                    _grad_norm.item()
-                    if (_grad_norm is not None and self.accelerator.distributed_type != GaudiDistributedType.FSDP)
-                    else None
-                )
+                if _grad_norm is not None and self.accelerator.distributed_type != GaudiDistributedType.FSDP:
+                    grad_norm = _grad_norm.item() if _grad_norm.size() == torch.Size([1]) else _grad_norm.tolist()
+                else:
+                    grad_norm = None
 
-            if grad_norm is not None:
+            if grad_norm is not None and not isinstance(grad_norm, list):
                 logs["grad_norm"] = grad_norm
             logs["learning_rate"] = self._get_learning_rate()
 
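For context, a minimal sketch (not part of the commit) of the behavior this change accounts for: .item() only works on a single-element tensor, so a multi-element gradient-norm tensor is converted with .tolist() instead, and list-valued norms are then skipped when logging. The helper name normalize_grad_norm and the tensor values below are made up for illustration.

import torch

def normalize_grad_norm(_grad_norm):
    # Hypothetical helper mirroring the logic added in this commit:
    # a single-element tensor becomes a Python float via .item(),
    # anything larger becomes a plain list via .tolist().
    if _grad_norm is None:
        return None
    return _grad_norm.item() if _grad_norm.size() == torch.Size([1]) else _grad_norm.tolist()

scalar_norm = torch.tensor([0.25])           # typical single-element grad norm
per_group_norm = torch.tensor([0.25, 0.75])  # hypothetical multi-element norm

print(normalize_grad_norm(scalar_norm))      # 0.25 (float, safe to log)
print(normalize_grad_norm(per_group_norm))   # [0.25, 0.75] (list, skipped when logging)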
