
Commit 9d67ac1

Warn users to pass cache_position when calling the forward pass
Calling torch.arange(past_length) with a past_length that keeps changing causes recompilation in XLA
1 parent ed93bf8 commit 9d67ac1

File tree

1 file changed: +1 −0 lines changed


src/transformers/cache_utils.py

Lines changed: 1 addition & 0 deletions
@@ -1020,6 +1020,7 @@ def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
         # Occupied cache == any slot in the 3rd dim (sequence length) holds a non-zero value. To save on compute, let's
         # limit the check to the first batch member and head dimension.
         # TODO: deprecate this function in favor of `cache_position`
+        logger.debug("Use cache_position parameter in your model for better performance.")
         key_cache = self.key_cache[layer_idx]
         device = key_cache.device
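
For context, a minimal sketch (not part of this commit) of how a caller can supply cache_position explicitly during generation. It assumes a causal LM whose forward() accepts cache_position, as the debug message above suggests; the checkpoint name and prompt are illustrative only. Passing the positions in means the model does not have to derive them from the cache length, which is where the torch.arange(past_length) pattern with ever-changing shapes comes from.

    # Minimal sketch, not part of this commit. Assumes a model whose forward()
    # accepts `cache_position`; checkpoint name and prompt are illustrative.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    checkpoint = "meta-llama/Llama-2-7b-hf"  # illustrative only
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)

    inputs = tokenizer("Hello", return_tensors="pt")
    seq_len = inputs.input_ids.shape[1]

    # Prefill: pass positions 0..seq_len-1 explicitly so the model does not
    # reconstruct them via torch.arange(past_length), whose shape would change
    # every step and force XLA recompilation.
    out = model(**inputs, cache_position=torch.arange(seq_len), use_cache=True)

    # One decode step: a single, explicitly supplied position.
    next_token = out.logits[:, -1:].argmax(-1)
    out = model(
        input_ids=next_token,
        past_key_values=out.past_key_values,
        cache_position=torch.tensor([seq_len]),
        use_cache=True,
    )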
