[Modeling / Mixtral] Fix GC + PEFT issues with Mixtral (huggingface#28061)

fix for Mixtral
younesbelkada authored and staghado committed Jan 15, 2024
1 parent 1f957c9 commit 5bbf018
Showing 1 changed file with 7 additions and 7 deletions.
src/transformers/models/mixtral/modeling_mixtral.py (7 additions, 7 deletions)
@@ -1016,6 +1016,13 @@ def forward(
 
         past_key_values_length = 0
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         if use_cache:
             use_legacy_cache = not isinstance(past_key_values, Cache)
             if use_legacy_cache:
@@ -1058,13 +1065,6 @@ def forward(
 
         hidden_states = inputs_embeds
 
-        if self.gradient_checkpointing and self.training:
-            if use_cache:
-                logger.warning_once(
-                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                )
-                use_cache = False
-
         # decoder layers
         all_hidden_states = () if output_hidden_states else None
         all_self_attns = () if output_attentions else None
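The change is a pure reordering: the guard that force-disables `use_cache` under gradient checkpointing is hoisted above the cache-initialization branch, so no cache object is ever built while training with gradient checkpointing (the PEFT training path that was failing). Below is a minimal, self-contained Python sketch of why the ordering matters; `ToyModel`, `forward_buggy`, and `forward_fixed` are hypothetical names that only mirror the control flow of the diff, and a plain list stands in for the `DynamicCache` built from `past_key_values` in the real model. This is not the transformers source.

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class ToyModel:
    def __init__(self, gradient_checkpointing=True, training=True):
        self.gradient_checkpointing = gradient_checkpointing
        self.training = training

    def forward_buggy(self, use_cache=True):
        # Pre-fix ordering: the cache branch runs first, so a cache is
        # created even though use_cache is about to be forced off.
        cache = None
        if use_cache:
            cache = []  # stand-in for DynamicCache.from_legacy_cache(...)
        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning(
                    "`use_cache=True` is incompatible with gradient "
                    "checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False  # too late: `cache` already exists
        return use_cache, cache

    def forward_fixed(self, use_cache=True):
        # Post-fix ordering (as in the diff): decide whether caching is
        # allowed before the cache branch ever runs.
        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning(
                    "`use_cache=True` is incompatible with gradient "
                    "checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False
        cache = None
        if use_cache:
            cache = []
        return use_cache, cache


model = ToyModel()
print(model.forward_buggy())  # (False, []) -- stray cache despite use_cache=False
print(model.forward_fixed())  # (False, None) -- no cache is ever built

Running the sketch, `forward_buggy` still materializes a cache before the guard flips `use_cache` off, while `forward_fixed` never does; the committed diff applies the same reordering inside the model's `forward`.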
