Skip to content

Commit

Permalink
Generate: assistant should be greedy in assisted decoding (huggingfac…
Browse files Browse the repository at this point in the history
…e#30778)

* assistant should be greedy

* better comment

* Update src/transformers/generation/candidate_generator.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
  • Loading branch information
gante and amyeroberts authored May 13, 2024
1 parent 9430635 commit 2e27291
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/transformers/generation/candidate_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@ def __init__(
self.generation_config.return_dict_in_generate = True
self.generation_config.output_scores = True

# Disable sampling -- this implementation of assisted generation/speculative decoding uses the assistant
# greedily to maximize matches. Disables sampling-related flags to prevent warnings
self.generation_config.do_sample = False
for attr in ("temperature", "top_p", "min_p", "typical_p", "top_k", "epsilon_cutoff", "eta_cutoff"):
setattr(self.generation_config, attr, None)

# avoid unnecessary warnings that min_length is larger than max_new_tokens
# remove the `MinLengthLogitsProcessor` if exists (NOTE: no need to check for `MinNewTokensLogitsProcessor`)
self.main_model_min_length = self.generation_config.min_length
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/generation/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ def validate(self, is_init=False):
greedy_wrong_parameter_msg.format(flag_name="top_p", flag_value=self.top_p),
UserWarning,
)
if self.min_p is not None:
warnings.warn(
greedy_wrong_parameter_msg.format(flag_name="min_p", flag_value=self.min_p),
UserWarning,
)
if self.typical_p is not None and self.typical_p != 1.0:
warnings.warn(
greedy_wrong_parameter_msg.format(flag_name="typical_p", flag_value=self.typical_p),
Expand Down

0 comments on commit 2e27291

Please sign in to comment.