Go back to repetition_penalty 1.0 as default; 1.07 hurts larger models too much when repeated tokens are normal, e.g. answering "What is bigger, 9.9 or 9.11?" with CoT. Also fix system prompt use when checking for exact matches.
pseudotensor committed Aug 9, 2024
1 parent 842e770 commit 4a8bda6
Showing 5 changed files with 8 additions and 7 deletions.
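
For context on the first change: in the standard Hugging Face implementation, a repetition_penalty above 1.0 divides the logit of every token already present in the context, so answers that must legitimately repeat tokens, such as the digits in "9.9" vs "9.11" during chain-of-thought, get suppressed. A minimal sketch of the effect using the stock transformers processor; the token ids and logits below are toy values for illustration, not h2ogpt code:

import torch
from transformers import RepetitionPenaltyLogitsProcessor

# Toy setup: 4-token vocabulary, token 0 plays the role of "9".
input_ids = torch.tensor([[0, 1, 0]])          # already generated: "9", ".", "9"
scores = torch.tensor([[3.0, 1.0, 0.5, 0.2]])  # raw next-token logits

for penalty in (1.0, 1.07):
    processor = RepetitionPenaltyLogitsProcessor(penalty=penalty)
    print(penalty, processor(input_ids, scores.clone())[0].tolist())

# penalty=1.0 leaves the logits unchanged; penalty=1.07 drops the logit of
# the repeated "9" from 3.0 to ~2.80, steering generation away from digits
# the model needs to repeat.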
3 changes: 2 additions & 1 deletion gradio_utils/grclient.py
@@ -732,7 +732,8 @@ def query_or_summarize_or_extract(
             temperature: float = 0.0,
             top_p: float = 1.0,
             top_k: int = 40,
-            repetition_penalty: float = 1.07,
+            # 1.07 causes issues still with more repetition
+            repetition_penalty: float = 1.0,
             penalty_alpha: float = 0.0,
             max_time: int = 360,
             max_new_tokens: int = 1024,
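
Callers of the Gradio client that prefer the old behavior can still pass the previous value explicitly. A hypothetical usage sketch: the host URL and instruction text are assumptions, and only repetition_penalty comes from this diff:

from gradio_utils.grclient import GradioClient

client = GradioClient("http://localhost:7860")  # assumed local h2oGPT server
answer = client.query_or_summarize_or_extract(
    instruction="What is bigger, 9.9 or 9.11?",  # assumed parameter usage
    repetition_penalty=1.07,  # opt back into the old default; new default is 1.0
)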
4 changes: 2 additions & 2 deletions src/gen.py
@@ -4494,7 +4494,7 @@ def get_generate_params(model_lower,
         penalty_alpha = 0 if penalty_alpha is None else penalty_alpha
         num_beams = num_beams or 1
         max_new_tokens = max_new_tokens or 512
-        repetition_penalty = repetition_penalty or 1.07
+        repetition_penalty = repetition_penalty or 1.0  # 1.07 causes issues still with more repetition
         num_return_sequences = min(num_beams, num_return_sequences or 1)
         do_sample = False if do_sample is None else do_sample
     else:
@@ -4504,7 +4504,7 @@ def get_generate_params(model_lower,
         penalty_alpha = 0 if penalty_alpha is None else penalty_alpha
         num_beams = num_beams or 1
         max_new_tokens = max_new_tokens or 1024
-        repetition_penalty = repetition_penalty or 1.07
+        repetition_penalty = repetition_penalty or 1.0  # 1.07 causes issues still with more repetition
         num_return_sequences = min(num_beams, num_return_sequences or 1)
         do_sample = False if do_sample is None else do_sample
         # doesn't include chat, instruction_nochat, iinput_nochat, added later
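
A side note on the `or`-default idiom used in get_generate_params: any falsy value (None, 0, 0.0) falls through to the default, which is the intended behavior for an unset parameter. A tiny standalone sketch:

# Falsy inputs (None, 0.0) fall back to the default; explicit values pass through.
for passed in (None, 0.0, 1.07):
    repetition_penalty = passed or 1.0
    print(passed, "->", repetition_penalty)
# None -> 1.0, 0.0 -> 1.0, 1.07 -> 1.07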
4 changes: 2 additions & 2 deletions src/prompter.py
@@ -2406,14 +2406,14 @@ def apply_chat_template(instruction, system_prompt, history, image_file,
     for si, system_prompt_to_use in enumerate(system_prompts_to_use):
         try:
             messages = structure_to_messages(instruction,
-                                             system_prompt_to_use,
+                                             system_prompt_to_use.strip() if system_prompt_to_use else system_prompt_to_use,
                                              history,
                                              image_file,
                                              )
             if not messages:
                 return ''
             prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-            if si == 0 and system_prompt_to_use not in [None, ''] and system_prompt_to_use not in prompt:
+            if si == 0 and system_prompt_to_use not in [None, ''] and system_prompt_to_use.strip() != '' and system_prompt_to_use.strip() not in prompt.strip():
                 raise ValueError("System prompt not used: %s" % system_prompt_to_use)
             break
         except Exception as e:
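
The prompter.py change matters because many chat templates trim whitespace around the system message when rendering, so a raw substring check can report a false negative for a system prompt that was actually used. A self-contained illustration; the template string is hypothetical, not from h2ogpt:

system_prompt = "You are helpful.\n"             # trailing newline from config
prompt = "<|system|>You are helpful.<|user|>hi"  # template trimmed the newline

print(system_prompt in prompt)                    # False: raw exact match fails
print(system_prompt.strip() in prompt.strip())    # True: stripped match succeeds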
2 changes: 1 addition & 1 deletion src/version.py
@@ -1 +1 @@
-__version__ = "794ec254460a0c38a2e3ae3e4437f5dc0f695a09"
+__version__ = "3618868401689179d98c95be3ecdcc64d44d2acd"
2 changes: 1 addition & 1 deletion tests/test_eval.py
@@ -79,7 +79,7 @@ def run_eval1(cpu=False, bits=None, base_model='h2oai/h2ogpt-oig-oasst1-512-6_9b
     kwargs = dict(
         stream_output=False, prompt_type=prompt_type, prompt_dict='',
         temperature=0.4, top_p=0.85, top_k=70, penalty_alpha=0.0, num_beams=1, max_new_tokens=256,
-        min_new_tokens=0, early_stopping=False, max_time=180, repetition_penalty=1.07,
+        min_new_tokens=0, early_stopping=False, max_time=180, repetition_penalty=1.0,
         num_return_sequences=1, do_sample=True, seed=0, chat=False,
         langchain_mode=langchain_mode, add_chat_history_to_context=True,
         add_search_to_context=False,
