You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: colossalai/inference/config.py
+23-2Lines changed: 23 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -26,7 +26,7 @@
26
26
27
27
_DEFAULT_PROMPT_TEMPLATES = {
28
28
"llama": "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n{input_text}[/INST]",
29
-
"vicuna": "USER: {input_text}\n\nASSISTANT: ",
29
+
"vicuna": "A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user input. USER: {input_text}\nASSISTANT: ",
30
30
}
31
31
32
32
@@ -46,6 +46,8 @@ class InputMetaData:
46
46
head_dim (int, optional): Head dimension. Defaults to 32.
47
47
high_precision (bool, optional): Whether to use float32 for underlying calculations of float16 data to achieve higher precision. Defaults to False.
48
48
dtype (torch.dtype, optional): The computation type of tensor. Defaults to torch.float32.
49
+
use_spec_dec (bool): Indicate whether to use speculative decoding.
50
+
num_tokens_to_verify (int): The number of tokens to verify in speculative decoding. Only valid when `use_spec_dec` is set to True.
0 commit comments