Commit d5d5e6c

[model] Add deepseek model.

1 parent 0d549e0 commit d5d5e6c
17 files changed, +64287 -74 lines


README.md (2 additions, 0 deletions)
@@ -49,6 +49,7 @@ xFasterTransformer provides a series of APIs, both of C++ and Python, for end us
 | ChatGLM3           | ✔ | ✔ | ✔ |
 | Llama              | ✔ | ✔ | ✔ |
 | Llama2             | ✔ | ✔ | ✔ |
+| Deepseek-coder     | ✔ | ✔ | ✔ |
 | Baichuan           | ✔ | ✔ | ✔ |
 | QWen               | ✔ | ✔ | ✔ |
 | SecLLM(YaRN-Llama) | ✔ | ✔ | ✔ |

@@ -141,6 +142,7 @@ xFasterTransformer supports a different model format from Huggingface, but it's

 Supported model convert list:
 - LlamaConvert
+- DeepseekConvert
 - ChatGLMConvert
 - ChatGLM2Convert
 - ChatGLM3Convert
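With the new DeepseekConvert entry, a Huggingface checkpoint can be converted to the xFasterTransformer format the same way as the other supported models. A minimal sketch, assuming DeepseekConvert exposes the same convert(input_dir, output_dir) call as the existing *Convert classes; both paths are placeholders:

```python
# Sketch: convert a Huggingface Deepseek checkpoint to xFasterTransformer format.
# Assumes DeepseekConvert follows the convert(input_dir, output_dir) pattern of
# the other converters; the paths are placeholders.
import xfastertransformer

HF_MODEL_DIR = "/data/models/deepseek-coder-33b-instruct"       # Huggingface checkpoint
XFT_MODEL_DIR = "/data/models/deepseek-coder-33b-instruct-xft"  # converted output

xfastertransformer.DeepseekConvert().convert(HF_MODEL_DIR, XFT_MODEL_DIR)
```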

benchmark/benchmark.py (4 additions, 2 deletions)
@@ -117,7 +117,7 @@ def build_inputs_chatglm(tokenizer, query: List[str], padding, history: List[Tup
         model_prompt = prompt_pool["chatglm2"]
     if "chatglm3" in args.model_name.lower():
         model_prompt = prompt_pool["chatglm3"]
-    if "llama" in args.model_name.lower():
+    if "llama" in args.model_name.lower() or "deepseek" in args.model_name.lower():
         model_prompt = prompt_pool["llama"]
     if "baichuan" in args.model_name.lower():
         model_prompt = prompt_pool["baichuan"]

@@ -208,7 +208,9 @@ def build_inputs_chatglm(tokenizer, query: List[str], padding, history: List[Tup
         print(f"Next token P90 Latency:\t{np.percentile(next_token_times, 90):.2f} ms")
         print(f"Next token Avg Latency:\t{np.mean(next_token_times):.2f} ms")
         print(f"Next token Latency:\t{np.percentile(next_token_times, 90):.2f} ms")
-        print(f"Throughput without 1st token:\t{1000 / np.percentile(next_token_times, 90) * args.batch_size:.2f} tokens/s")
+        print(
+            f"Throughput without 1st token:\t{1000 / np.percentile(next_token_times, 90) * args.batch_size:.2f} tokens/s"
+        )
         print("=" * 120, "\n" * 3)
     else:
         for i in range(args.warmup + args.iteration):
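The reformatted print computes decode throughput from the P90 next-token latency. A worked sketch of that formula, with made-up latencies and batch size:

```python
# Worked example of the benchmark's throughput formula; the latencies and
# batch size below are made up for illustration.
import numpy as np

next_token_times = [31.8, 32.1, 32.5, 33.0, 40.2]  # per-token decode latency, ms
batch_size = 4

p90_ms = np.percentile(next_token_times, 90)  # 90th-percentile next-token latency
throughput = 1000 / p90_ms * batch_size       # tokens/s across the whole batch
print(f"Throughput without 1st token:\t{throughput:.2f} tokens/s")
```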
New file: model config for deepseek-coder-33b-instruct (19 additions, 0 deletions)
@@ -0,0 +1,19 @@
+[llama]
+model_name = /data/models/deepseek-coder-33b-instruct
+head_num = 56
+kv_head_num = 8
+size_per_head = 128
+inter_size = 19200
+max_pos_seq_len = 16384
+num_layer = 62
+layernorm_eps = 1e-06
+layernorm_type = pre_layernorm
+activation_type = silu
+has_post_decoder_layernorm = 1
+vocab_size = 32256
+start_id = 32013
+end_id = 32021
+rope_type = linear
+rope_theta = 100000
+scaling_factor = 4.0
+weight_data_type = fp16
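The config captures the model geometry: 56 query heads of size 128 (hidden size 7168), 8 KV heads (grouped-query attention, 7 query heads per KV head), 62 layers, and linear RoPE scaling with factor 4.0. A minimal sketch of reading it with Python's stdlib configparser; the file path is a placeholder:

```python
# Sketch: parse the INI config above and derive the model geometry it implies.
# "config.ini" is a placeholder path for the file added in this commit.
import configparser

cfg = configparser.ConfigParser()
cfg.read("config.ini")
m = cfg["llama"]  # Deepseek-coder reuses the llama-style section

hidden_size = int(m["head_num"]) * int(m["size_per_head"])  # 56 * 128 = 7168
gqa_groups = int(m["head_num"]) // int(m["kv_head_num"])    # 56 // 8 = 7 query heads per KV head
print(f"hidden={hidden_size}, gqa_groups={gqa_groups}, layers={m['num_layer']}")
```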
New file: generation config (6 additions, 0 deletions)
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 32013,
+  "eos_token_id": 32021,
+  "transformers_version": "4.34.1"
+}
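This generation config pins the special token ids: sequences start from bos_token_id 32013 and decoding stops when the model emits eos_token_id 32021, matching start_id and end_id in the config above. A minimal sketch of how standard Huggingface transformers picks these up, assuming the checkpoint lives at the path used in the config:

```python
# Sketch: transformers' generate() reads generation_config.json from the model
# directory, so decoding halts automatically on eos_token_id 32021.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "/data/models/deepseek-coder-33b-instruct"  # path from the config above
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

inputs = tokenizer("def quicksort(arr):", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)  # stops at eos id 32021
print(tokenizer.decode(out[0], skip_special_tokens=True))
```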
