Skip to content

Commit e368bf6

Browse files
committed
[Model] Add Qwen2 model.
1 parent 5270b21 commit e368bf6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2728238
-51
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ xFasterTransformer provides a series of APIs, both of C++ and Python, for end us
5656
| Llama2 | ✔ | ✔ | ✔ |
5757
| Baichuan | ✔ | ✔ | ✔ |
5858
| QWen | ✔ | ✔ | ✔ |
59+
| QWen2 | ✔ | ✔ | ✔ |
5960
| SecLLM(YaRN-Llama) | ✔ | ✔ | ✔ |
6061
| Opt | ✔ | ✔ | ✔ |
6162
| Deepseek-coder | ✔ | ✔ | ✔ |
@@ -164,6 +165,7 @@ xFasterTransformer supports a different model format from Huggingface, but it's
164165
- OPTConvert
165166
- BaichuanConvert
166167
- QwenConvert
168+
- Qwen2Convert
167169
- DeepseekConvert
168170
169171
## API usage

ci/test_case

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ _test_case=$(
3535
# | baichuan2-7b | √ | √ | × | × | × | × | 32 | 32 |
3636
# | baichuan2-13b | √ | √ | × | × | × | × | 32 | 32 |
3737
# | qwen-7b | √ | √ | × | × | × | × | 32 | 32 |
38+
# | qwen2-0_5b | √ | √ | × | × | × | × | 32 | 32 |
39+
# | qwen2-1_8b | √ | √ | × | × | × | × | 32 | 32 |
3840
# | gemma-2b | √ | √ | × | × | × | × | 32 | 32 |
3941
# | gemma-7b | √ | √ | × | × | × | × | 32 | 32 |
4042
@@ -66,6 +68,14 @@ bash run_benchmark.sh -m baichuan2-13b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
6668
bash run_benchmark.sh -m qwen-7b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
6769
bash run_benchmark.sh -m qwen-7b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
6870
71+
# qwen2-0_5b with short prompt & full data type:
72+
bash run_benchmark.sh -m qwen2-0_5b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
73+
bash run_benchmark.sh -m qwen2-0_5b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
74+
75+
# qwen2-1_8b with short prompt & full data type:
76+
bash run_benchmark.sh -m qwen2-1_8b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
77+
bash run_benchmark.sh -m qwen2-1_8b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
78+
6979
# gemma-2b with short prompt & full data type:
7080
bash run_benchmark.sh -m gemma-2b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
7181
bash run_benchmark.sh -m gemma-2b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[qwen2]
2+
model_name = /data/models/Qwen1.5-0.5B-Chat
3+
head_num = 16
4+
kv_head_num = 16
5+
size_per_head = 64
6+
inter_size = 2816
7+
max_pos_seq_len = 32768
8+
num_layer = 24
9+
rms_norm_eps = 1e-06
10+
layernorm_type = pre_layernorm
11+
activation_type = silu
12+
rope_theta = 1000000.0
13+
has_post_decoder_layernorm = 1
14+
vocab_size = 151936
15+
start_id = 151643
16+
end_id = 151645
17+
pad_id = 151643
18+
weight_data_type = fp16
19+
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"architectures": [
3+
"Qwen2ForCausalLM"
4+
],
5+
"attention_dropout": 0.0,
6+
"bos_token_id": 151643,
7+
"eos_token_id": 151645,
8+
"hidden_act": "silu",
9+
"hidden_size": 1024,
10+
"initializer_range": 0.02,
11+
"intermediate_size": 2816,
12+
"max_position_embeddings": 32768,
13+
"max_window_layers": 21,
14+
"model_type": "qwen2",
15+
"num_attention_heads": 16,
16+
"num_hidden_layers": 24,
17+
"num_key_value_heads": 16,
18+
"rms_norm_eps": 1e-06,
19+
"rope_theta": 1000000.0,
20+
"sliding_window": 32768,
21+
"tie_word_embeddings": true,
22+
"torch_dtype": "bfloat16",
23+
"transformers_version": "4.37.0",
24+
"use_cache": true,
25+
"use_sliding_window": false,
26+
"vocab_size": 151936
27+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"bos_token_id": 151643,
3+
"pad_token_id": 151643,
4+
"do_sample": true,
5+
"eos_token_id": [
6+
151645,
7+
151643
8+
],
9+
"repetition_penalty": 1.1,
10+
"top_p": 0.8,
11+
"transformers_version": "4.37.0"
12+
}
13+

0 commit comments

Comments
 (0)