Skip to content

Commit e368bf6

Browse files
committed
[Model] Add Qwen2 model.
1 parent 5270b21 commit e368bf6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2728238
-51
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ xFasterTransformer provides a series of APIs, both of C++ and Python, for end us
5656
| Llama2 | ✔ | ✔ | ✔ |
5757
| Baichuan | ✔ | ✔ | ✔ |
5858
| QWen | ✔ | ✔ | ✔ |
59+
| QWen2 | ✔ | ✔ | ✔ |
5960
| SecLLM(YaRN-Llama) | ✔ | ✔ | ✔ |
6061
| Opt | ✔ | ✔ | ✔ |
6162
| Deepseek-coder | ✔ | ✔ | ✔ |
@@ -164,6 +165,7 @@ xFasterTransformer supports a different model format from Huggingface, but it's
164165
- OPTConvert
165166
- BaichuanConvert
166167
- QwenConvert
168+
- Qwen2Convert
167169
- DeepseekConvert
168170
169171
## API usage

ci/test_case

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ _test_case=$(
3535
# | baichuan2-7b | √ | √ | × | × | × | × | 32 | 32 |
3636
# | baichuan2-13b | √ | √ | × | × | × | × | 32 | 32 |
3737
# | qwen-7b | √ | √ | × | × | × | × | 32 | 32 |
38+
# | qwen2-0_5b | √ | √ | × | × | × | × | 32 | 32 |
39+
# | qwen2-1_8b | √ | √ | × | × | × | × | 32 | 32 |
3840
# | gemma-2b | √ | √ | × | × | × | × | 32 | 32 |
3941
# | gemma-7b | √ | √ | × | × | × | × | 32 | 32 |
4042
@@ -66,6 +68,14 @@ bash run_benchmark.sh -m baichuan2-13b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
6668
bash run_benchmark.sh -m qwen-7b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
6769
bash run_benchmark.sh -m qwen-7b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
6870
71+
# qwen2-0_5b with short prompt & full data type:
72+
bash run_benchmark.sh -m qwen2-0_5b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
73+
bash run_benchmark.sh -m qwen2-0_5b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
74+
75+
# qwen2-1_8b with short prompt & full data type:
76+
bash run_benchmark.sh -m qwen2-1_8b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
77+
bash run_benchmark.sh -m qwen2-1_8b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
78+
6979
# gemma-2b with short prompt & full data type:
7080
bash run_benchmark.sh -m gemma-2b -d fp16 -i 1 -w 0 -in 32 -out 32 -s 1
7181
bash run_benchmark.sh -m gemma-2b -d bf16 -i 1 -w 0 -in 32 -out 32 -s 1
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[qwen2]
2+
model_name = /data/models/Qwen1.5-0.5B-Chat
3+
head_num = 16
4+
kv_head_num = 16
5+
size_per_head = 64
6+
inter_size = 2816
7+
max_pos_seq_len = 32768
8+
num_layer = 24
9+
rms_norm_eps = 1e-06
10+
layernorm_type = pre_layernorm
11+
activation_type = silu
12+
rope_theta = 1000000.0
13+
has_post_decoder_layernorm = 1
14+
vocab_size = 151936
15+
start_id = 151643
16+
end_id = 151645
17+
pad_id = 151643
18+
weight_data_type = fp16
19+
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"architectures": [
3+
"Qwen2ForCausalLM"
4+
],
5+
"attention_dropout": 0.0,
6+
"bos_token_id": 151643,
7+
"eos_token_id": 151645,
8+
"hidden_act": "silu",
9+
"hidden_size": 1024,
10+
"initializer_range": 0.02,
11+
"intermediate_size": 2816,
12+
"max_position_embeddings": 32768,
13+
"max_window_layers": 21,
14+
"model_type": "qwen2",
15+
"num_attention_heads": 16,
16+
"num_hidden_layers": 24,
17+
"num_key_value_heads": 16,
18+
"rms_norm_eps": 1e-06,
19+
"rope_theta": 1000000.0,
20+
"sliding_window": 32768,
21+
"tie_word_embeddings": true,
22+
"torch_dtype": "bfloat16",
23+
"transformers_version": "4.37.0",
24+
"use_cache": true,
25+
"use_sliding_window": false,
26+
"vocab_size": 151936
27+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"bos_token_id": 151643,
3+
"pad_token_id": 151643,
4+
"do_sample": true,
5+
"eos_token_id": [
6+
151645,
7+
151643
8+
],
9+
"repetition_penalty": 1.1,
10+
"top_p": 0.8,
11+
"transformers_version": "4.37.0"
12+
}
13+

0 commit comments

Comments
 (0)