
Commit 3ff4373

integrate Mixtral-8x7B-Instruct-v0.1 inference
Signed-off-by: minmingzhu <minming.zhu@intel.com>
1 parent 3710b4c commit 3ff4373

3 files changed: +25 -2 lines
.github/workflows/workflow_finetune.yml

Lines changed: 1 addition & 1 deletion

@@ -145,7 +145,7 @@ jobs:
 
       - name: Run Deltatuner Test on DENAS-LoRA Model
        run: |
-          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1)$ ]]; then
+          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|google\/gemma-2b)$ ]]; then
            echo ${{ matrix.model }} is not supported!
          else
            docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
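
The guard above is a plain extended-regex match on the matrix model name; google/gemma-2b now lands in the "is not supported" branch of the DENAS-LoRA step. A minimal Python sketch of the same check, with the pattern copied from the new line (re stands in for bash's =~ operator, and the extra example ids are arbitrary):

import re

# Mirrors the updated bash test in workflow_finetune.yml; the workflow's
# escaped "\/" is just "/" here.
UNSUPPORTED = re.compile(
    r"^(mosaicml/mpt-7b-chat|huggyllama/llama-7b|"
    r"meta-llama/Llama-2-7b-chat-hf|mistralai/Mistral-7B-v0.1|"
    r"google/gemma-2b)$"
)

for model in ["google/gemma-2b", "mosaicml/mpt-7b-chat", "EleutherAI/gpt-j-6b"]:
    if UNSUPPORTED.match(model):
        print(f"{model} is not supported!")  # DENAS-LoRA deltatuner test is skipped
    else:
        print(f"{model} would run the DENAS-LoRA deltatuner test")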

.github/workflows/workflow_inference.yml

Lines changed: 2 additions & 1 deletion

@@ -34,7 +34,7 @@ jobs:
    name: inference
    strategy:
      matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm ]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, mixtral-8x7B-Instruct-v0.1 ]
        isPR:
          - ${{inputs.ci_type == 'pr'}}
 
@@ -46,6 +46,7 @@ jobs:
          - { model: "mistral-7b-v0.1"}
          - { model: "mpt-7b-bigdl"}
          - { model: "llama-2-7b-chat-hf-vllm"}
+          - { model: "mixtral-8x7B-Instruct-v0.1"}
          - dtuner_model: nathan0/mpt-7b-deltatuner-model
            model: mpt-7b
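
The new matrix entry is what makes CI schedule an inference run for mixtral-8x7B-Instruct-v0.1, and the added `- { model: "mixtral-8x7B-Instruct-v0.1"}` line mirrors the existing per-model entries in the same list. A small sketch for verifying the matrix after checkout, assuming PyYAML is installed and that the job key matches its display name "inference" (the key itself is not visible in this hunk):

import yaml  # PyYAML

with open(".github/workflows/workflow_inference.yml") as f:
    workflow = yaml.safe_load(f)

# Using "inference" as the job key is an assumption; only `name: inference` is shown.
matrix = workflow["jobs"]["inference"]["strategy"]["matrix"]
assert "mixtral-8x7B-Instruct-v0.1" in matrix["model"]
print(f"{len(matrix['model'])} models in the inference matrix")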

Lines changed: 22 additions & 0 deletions (new file)

@@ -0,0 +1,22 @@
+port: 8000
+name: Mixtral-8x7B-Instruct-v0.1
+route_prefix: /Mixtral-8x7B-Instruct-v0.1
+num_replicas: 1
+cpus_per_worker: 24
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: CPU
+ipex:
+  enabled: true
+  precision: bf16
+model_description:
+  model_id_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
+  bigdl: false
+  tokenizer_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
+  chat_processor: ChatModelLLama
+  prompt:
+    intro: ''
+    human_id: '<s>[INST] {msg} [/INST]'
+    bot_id: ''
+    stop_words: []
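
This config binds the serve application to port 8000 under the /Mixtral-8x7B-Instruct-v0.1 route prefix, runs on CPU with bf16 via IPEX, and wraps each user turn in the '<s>[INST] {msg} [/INST]' template. A minimal client sketch built from those values; the JSON payload shape is an assumption, since the endpoint's request schema is not part of this commit:

import requests

PORT = 8000
ROUTE_PREFIX = "/Mixtral-8x7B-Instruct-v0.1"
HUMAN_ID = "<s>[INST] {msg} [/INST]"  # prompt.human_id; intro and bot_id are empty


def build_prompt(msg: str) -> str:
    """Wrap a user message in the Mixtral-Instruct template from the config."""
    return HUMAN_ID.format(msg=msg)


resp = requests.post(
    f"http://localhost:{PORT}{ROUTE_PREFIX}",
    json={"text": build_prompt("What is Ray Serve?")},  # payload shape is assumed
    timeout=300,
)
resp.raise_for_status()
print(resp.text)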
