integrate mistralai/Mixtral for CPU

minmingzhu · minmingzhu · commit 82e242c8c445 · 2024-03-14T08:17:49.000Z
Signed-off-by: minmingzhu <minming.zhu@intel.com>
diff --git a/llm_on_ray/finetune/models/mixtral-8x7B-Instruct-v0.1.yaml b/llm_on_ray/finetune/models/mixtral-8x7B-Instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+General:  # Model selection, output locations, and LoRA settings for this Mixtral fine-tuning config
+  base_model: mistralai/Mixtral-8x7B-Instruct-v0.1  # same model this config file is named after
+  gpt_base_model: false
+  output_dir: /tmp/llm-ray/output  # NOTE(review): under /tmp, so likely not persistent; confirm intended
+  checkpoint_dir: /tmp/llm-ray/checkpoint  # kept separate from output_dir, also under /tmp
+  config:  # presumably options used when loading the model/tokenizer; verify against the consumer
+    trust_remote_code: false
+    use_auth_token: null  # no auth token is set here
+  lora_config:  # adapter hyperparameters; key names look like PEFT LoraConfig fields (TODO confirm)
+    task_type: CAUSAL_LM
+    r: 8
+    lora_alpha: 32
+    lora_dropout: 0.1
+    target_modules:  # the two module names listed for adaptation
+    - q_proj
+    - v_proj
+  enable_gradient_checkpointing: false
+Dataset:  # Training data file and validation settings
+  train_file: examples/data/sample_finetune_data_small.jsonl  # relative path; base directory not shown here
+  validation_file: null  # no separate validation file is given
+  validation_split_percentage: 5  # presumably the share of train_file held out for validation; confirm
+Training:  # Optimizer hyperparameters and CPU worker layout
+  optimizer: AdamW
+  batch_size: 2  # NOTE(review): unclear from here whether this is per worker or global; confirm
+  epochs: 3
+  learning_rate: 1.0e-05  # dot plus signed exponent keeps this a float under YAML 1.1 resolvers (e.g. PyYAML)
+  lr_scheduler: linear
+  weight_decay: 0.0
+  mixed_precision: bf16  # NOTE(review): presumably requires bf16 support on the target CPUs; confirm
+  device: CPU
+  num_training_workers: 2
+  resources_per_worker:  # 2 CPUs for each of the 2 workers configured above
+    CPU: 2
+  accelerate_mode: CPU_DDP  # CPU variant, consistent with device: CPU above
+  gradient_accumulation_steps: 1
+  logging_steps: 10