
Commit

add zero config
guoyiqiu committed Apr 14, 2024
1 parent eeb2574 commit c6dce27
Showing 8 changed files with 157 additions and 9 deletions.
52 changes: 45 additions & 7 deletions README.md
@@ -29,10 +29,18 @@
</body>
</html>


## 📄Related Papers
* An automatic evaluation framework for multi-turn medical consultations with large language models, based on multi-agent interaction

[Automatic Interactive Evaluation for Large Language Models with State Aware Patient Simulator](https://arxiv.org/pdf/2403.08495.pdf)

## 💫Updates
* 🔥 [coming soon] Technical Report

* 🔥 [2024/04/14] Released MING-MOE, a mixture-of-experts model instruction-tuned from Qwen1.5

* [2024/03/14] Released MING-1.8B, instruction-tuned from Qwen1.5-1.8B

* [2023/07/25] Released MING-7B, instruction-tuned from bloomz-7b

@@ -47,7 +55,7 @@
<head>
</head>
<body>
<table style="width: 70%;">
<table style="width: 80%;">
<tr>
<td style="width: 20%;"><div align="center"><strong>模型</strong></div></td>
<td style="width: 20%;"><div align="center"><strong>基座</strong></div></td>
@@ -62,9 +70,33 @@

<tr>
<td><center>MING-1.8B</center></td>
<td><center><a href="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat">Qwen1.5-1.8B</a></center></td>
<td><center>🤗<a href="https://huggingface.co/BlueZeros/MING-1.8B">MING-1.8B</a></center></td>
</tr>

<tr>
<td><center>MING-MOE-1.8B</center></td>
<td><center><a href="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat">Qwen1.5-1.8B</a></center></td>
<td><center>🤗<a href="https://huggingface.co/BlueZeros/MING-MOE-1.8B">MING-MOE-1.8B</a></center></td>
</tr>

<tr>
<td><center>MING-MOE-4B</center></td>
<td><center><a href="https://huggingface.co/Qwen/Qwen1.5-4B-Chat">Qwen1.5-4B</a></center></td>
<td><center>🤗<a href="https://huggingface.co/BlueZeros/MING-MOE-4B">MING-MOE-4B</a></center></td>
</tr>

<tr>
<td><center>MING-MOE-7B</center></td>
<td><center><a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat">Qwen1.5-7B</a></center></td>
<td><center>🤗<a href="https://huggingface.co/BlueZeros/MING-MOE-7B">MING-MOE-7B</a></center></td>
</tr>

<tr>
<td><center>MING-MOE-14B</center></td>
<td><center><a href="https://huggingface.co/Qwen/Qwen1.5-14B-Chat">Qwen1.5-14B</a></center></td>
<td><center>🤗<a href="https://huggingface.co/BlueZeros/MING-MOE-14B">MING-MOE-14B</a></center></td>
</tr>
</table>
</body>
</html>
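
All of the checkpoints above live on the Hugging Face Hub, so the standard CLI can fetch them. A minimal sketch (the local directory layout is an arbitrary choice, not something the repo prescribes):

```bash
# Download a MING-MOE checkpoint (requires `pip install -U "huggingface_hub[cli]"`).
huggingface-cli download BlueZeros/MING-MOE-1.8B --local-dir checkpoints/MING-MOE-1.8B

# MING-MOE checkpoints are loaded on top of their Qwen1.5 base model
# (see --model_base below), so fetch that as well.
huggingface-cli download Qwen/Qwen1.5-1.8B-Chat --local-dir checkpoints/Qwen1.5-1.8B-Chat
```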
@@ -75,7 +107,8 @@
1. Set up the environment (tested with the versions below; exact versions can be adjusted to your needs)

* python==3.9.16
* pytorch==2.0.1+cu117
* peft==0.9.0

2. Install the project dependencies

@@ -88,10 +121,15 @@
2. Download the model weights and run (requires a single GPU with at least 15 GB of memory)

```bash
# MING-MOE (pass both the fine-tuned checkpoint and its base model;
# --max-new-token caps the output length)
CUDA_VISIBLE_DEVICES=0 python -m fastchat.serve.cli \
    --model_path {path_to_checkpoint} \
    --model_base {path_to_base_model} \
    --max-new-token 3072

# MING-1.8B
CUDA_VISIBLE_DEVICES=0 python -m fastchat.serve.cli \
    --model_path {path_to_checkpoint} \
    --max-new-token 2048

# MING-7B
```
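
Note that, as of this commit, the loader is chosen from the checkpoint path itself (see the `ming/serve/inference.py` diff below): a `--model_path` containing "moe" is routed through `load_molora_pretrained_model`, which also needs `--model_base`, while any other path falls back to `load_pretrained_model`. A concrete sketch, reusing the illustrative download paths from above:

```bash
# The "moe" substring in --model_path is what selects the MoE/MoLoRA loader,
# so keep it in the checkpoint directory name (paths are illustrative).
CUDA_VISIBLE_DEVICES=0 python -m fastchat.serve.cli \
    --model_path checkpoints/MING-MOE-1.8B \
    --model_base checkpoints/Qwen1.5-1.8B-Chat \
    --max-new-token 3072
```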
2 changes: 1 addition & 1 deletion ming/eval/model_diverse_gen.py
@@ -96,7 +96,7 @@ def eval_model(args):
    # pdb.set_trace()

    # else:
    if "moe" in model_path:  # was: if "molora" in model_path:
        tokenizer, model, context_len, tokenizer_with_prefix_space = load_molora_pretrained_model(model_path, args.model_base, model_name, use_logit_bias=args.use_logit_bias, only_load=args.only_load, expert_selection=args.expert_selection)
    else:
        tokenizer, model, context_len, tokenizer_with_prefix_space = load_pretrained_model(model_path, args.model_base, model_name, use_logit_bias=args.use_logit_bias, only_load=args.only_load)
Binary file modified ming/model/__pycache__/builder.cpython-39.pyc
Binary file modified ming/serve/__pycache__/inference.cpython-39.pyc
5 changes: 4 additions & 1 deletion ming/serve/inference.py
@@ -78,7 +78,10 @@ def chat_loop(model_path: str, model_base:str, device: str,
    # Model
    # model, tokenizer = load_model(model_path, device,
    #                               num_gpus, max_gpu_memory, load_8bit, debug)
    # Previously this called load_molora_pretrained_model unconditionally;
    # the loader is now chosen by a "moe" substring match on the path.
    if "moe" in model_path.lower():
        tokenizer, model, context_len, _ = load_molora_pretrained_model(model_path, model_base, None, use_logit_bias=None, only_load=None, expert_selection=None)
    else:
        tokenizer, model, context_len, _ = load_pretrained_model(model_path, model_base, None, use_logit_bias=None, only_load=None)

    model.config.use_cache = True
    model.eval()
23 changes: 23 additions & 0 deletions scripts/zero2.json
@@ -0,0 +1,23 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "train_micro_batch_size_per_gpu": "auto",
    "train_batch_size": "auto",
    "gradient_accumulation_steps": "auto",
    "zero_optimization": {
        "stage": 2,
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto"
    }
}
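
The three new JSON files are standard Hugging Face-style DeepSpeed configs; nothing else in this commit wires them into a command line. As a usage sketch, assuming a `Trainer`-based entry point (the `train.py` name and the hyperparameter flags below are placeholders, not part of this repo's documented interface), the config is passed via `--deepspeed`, and the Trainer fills in every `"auto"` field from its own arguments:

```bash
# Hypothetical ZeRO-2 launch; only the --deepspeed wiring is the point here.
deepspeed --num_gpus=4 train.py \
    --deepspeed scripts/zero2.json \
    --model_name_or_path Qwen/Qwen1.5-1.8B-Chat \
    --bf16 True \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --learning_rate 2e-5 \
    --output_dir checkpoints/ming-moe-1.8b
```

Because `bf16.enabled` and the batch-size fields are `"auto"`, the same JSON serves every model size: the values are inherited from `--bf16`, `--per_device_train_batch_size`, and `--gradient_accumulation_steps` rather than being hard-coded.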
28 changes: 28 additions & 0 deletions scripts/zero3.json
@@ -0,0 +1,28 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "train_micro_batch_size_per_gpu": "auto",
    "train_batch_size": "auto",
    "gradient_accumulation_steps": "auto",
    "zero_optimization": {
        "stage": 3,
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": true
    }
}
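
Relative to `zero2.json`, the substantive change is `"stage": 3` plus the `stage3_*` knobs: stage 2 partitions optimizer states and gradients across GPUs, while stage 3 additionally partitions the parameters themselves, which is what makes the larger MING-MOE variants trainable within fixed per-GPU memory. `"stage3_gather_16bit_weights_on_model_save": true` reassembles the partitioned weights into a single 16-bit state dict at save time, so the resulting checkpoint loads like an ordinary model.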
56 changes: 56 additions & 0 deletions scripts/zero3_offload.json
@@ -0,0 +1,56 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },
    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": "auto",
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto"
        }
    },
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        },
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": true
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "steps_per_print": 1e5,
    "wall_clock_breakdown": false
}
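
The offload variant moves optimizer states and parameters into pinned CPU memory, trading step time for GPU headroom, and it fixes the optimizer (AdamW) and scheduler (WarmupLR) in the config because DeepSpeed substitutes its CPU-optimized Adam implementation when the optimizer lives on the host. A hypothetical single-GPU launch, with the same placeholder entry point as above:

```bash
# ZeRO-3 + CPU offload: fine-tune a model that would not otherwise fit
# on one card, at the cost of host<->device transfer time.
deepspeed --num_gpus=1 train.py \
    --deepspeed scripts/zero3_offload.json \
    --model_name_or_path Qwen/Qwen1.5-7B-Chat \
    --bf16 True \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 16
```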
