Merge pull request #41 from pjlab-sys4nlp/moefication-ddz-vis

Moefication: README Update
pjlab-sys4nlp · Dec 13, 2023 · 0c37546
2 parents 4457f49 + 7dc9312
commit 0c37546
Show file tree

Hide file tree

Showing 7 changed files with 178 additions and 223 deletions.
diff --git a/docs/moefication/README.md b/docs/moefication/README.md
diff --git a/docs/moefication/readme-image.png b/docs/moefication/readme-image.png
diff --git a/scripts/moefication/convert/run_convert_gradient_residual.sh b/scripts/moefication/convert/run_convert_gradient_residual.sh
@@ -13,7 +13,7 @@ expert_size=1376
 # 688 1376 2752 5504 11008
 # 864 1728 3456 6912 13824
 
-score_scale_factor_residual=4.0 #  4.0  8.0  12.0  16.0
+score_scale_factor_residual=1.0 #  4.0  8.0  12.0  16.0
 score_scale_factor=4.0          #  4.0  8.0  12.0  16.0
 
 convert_type=LlamaMoEResidualForCausalLM #  LlamaMoEResidualModel  LlamaMoEResidualForCausalLM  LlamaMoEResidualForSequenceClassification

diff --git a/scripts/moefication/split/run_split_graph.py b/scripts/moefication/split/run_split_graph.py
diff --git a/scripts/moefication/split/run_split_graph.sh b/scripts/moefication/split/run_split_graph.sh
@@ -4,9 +4,9 @@
 #  llama2_7B  llama2_13B  llama2_30B  llama2_base
 llama_size=llama_13B
 
-num_experts=8                         #  8  16
-metric=l2_norm                        #  l1_norm l2_norm plain
-template=layers.{}.mlp.up_proj.weight #  gate_proj  up_proj
+num_experts=16                        #  8  16
+metric=l1_norm                        #  l1_norm l2_norm plain
+proj_type=up_proj #  gate_proj  up_proj
 threshold=1
 
 data_path=/mnt/petrelfs/share_data/quxiaoye
@@ -25,7 +25,7 @@ for specify_layer in {0..39}; do
     --model_path ${model_path} \
     --save_path ${save_path} \
     --specify_layer ${specify_layer} \
-    --template ${template} \
+    --template layers.{}.mlp.${proj_type}.weight \
     --num_experts ${num_experts} \
     --threshold ${threshold} \
     --metric ${metric} \
@@ -38,14 +38,15 @@ wait
 
 gpmetis_run=/mnt/petrelfs/share_data/quxiaoye/metis_for_graph_split/bin/gpmetis
 template1=layers.
-template2=.mlp.up_proj.weight
+template2=.mlp.${proj_type}.weight
 
 for layer in {0..39}; do
   OMP_NUM_THREADS=8 srun --partition=MoE --job-name=split --mpi=pmi2 --gres=gpu:${gpus} -n1 --ntasks-per-node=1 -c ${cpus} --kill-on-bad-exit=1 --quotatype=auto \
     ${gpmetis_run} ${save_path}/${template1}${layer}${template2} ${num_experts} &
   sleep 0.7
 done
 wait
+
 # STEP3
 
 template3=.part.${num_experts}
@@ -57,4 +58,5 @@ for layer in {0..39}; do
   sleep 0.7
 done
 wait
+
 chmod -R 755 ${save_path} >/dev/null 2>&1
diff --git a/scripts/moefication/split/run_split_random.sh b/scripts/moefication/split/run_split_random.sh
@@ -5,8 +5,7 @@
 #  open_llama_7b
 llama_size="open_llama_7b"
 
-num_experts=8       #  8  16
-proj_type=gate_proj #  gate_proj  up_proj
+num_experts=8 #  8  16
 
 data_path=/mnt/petrelfs/share_data/quxiaoye
 model_path=${data_path}/models/${llama_size}
@@ -18,7 +17,7 @@ OMP_NUM_THREADS=2 srun --partition=MoE --job-name=split --mpi=pmi2 --gres=gpu:${
   python -m smoe.entrypoint.moefication.llama_split_random \
   --model_path ${model_path} \
   --save_path ${save_path} \
-  --template layers.{}.mlp.${proj_type}.weight \
+  --template layers.{}.mlp.up_proj.weight \
   --num_experts ${num_experts}
 
 chmod -R 755 ${save_path} >/dev/null 2>&1
diff --git a/smoe/entrypoint/moefication/llama_split_random.py b/smoe/entrypoint/moefication/llama_split_random.py
@@ -11,7 +11,7 @@
     parser = argparse.ArgumentParser()
     parser.add_argument('--model_path', type=str, default="/home/data/models/llama-transformers/7B")
     parser.add_argument('--save_path', type=str, default="/home/dongdz/workspace/moefication/llama_moe_temp_files/")
-    parser.add_argument('--template', type=str, default='layers.{}.mlp.gate_proj.weight')
+    parser.add_argument('--template', type=str, default='layers.{}.mlp.up_proj.weight')
     parser.add_argument('--num_experts', type=int, default=8, help='number of experts')
 
     args = parser.parse_args()