PaddlePaddle · zjjlivein · Mar 19, 2024 · Mar 19, 2024
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP1-PP4-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP1-PP4-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O1 DP2-MP1-PP4-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O1
+run_mode=DP2-MP1-PP4-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP1-PP4-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP1-PP4-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O1 DP2-MP1-PP4-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O1
+run_mode=DP2-MP1-PP4-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP2-PP2-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP2-PP2-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O1 DP2-MP2-PP2-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O1
+run_mode=DP2-MP2-PP2-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP2-PP2-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O1_DP2-MP2-PP2-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O1 DP2-MP2-PP2-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O1
+run_mode=DP2-MP2-PP2-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP1-PP4-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP1-PP4-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O2 DP2-MP1-PP4-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O2
+run_mode=DP2-MP1-PP4-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP1-PP4-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP1-PP4-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O2 DP2-MP1-PP4-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O2
+run_mode=DP2-MP1-PP4-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP2-PP2-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP2-PP2-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O2 DP2-MP2-PP2-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O2
+run_mode=DP2-MP2-PP2-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP2-PP2-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O2_DP2-MP2-PP2-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O2 DP2-MP2-PP2-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O2
+run_mode=DP2-MP2-PP2-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP1-PP4-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP1-PP4-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O3 DP2-MP1-PP4-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O3
+run_mode=DP2-MP1-PP4-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP1-PP4-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP1-PP4-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O3 DP2-MP1-PP4-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=1
+pp_degree=4
+bs_item=16 # micro_bs * dp_degree * pp_degree = 2 * 2 * 4
+fp_item=fp16O3
+run_mode=DP2-MP1-PP4-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP2-PP2-SD2-stage1.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP2-PP2-SD2-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O3 DP2-MP2-PP2-SD2-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O3
+run_mode=DP2-MP2-PP2-SD2-stage1
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=1
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP2-PP2-SD2-stage2.sh b/...t_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs16_fp16O3_DP2-MP2-PP2-SD2-stage2.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs16 fp16O3 DP2-MP2-PP2-SD2-stage2 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=2
+mp_degree=2
+pp_degree=2
+bs_item=16 # micro_bs * dp_degree * pp_degree = 4 * 2 * 2
+fp_item=fp16O3
+run_mode=DP2-MP2-PP2-SD2-stage2
+device_num=N1C8
+sharding_degree=2 # sharding_degree = dp_degree
+sharding_stage=2
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=4 # local_batch_size / pp_degree = 8 / 2
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O1_DP1-MP1-PP8-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O1_DP1-MP1-PP8-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O1 DP1-MP1-PP8-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=1
+pp_degree=8
+bs_item=8 # micro_bs * dp_degree * pp_degree = 1 * 1 * 8
+fp_item=fp16O1
+run_mode=DP1-MP1-PP8-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=1 # local_batch_size / pp_degree = 8 / 8
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O1_DP1-MP2-PP4-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O1_DP1-MP2-PP4-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O1 DP1-MP2-PP4-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=2
+pp_degree=4
+bs_item=8 # micro_bs * dp_degree * pp_degree = 2 * 1 * 4
+fp_item=fp16O1
+run_mode=DP1-MP2-PP4-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o1
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O2_DP1-MP1-PP8-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O2_DP1-MP1-PP8-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O2 DP1-MP1-PP8-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=1
+pp_degree=8
+bs_item=8 # micro_bs * dp_degree * pp_degree = 1 * 1 * 8
+fp_item=fp16O2
+run_mode=DP1-MP1-PP8-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=1 # local_batch_size / pp_degree = 8 / 8
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O2_DP1-MP2-PP4-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O2_DP1-MP2-PP4-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O2 DP1-MP2-PP4-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=2
+pp_degree=4
+bs_item=8 # micro_bs * dp_degree * pp_degree = 2 * 1 * 4
+fp_item=fp16O2
+run_mode=DP1-MP2-PP4-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o2
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O3_DP1-MP1-PP8-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O3_DP1-MP1-PP8-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O3 DP1-MP1-PP8-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=1
+pp_degree=8
+bs_item=8 # micro_bs * dp_degree * pp_degree = 1 * 1 * 8
+fp_item=fp16O3
+run_mode=DP1-MP1-PP8-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=1 # local_batch_size / pp_degree = 8 / 8
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;
diff --git a/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O3_DP1-MP2-PP4-SD1-stage1.sh b/...st_tipc/gpt/static/new_exec_pp_pir/N1C8/gpt_auto_pir_bs8_fp16O3_DP1-MP2-PP4-SD1-stage1.sh
@@ -0,0 +1,21 @@
+# Benchmark case gpt_auto_pir bs8 fp16O3 DP1-MP2-PP4-SD1-stage1 on N1C8: set config, run prepare.sh, then run_benchmark.sh.
+model_item=gpt_auto_pir
+dp_degree=1
+mp_degree=2
+pp_degree=4
+bs_item=8 # micro_bs * dp_degree * pp_degree = 2 * 1 * 4
+fp_item=fp16O3
+run_mode=DP1-MP2-PP4-SD1-stage1
+device_num=N1C8
+sharding_degree=1 # sharding_degree = dp_degree
+sharding_stage=1
+level=o3
+local_batch_size=8 # not passed to run_benchmark.sh; micro_bs below is derived from it by hand
+model=gpt # not in the run_benchmark.sh argument list
+micro_bs=2 # local_batch_size / pp_degree = 8 / 4
+# The script paths below are relative to ./benchmarks, so stop if the cd fails.
+cd ./benchmarks || exit 1
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/prepare.sh
+# run
+bash ./test_tipc/gpt/static/new_exec_pp_pir/benchmark_common/run_benchmark.sh "${model_item}" "${fp_item}" "${dp_degree}" "${mp_degree}" "${pp_degree}" "${micro_bs}" "${bs_item}" "${run_mode}" "${device_num}" \
+"${sharding_degree}" "${sharding_stage}" "${level}" 2>&1;