From d444a9a00beb44e7ebe63a943e7f376746731bcb Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 15 Jun 2022 12:44:57 +0000
Subject: [PATCH 01/10] add gpups_1n1c

---
 test_tipc/benchmark_train.sh                 | 256 ++++++++++++++++
 test_tipc/configs/dnn/train_infer_python.txt |  55 ++++
 test_tipc/doc/benchmark_train.md             |  57 ++++
 test_tipc/prepare.sh                         |  91 +++---
 test_tipc/scripts/analysis.py                | 300 +++++++++++++++++++
 test_tipc/test_train_inference_python.sh     |  78 ++++-
 tools/profiler.py                            | 110 +++++++
 tools/static_gpubox_trainer.py               |  13 +
 8 files changed, 901 insertions(+), 59 deletions(-)
 create mode 100644 test_tipc/benchmark_train.sh
 create mode 100755 test_tipc/configs/dnn/train_infer_python.txt
 create mode 100644 test_tipc/doc/benchmark_train.md
 mode change 100644 => 100755 test_tipc/prepare.sh
 create mode 100644 test_tipc/scripts/analysis.py
 mode change 100644 => 100755 test_tipc/test_train_inference_python.sh
 create mode 100644 tools/profiler.py

diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
new file mode 100644
index 000000000..84935c272
--- /dev/null
+++ b/test_tipc/benchmark_train.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+# set env
+python=python
+export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
+export model_commit=$(git log|head -n1|awk '{print $2}') 
+export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
+
+# run benchmark sh
+# Usage:
+# bash test_tipc/benchmark_train.sh config.txt benchmark_train params
+# or
+# bash test_tipc/benchmark_train.sh config.txt benchmark_train
+
+function func_parser_params(){  # print the second "="-separated field of $1
+    strs=$1
+    IFS="="  # NOTE: IFS is left set to "=" afterwards (not restored)
+    array=(${strs})  # unquoted on purpose: relies on IFS word splitting
+    tmp=${array[1]}
+    echo ${tmp}
+}
+
+function func_sed_params(){  # rewrite line $2 of file $1 as "<key>:<$3>"
+    filename=$1
+    line=$2
+    param_value=$3
+    params=`sed -n "${line}p" $filename`  # current text of that line
+    IFS=":"
+    array=(${params})
+    key=${array[0]}  # text before the first ":"
+    value=${array[1]}  # text between the first and second ":"
+    if [[ $value =~ 'benchmark_train' ]];then
+        IFS='='
+        _val=(${value})
+        param_value="${_val[0]}=${param_value}"  # keep the old text before the first "="
+    fi
+    new_params="${key}:${param_value}"
+    IFS=";"  # NOTE: IFS is left as ";" when the function returns
+    cmd="sed -i '${line}s/.*/${new_params}/' '${filename}'"  # value is spliced in unescaped
+    eval $cmd
+}
+
+function set_gpu_id(){  # print "0,1,...,k" for a device spec such as "N1C4"
+    string=$1
+    _str=${string:1:6}  # up to 6 characters after the first one, e.g. "1C4"
+    IFS="C"
+    arr=(${_str})
+    M=${arr[0]}  # field before "C"
+    P=${arr[1]}  # field after "C"
+    gn=`expr $P - 1`
+    gpu_num=`expr $gn / $M`  # k = (P - 1) / M, integer division
+    seq=`seq -s "," 0 $gpu_num`
+    echo $seq
+}
+
+function get_repo_name(){  # print the last path component of the current directory
+    IFS=";"
+    cur_dir=$(pwd)
+    IFS="/"  # split the path on "/" below
+    arr=(${cur_dir})
+    echo ${arr[-1]}
+}
+
+FILENAME=$1
+# copy FILENAME as new
+new_filename="./test_tipc/benchmark_train.txt"
+cmd=`yes|cp $FILENAME $new_filename`
+FILENAME=$new_filename
+# MODE must be one of ['benchmark_train']
+MODE=$2
+PARAMS=$3
+# bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train dynamic_bs2048_fp32_DP_N1C1
+IFS=$'\n'
+# parser params from train_benchmark.txt
+dataline=`cat $FILENAME`
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+model_name=$(func_parser_value "${lines[1]}")
+
+# Get the line number of the train_benchmark_params header
+line_num=`grep -n "train_benchmark_params" $FILENAME  | cut -d ":" -f 1`
+# for train log parser
+batch_size=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+fp_items=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+epoch=$(func_parser_value "${lines[line_num]}")
+
+line_num=`expr $line_num + 1`
+profile_option_key=$(func_parser_key "${lines[line_num]}")
+profile_option_params=$(func_parser_value "${lines[line_num]}")
+profile_option="${profile_option_key}:${profile_option_params}"
+
+line_num=`expr $line_num + 1`
+flags_value=$(func_parser_value "${lines[line_num]}")
+# set flags
+IFS=";"
+flags_list=(${flags_value})
+for _flag in ${flags_list[*]}; do
+    cmd="export ${_flag}"
+    eval $cmd
+done
+
+# set log_name
+repo_name=$(get_repo_name )
+SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
+mkdir -p "${SAVE_LOG}/benchmark_log/"
+status_log="${SAVE_LOG}/benchmark_log/results.log"
+
+# The number of lines in which train params can be replaced.
+line_python=3
+line_gpuid=4
+line_precision=6
+line_epoch=7
+line_batchsize=9
+line_profile=13
+line_eval_py=24
+line_export_py=30
+
+func_sed_params "$FILENAME" "${line_eval_py}" "null"
+func_sed_params "$FILENAME" "${line_export_py}" "null"
+func_sed_params "$FILENAME" "${line_python}"  "$python"
+
+# if params
+if  [ ! -n "$PARAMS" ] ;then
+    # PARAMS input is not a word.
+    IFS="|"
+    batch_size_list=(${batch_size})
+    fp_items_list=(${fp_items})
+    device_num_list=(N1C4)
+    run_mode="DP"
+else
+    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
+    IFS="_"
+    params_list=(${PARAMS})
+    model_type=${params_list[0]}
+    batch_size=${params_list[1]}
+    batch_size=`echo  ${batch_size} | tr -cd "[0-9]" `
+    precision=${params_list[2]}
+    # run_process_type=${params_list[3]}
+    run_mode=${params_list[3]}
+    device_num=${params_list[4]}
+    IFS=";"
+
+    if [ ${precision} = "null" ];then
+        precision="fp32"
+    fi
+
+    fp_items_list=($precision)
+    batch_size_list=($batch_size)
+    device_num_list=($device_num)
+fi
+
+IFS="|"
+for batch_size in ${batch_size_list[*]}; do
+    for precision in ${fp_items_list[*]}; do
+        for device_num in ${device_num_list[*]}; do
+            # write precision, batch size and epoch for this run into the config copy
+            func_sed_params "$FILENAME" "${line_precision}" "$precision"
+            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
+            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
+            gpu_id=$(set_gpu_id $device_num)
+
+            if [ ${#gpu_id} -le 1 ];then
+                run_process_type="SingleP"
+                log_path="$SAVE_LOG/profiling_log"
+                mkdir -p $log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
+                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
+                # set profile_option params
+                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
+
+                # run test_train_inference_python.sh
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                eval $cmd
+                eval "cat ${log_path}/${log_name}"
+
+                # without profile
+                log_path="$SAVE_LOG/train_log"
+                speed_log_path="$SAVE_LOG/index"
+                mkdir -p $log_path $speed_log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                func_sed_params "$FILENAME" "${line_profile}" "null"  # sed profile_id as null
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                # time the run in epoch seconds: %Y%m%d%H%M%S stamps do not subtract to a duration
+                job_bt=`date '+%s'`
+                eval $cmd
+                job_et=`date '+%s'`
+                export model_run_time=$((${job_et}-${job_bt}))
+                eval "cat ${log_path}/${log_name}"
+
+                # parse the train log; BENCHMARK_ROOT falls back to ./test_tipc when unset
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+                cmd="${python} ${BENCHMARK_ROOT:-./test_tipc}/scripts/analysis.py --filename ${log_path}/${log_name} \
+                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
+                        --model_name ${_model_name} \
+                        --base_batch_size ${batch_size} \
+                        --run_mode ${run_mode} \
+                        --fp_item ${precision} \
+                        --keyword ips: \
+                        --skip_steps 2 \
+                        --device_num ${device_num} \
+                        --speed_unit samples/s \
+                        --convergence_key loss: "
+                echo $cmd
+                eval $cmd
+                last_status=${PIPESTATUS[0]}
+                status_check $last_status "${cmd}" "${status_log}"
+            else
+                IFS=";"
+                unset CUDA_VISIBLE_DEVICES  # must run in this shell; inside backticks it had no effect
+                run_process_type="MultiP"
+                log_path="$SAVE_LOG/train_log"
+                speed_log_path="$SAVE_LOG/index"
+                mkdir -p $log_path $speed_log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id
+                func_sed_params "$FILENAME" "${line_profile}" "null"  # sed --profile_option as null
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                # time the run in epoch seconds: %Y%m%d%H%M%S stamps do not subtract to a duration
+                job_bt=`date '+%s'`
+                eval $cmd
+                job_et=`date '+%s'`
+                export model_run_time=$((${job_et}-${job_bt}))
+                eval "cat ${log_path}/${log_name}"
+                # parse the train log; same speed unit as the single-GPU branch
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+
+                cmd="${python} ${BENCHMARK_ROOT:-./test_tipc}/scripts/analysis.py --filename ${log_path}/${log_name} \
+                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
+                        --model_name ${_model_name} \
+                        --base_batch_size ${batch_size} \
+                        --run_mode ${run_mode} \
+                        --fp_item ${precision} \
+                        --keyword ips: \
+                        --skip_steps 2 \
+                        --device_num ${device_num} \
+                        --speed_unit samples/s \
+                        --convergence_key loss: "
+                echo $cmd
+                eval $cmd
+                last_status=${PIPESTATUS[0]}
+                status_check $last_status "${cmd}" "${status_log}"
+            fi
+        done
+    done
+done
diff --git a/test_tipc/configs/dnn/train_infer_python.txt b/test_tipc/configs/dnn/train_infer_python.txt
new file mode 100755
index 000000000..b6b7dd591
--- /dev/null
+++ b/test_tipc/configs/dnn/train_infer_python.txt
@@ -0,0 +1,55 @@
+===========================train_params===========================
+model_name:dnn
+python:python3.7
+gpu_list:0|-1
+runner.use_gpu:True|False
+auto_cast:False
+runner.epochs:lite_train_lite_infer=4|whole_train_whole_infer=4|whole_infer=4|lite_train_whole_infer=4
+runner.model_save_path
+runner.train_batch_size:lite_train_lite_infer=50|whole_train_whole_infer=512|whole_infer=50|lite_train_whole_infer=50
+runner.infer_load_path:null
+train_model_name:lite_train_lite_infer=3|whole_train_whole_infer=3|whole_infer=3|lite_train_whole_infer=3
+runner.test_data_dir:test_tipc/data/infer
+runner.train_data_dir:../../../test_tipc/data/train
+##
+trainer:norm_train
+norm_train:-u tools/trainer.py -m ./models/rank/dnn/config_bigdata.yaml -o runner.print_interval=2
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:null
+null:null
+##
+===========================infer_params===========================
+runner.model_save_path:
+runner.model_init_path:
+norm_export:-u tools/to_static.py -m ./models/rank/dnn/config_bigdata.yaml -o runner.CE=true
+quant_export:null
+fpgm_export:null
+distill_export:null
+null:null
+null:null
+##
+infer_model:test_tipc/save_dnn_model
+infer_export:null
+infer_quant:False
+inference:-u tools/paddle_infer.py --model_name=dnn --reader_file=models/rank/dnn/criteo_reader.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--batchsize:10
+--enable_tensorRT:True|False
+--precision:fp32
+--model_dir:
+--data_dir:test_tipc/data/infer
+--save_log_path:./test_tipc/output/
+--benchmark:True
+null:null
+===========================train_benchmark_params=========================== 
+batchsize:2048
+epoch:3
+--profiler_options="batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile"
diff --git a/test_tipc/doc/benchmark_train.md b/test_tipc/doc/benchmark_train.md
new file mode 100644
index 000000000..cc3b17c7c
--- /dev/null
+++ b/test_tipc/doc/benchmark_train.md
@@ -0,0 +1,57 @@
+# TIPC Linux端Benchmark测试文档
+
+该文档为Benchmark测试说明，Benchmark训练性能测试的主程序为`benchmark_train.sh`，用于验证和监控模型训练的性能。
+
+# 1. 测试流程
+## 1.1 准备数据和环境安装
+运行`test_tipc/prepare.sh`，完成训练数据准备和安装环境流程。
+
+```shell
+# 运行格式：bash test_tipc/prepare.sh  train_benchmark.txt  mode
+bash test_tipc/prepare.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train
+```
+
+## 1.2 功能测试
+执行`test_tipc/benchmark_train.sh`，完成模型训练和日志解析
+
+```shell
+# 运行格式：bash test_tipc/benchmark_train.sh train_benchmark.txt mode
+bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train
+```
+
+`test_tipc/benchmark_train.sh`支持根据传入的第三个参数实现只运行某一个训练配置，如下：
+```shell
+# 运行格式：bash test_tipc/benchmark_train.sh train_benchmark.txt mode params
+bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train dynamic_bs8_fp32_DP_N1C1
+```
+dynamic_bs8_fp32_DP_N1C1为test_tipc/benchmark_train.sh传入的参数，格式如下：
+`${modeltype}_bs${batch_size}_${fp_item}_${run_mode}_${device_num}`
+包含的信息有：模型类型、batchsize大小、训练精度如fp32,fp16等、分布式运行模式以及分布式训练使用的机器信息如单机单卡（N1C1）。
+
+
+## 2. 日志输出
+benchmark训练得到训练日志后，会自动保存训练日志并解析得到ips等信息。在benchmark测试时，会自动调用`${BENCHMARK_ROOT}/scripts/analysis.py`。
+
+BENCHMARK_ROOT 通过设置环境变量的方式来设置，比如：
+```
+export BENCHMARK_ROOT=/paddle/PaddleRec/test_tipc
+benchmark_train.sh在运行时会自动调用/paddle/PaddleRec/test_tipc/scripts/analysis.py
+```
+运行后将保存模型的训练日志和解析日志，使用 `test_tipc/configs/dnn/train_benchmark.txt` 参数文件的训练日志解析结果是：
+
+```
+{"model_branch": "gpups", "model_commit": "2ccd243761b39dffe037cef5160dda722f121311", "model_name": "dnn_bs2048_3_MultiP_DP", "batch_size": 2048, "fp_item": "3", "run_mode": "DP", "convergence_value": 0, "convergence_key": "loss:", "ips": 0, "speed_unit": "", "device_num": "N1C4", "model_run_time": "0", "frame_commit": "360b8383250774108a6561e7071d60189b0d0964", "frame_version": "0.0.0"}
+```
+
+训练日志和日志解析结果保存在benchmark_log目录下，文件组织格式如下：
+```
+train_log/
+├── index
+│   ├── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_speed
+│   └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C4_speed
+├── profiling_log
+│   └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_profiling
+└── train_log
+    ├── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_log
+    └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C4_log
+```
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
old mode 100644
new mode 100755
index c4843e819..eeac2648d
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -18,7 +18,42 @@ model_name=$(func_parser_value "${lines[1]}")
 rm -rf ./test_tipc/data
 rm -rf ./test_tipc/output
 
-if [ ${model_name} == "wide_deep" ]; then
+if [ ${model_name} == "dnn" ]; then
+    # prepare pretrained weights and dataset 
+    wget -nc -P  ./test_tipc/save_dnn_model https://paddlerec.bj.bcebos.com/wide_deep/wide_deep.tar
+    cd test_tipc/save_dnn_model && tar -xvf wide_deep.tar && rm -rf wide_deep.tar && cd ../../
+    
+    mkdir -p ./test_tipc/data/train
+    mkdir -p ./test_tipc/data/infer
+    if [ ${MODE} = "lite_train_lite_infer" ];then
+        cp -r ./models/rank/dnn/data/sample_data/train/* ./test_tipc/data/train
+        cp -r ./models/rank/dnn/data/sample_data/train/* ./test_tipc/data/infer
+        echo "demo data ready"
+    elif [ ${MODE} = "whole_train_whole_infer" ];then
+        cd ./datasets/criteo
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/criteo/slot_train_data_full/* ./test_tipc/data/train
+        cp -r ./datasets/criteo/slot_test_data_full/* ./test_tipc/data/infer
+        echo "whole data ready"
+    elif [ ${MODE} = "whole_infer" ];then
+        cd ./datasets/criteo
+        bash run.sh
+        cd ../..
+        cp -r ./models/rank/dnn/data/sample_data/train/* ./test_tipc/data/train
+        cp -r ./datasets/criteo/slot_test_data_full/* ./test_tipc/data/infer
+    elif [ ${MODE} = "lite_train_whole_infer" ];then
+        cd ./datasets/criteo
+        bash run.sh
+        cd ../..
+        cp -r ./models/rank/dnn/data/sample_data/train/* ./test_tipc/data/train
+        cp -r ./datasets/criteo/slot_test_data_full/* ./test_tipc/data/infer
+    elif [ ${MODE} = "benchmark_train" ];then
+        cp -r ./models/rank/dnn/data/sample_data/train/* ./test_tipc/data/train
+        echo "demo data ready"
+    fi
+ 
+elif [ ${model_name} == "wide_deep" ]; then
     # prepare pretrained weights and dataset 
     wget -nc -P  ./test_tipc/save_wide_deep_model https://paddlerec.bj.bcebos.com/wide_deep/wide_deep.tar
     cd test_tipc/save_wide_deep_model && tar -xvf wide_deep.tar && rm -rf wide_deep.tar && cd ../../
@@ -366,35 +401,6 @@ elif [ ${model_name} == "sign" ]; then
         cp -r ./models/rank/sign/data/* ./test_tipc/data/train
         cp -r ./datasets/sign/test/* ./test_tipc/data/infer
     fi
-elif [ ${model_name} == "fgcnn" ]; then
-    rm -rf ./test_tipc/data/*
-    mkdir -p ./test_tipc/data/train
-    mkdir -p ./test_tipc/data/infer
-    if [ ${MODE} = "lite_train_lite_infer" ];then
-        cp -r ./models/rank/fgcnn/data/trainlite/* ./test_tipc/data/train
-        cp -r ./models/rank/fgcnn/data/testlite/* ./test_tipc/data/infer
-        echo "demo data ready"
-    elif [ ${MODE} = "whole_train_whole_infer" ];then
-        cd ./datasets/criteo_fgcnn
-        bash run.sh
-        cd ../..
-        cp -r ./datasets/criteo_fgcnn/train/train.h5 ./test_tipc/data/train
-        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
-        echo "whole data ready"
-    elif [ ${MODE} = "whole_infer" ];then
-        cd ./datasets/criteo_fgcnn
-        bash run.sh
-        cd ../..
-        cp -r ./datasets/criteo_fgcnn/train/train.h5 ./test_tipc/data/train
-        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
-        echo "whole data ready"
-    elif [ ${MODE} = "lite_train_whole_infer" ];then
-        cd ./datasets/criteo_fgcnn
-        bash run.sh
-        cd ../..
-        cp -r ./models/rank/fgcnn/data/trainlite/* ./test_tipc/data/train
-        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
-    fi
 elif [ ${model_name} == "iprec" ]; then
     mkdir -p ./test_tipc/data/train
     mkdir -p ./test_tipc/data/infer
@@ -422,29 +428,4 @@ elif [ ${model_name} == "iprec" ]; then
         cp -r ./datasets/iprec/whole_data/train/* ./test_tipc/data/train
         cp -r ./datasets/iprec/whole_data/test/* ./test_tipc/data/infer
     fi
-elif [ ${model_name} == "kim" ]; then
-    rm -rf ./test_tipc/data/*
-    mkdir -p ./test_tipc/data/train
-    if [ ${MODE} = "lite_train_lite_infer" ];then
-        cp -r ./models/match/kim/data/sample_data/* ./test_tipc/data/train
-        echo "demo data ready"
-    elif [ ${MODE} = "whole_train_whole_infer" ];then
-        cd ./datasets/kim
-        bash run.sh
-        cd ../..
-        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
-        echo "whole data ready"
-    elif [ ${MODE} = "whole_infer" ];then
-        cd ./datasets/kim
-        bash run.sh
-        cd ../..
-        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
-        echo "whole data ready"
-    elif [ ${MODE} = "lite_train_whole_infer" ];then
-        cd ./datasets/kim
-        bash run.sh
-        cd ../..
-        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
-        echo "whole data ready"
-    fi
 fi
diff --git a/test_tipc/scripts/analysis.py b/test_tipc/scripts/analysis.py
new file mode 100644
index 000000000..9a3aae1fe
--- /dev/null
+++ b/test_tipc/scripts/analysis.py
@@ -0,0 +1,300 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import argparse
+import json
+import os
+import re
+import traceback
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--filename", type=str, help="The name of log which need to analysis.")
+    parser.add_argument(
+        "--speed_log_file", type=str, help="json file")
+    parser.add_argument(
+        "--log_with_profiler", type=str, help="The path of train log with profiler")
+    parser.add_argument(
+        "--profiler_path", type=str, help="The path of profiler timeline log.")
+    parser.add_argument(
+        "--keyword", type=str, help="Keyword to specify analysis data")
+    parser.add_argument(
+        "--separator", type=str, default=None, help="Separator of different field in log")
+    parser.add_argument(
+        '--position', type=int, default=None, help='The position of data field')
+    parser.add_argument(
+        '--range', type=str, default="", help='The range of data field to intercept')
+    parser.add_argument(
+        '--skip_steps', type=int, default=0, help='The number of steps to be skipped')
+    parser.add_argument(
+        '--model_mode', type=int, default=-1, help='Analysis mode, default value is -1')
+
+    parser.add_argument(
+        '--model_name', type=str, default="model_name", help='training model_name, transformer_base')
+    parser.add_argument(
+        '--base_batch_size', type=int, help='base_batch size on gpu')
+    parser.add_argument(
+        '--fp_item', type=str, help='fp_item:fp16|fp32')
+    parser.add_argument(
+        '--run_mode', type=str, default="DP", help='DP|MP|PP')
+    parser.add_argument(
+        '--convergence_key', type=str, default="", help="Keyword to specify loss data")
+    parser.add_argument(
+        '--speed_unit', type=str, default="images/s", help='IPS unit')
+    parser.add_argument(
+        '--device_num', type=str, default='N1C1', help='device_num:N1C1|N1C8|N4C32')
+    args = parser.parse_args()
+    args.separator = None if args.separator == "None" else args.separator
+    return args
+
+
+def _is_number(num):
+    """Return True when ``num`` is, in full, a signed integer or decimal."""
+    # Group the alternation so both anchors bind to every branch: the
+    # ungrouped original anchored only one end of each alternative and so
+    # accepted strings such as "1.5abc" and "-.", which int() then rejects.
+    pattern = re.compile(r'^[-+]?(?:\d+\.?\d*|\.\d+)$')
+    return pattern.match(num) is not None
+
+
+class TimeAnalyzer(object):
+    def __init__(self, filename, keyword=None, separator=None, position=None, range="-1"):
+        if filename is None:
+            raise Exception("Please specify the filename!")
+
+        if keyword is None:
+            raise Exception("Please specify the keyword!")
+
+        self.filename = filename
+        self.keyword = keyword
+        self.separator = separator
+        self.position = position
+        self.range = range
+        self.records = None
+        self._distil()
+
+    def _distil(self):
+        """Collect the float that follows ``keyword`` on each matching log line.
+
+        Results go to ``self.records``; lines that cannot be parsed are printed
+        and skipped so that one malformed line does not abort the analysis.
+        """
+        self.records = []
+        with open(self.filename, "r") as f_object:
+            for line in f_object:
+                if self.keyword not in line:
+                    continue
+                try:
+                    result = None
+                    line = line.strip()
+                    line_words = line.split(self.separator) if self.separator else line.split()
+                    # Use the instance's position rather than the module-level
+                    # ``args``, which only exists when run as a script.
+                    if self.position is not None:
+                        result = line_words[self.position]
+                    else:
+                        # Take the token right after the keyword token.
+                        for i in range(len(line_words) - 1):
+                            if line_words[i] == self.keyword:
+                                result = line_words[i + 1]
+                                break
+                    # Optionally slice the token: "N" keeps [0:N], "A:B" keeps [A:B].
+                    if self.range and _is_number(self.range):
+                        result = result[0: int(self.range)]
+                    elif self.range:
+                        result = result[int(self.range.split(":")[0]): int(self.range.split(":")[1])]
+                    self.records.append(float(result))
+                except Exception:
+                    print("line is: {}; separator={}; position={}".format(line, self.separator, self.position))
+        print("Extract {} records: separator={}; position={}".format(len(self.records), self.separator, self.position))
+
+    def _get_fps(self, mode, base_batch_size, gpu_num, avg_of_records, unit=None):
+        """Convert an averaged record into ``(throughput, unit)`` for ``mode``.
+
+        Modes: -1 scales the record by ``gpu_num`` and keeps ``unit``; 0 is
+        s/step -> samples/s; 1 keeps steps/s; 2 is s/step -> steps/s; 3 is
+        steps/s -> samples/s; 4 keeps s/epoch.
+
+        Raises:
+            ValueError: if ``mode`` is none of the values above.
+        """
+        if mode == -1:
+            assert unit, "Please set the unit when mode is -1."
+            fps = gpu_num * avg_of_records
+        elif mode == 0:
+            fps, unit = (base_batch_size * gpu_num) / avg_of_records, "samples/s"
+        elif mode == 1:
+            fps, unit = avg_of_records, "steps/s"
+        elif mode == 2:
+            fps, unit = 1 / avg_of_records, "steps/s"
+        elif mode == 3:
+            fps, unit = base_batch_size * gpu_num * avg_of_records, "samples/s"
+        elif mode == 4:
+            fps, unit = avg_of_records, "s/epoch"
+        else:
+            # Raise explicitly; a bare ValueError(...) expression is a no-op.
+            raise ValueError("Unsupported analysis mode.")
+
+        return fps, unit
+
+    def analysis(self, base_batch_size, gpu_num=1, skip_steps=0, mode=-1, unit=None):
+        if base_batch_size <= 0:
+            print("base_batch_size should larger than 0.")
+            return 0, ''
+
+        if len(self.records) <= skip_steps:  # to address the condition which item of log equals to skip_steps
+            print("no records")
+            return 0, ''
+
+        sum_of_records = 0
+        sum_of_records_skipped = 0
+        skip_min = self.records[skip_steps]
+        skip_max = self.records[skip_steps]
+
+        count = len(self.records)
+        for i in range(count):
+            sum_of_records += self.records[i]
+            if i >= skip_steps:
+                sum_of_records_skipped += self.records[i]
+                if self.records[i] < skip_min:
+                    skip_min = self.records[i]
+                if self.records[i] > skip_max:
+                    skip_max = self.records[i]
+
+        avg_of_records = sum_of_records / float(count)
+        avg_of_records_skipped = sum_of_records_skipped / float(count - skip_steps)
+
+        fps, fps_unit = self._get_fps(mode, base_batch_size, gpu_num, avg_of_records, unit)
+        fps_skipped, _ = self._get_fps(mode, base_batch_size, gpu_num, avg_of_records_skipped, unit)
+        if mode == -1:
+            print("average ips of %d steps, skip 0 step:" % count)
+            print("\tAvg: %.3f %s" % (avg_of_records, fps_unit))
+            print("\tFPS: %.3f %s" % (fps, fps_unit))
+            if skip_steps > 0:
+                print("average ips of %d steps, skip %d steps:" % (count, skip_steps))
+                print("\tAvg: %.3f %s" % (avg_of_records_skipped, fps_unit))
+                print("\tMin: %.3f %s" % (skip_min, fps_unit))
+                print("\tMax: %.3f %s" % (skip_max, fps_unit))
+                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
+        elif mode == 1 or mode == 3:
+            print("average latency of %d steps, skip 0 step:" % count)
+            print("\tAvg: %.3f steps/s" % avg_of_records)
+            print("\tFPS: %.3f %s" % (fps, fps_unit))
+            if skip_steps > 0:
+                print("average latency of %d steps, skip %d steps:" % (count, skip_steps))
+                print("\tAvg: %.3f steps/s" % avg_of_records_skipped)
+                print("\tMin: %.3f steps/s" % skip_min)
+                print("\tMax: %.3f steps/s" % skip_max)
+                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
+        elif mode == 0 or mode == 2:
+            print("average latency of %d steps, skip 0 step:" % count)
+            print("\tAvg: %.3f s/step" % avg_of_records)
+            print("\tFPS: %.3f %s" % (fps, fps_unit))
+            if skip_steps > 0:
+                print("average latency of %d steps, skip %d steps:" % (count, skip_steps))
+                print("\tAvg: %.3f s/step" % avg_of_records_skipped)
+                print("\tMin: %.3f s/step" % skip_min)
+                print("\tMax: %.3f s/step" % skip_max)
+                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
+
+        return round(fps_skipped, 3), fps_unit
+
+
+class ExceptionTest(Exception):
+    pass
+
+
+class LossAnalyzer(object):
+    def __init__(self, filename, convergence_key=None, separator=None):
+        if filename is None:
+            raise Exception("Please specify the filename!")
+        if convergence_key is None:
+            raise Exception("Please specify the keyword of loss!")
+        self.filename = filename
+        self.convergence_key = convergence_key
+        self.separator = separator
+
+    def get_loss(self):
+        """Return the value after ``convergence_key`` on the last matching line.
+
+        The value is returned as a string with commas removed, or the integer 0
+        when no line contains the key as a standalone token.
+        """
+        result_loss = 0
+        with open(self.filename, "r") as f_object:
+            # Walk the log backwards so the most recent loss wins.
+            for raw in reversed(f_object.readlines()):
+                if self.convergence_key not in raw:
+                    continue
+                text = raw.strip()
+                tokens = text.split(self.separator) if self.separator else text.split()
+                # The key must be a whole token and must not be the last one.
+                if self.convergence_key in tokens[:-1]:
+                    idx = tokens.index(self.convergence_key)
+                    result_loss = tokens[idx + 1].replace(',', '')
+                    break
+        print("\tLoss: {}".format(result_loss))
+        return result_loss
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    run_info = dict()
+    run_info["model_branch"] = os.getenv("model_branch")
+    run_info["model_commit"] = os.getenv("model_commit")
+    run_info["model_name"] = args.model_name
+    run_info["batch_size"] = args.base_batch_size
+    run_info["fp_item"] = args.fp_item
+    if re.match(r'DP.-MP.-PP.', args.run_mode) or 'DP_MoE_C' in args.run_mode:
+        run_info["run_mode"] = 'Collective'
+    else:
+        run_info["run_mode"] = args.run_mode
+    run_info["convergence_value"] = 0
+    run_info["convergence_key"] = args.convergence_key
+    run_info["ips"] = 0
+    run_info["speed_unit"] = args.speed_unit
+    run_info["device_num"] = args.device_num
+    run_info["model_run_time"] = os.getenv('model_run_time')
+    run_info["frame_commit"] = os.getenv('frame_commit')
+    run_info["frame_version"] = os.getenv('frame_version')
+    device_num = args.device_num
+    print("---device_num:-", device_num)
+    index_c = device_num.index('C')
+    print("---index_c:-", index_c)
+    gpu_num = int(device_num[index_c + 1:len(device_num)])
+    print("-----gpu_num:", gpu_num)
+    if "pwgan" in args.model_name:
+        print("------analysis ", args.model_name)
+        args.keyword="avg_ips:"
+
+    try:
+        analyzer = TimeAnalyzer(args.filename, args.keyword, args.separator, args.position, args.range)
+        run_info["ips"], run_info["speed_unit"] = analyzer.analysis(
+            base_batch_size=args.base_batch_size,
+            gpu_num=gpu_num,
+            skip_steps=args.skip_steps,
+            mode=args.model_mode,
+            unit=args.speed_unit)
+        if args.convergence_key != "":
+            loss_analyzer = LossAnalyzer(args.filename, args.convergence_key)
+            run_info["convergence_value"] = loss_analyzer.get_loss()
+    except Exception:
+        traceback.print_exc()
+    print("{}".format(json.dumps(run_info)))  # it's required, for the log file path  insert to the database
+    with open(args.speed_log_file, "w") as f:
+        f.write(json.dumps(run_info))
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
old mode 100644
new mode 100755
index 56c6ff7e7..bc13ddf5d
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -205,6 +205,48 @@ function func_inference(){
     done
 }
 
+if [ ${MODE} = "benchmark_train" ]; then
+	if [ ! -d "./log" ]; then
+	  mkdir ./log
+	  echo "Create log floder for store running log"
+	fi
+
+	export FLAGS_LAUNCH_BARRIER=0
+	export PADDLE_TRAINER_ID=0
+	export PADDLE_PSERVER_NUMS=1
+	export PADDLE_TRAINERS=1
+	export PADDLE_TRAINERS_NUM=${PADDLE_TRAINERS}
+	export POD_IP=127.0.0.1
+
+	# set free port if 29011 is occupied
+	export PADDLE_PSERVERS_IP_PORT_LIST="127.0.0.1:29011"
+	export PADDLE_PSERVER_PORT_ARRAY=(29011)
+
+	# set gpu numbers according to your device
+	export FLAGS_selected_gpus="0,1,2,3,4,5,6,7"
+
+	# set your model yaml
+	SC="tools/static_gpubox_trainer.py -m models/rank/dnn/config_gpubox.yaml"
+
+	# run pserver
+	export TRAINING_ROLE=PSERVER
+	for((i=0;i<$PADDLE_PSERVER_NUMS;i++))
+	do
+		cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
+		echo "PADDLE WILL START PSERVER "$cur_port
+		export PADDLE_PORT=${cur_port}
+		python3.7 -u $SC &> ./log/pserver.$i.log &
+	done
+
+	# run trainer
+	export TRAINING_ROLE=TRAINER
+	for((i=0;i<$PADDLE_TRAINERS;i++))
+	do
+		echo "PADDLE WILL START Trainer "$i
+		export PADDLE_TRAINER_ID=$i
+		python3.7 -u $SC &> ./log/worker.$i.log
+	done
+fi
 if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
     GPUID=$3
     if [ ${#GPUID} -le 0 ];then
@@ -324,15 +366,43 @@ else
                 set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                 if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
                     cmd="${python} ${run_train} ${set_use_gpu}  ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
+                    eval "unset CUDA_VISIBLE_DEVICES"
+                    eval $cmd
+                    status_check $? "${cmd}" "${status_log}"
+
                 elif [ ${#ips} -le 26 ];then  # train with multi-gpu
-                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
+                    # run pserver
+                    export TRAINING_ROLE=PSERVER
+                    for((i=0;i<$PADDLE_PSERVER_NUMS;i++))
+                    do
+                        cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
+                        echo "PADDLE WILL START PSERVER "$cur_port
+                        export PADDLE_PORT=${cur_port}
+                        cmd="${python} ${SC} &> ./log/pserver.$i.log &"
+                        eval "unset CUDA_VISIBLE_DEVICES"
+                        eval $cmd
+                        status_check $? "${cmd}" "${status_log}"
+                    done
+
+                    # run trainer
+                    export TRAINING_ROLE=TRAINER
+                    for((i=0;i<$PADDLE_TRAINERS;i++))
+                    do
+                        echo "PADDLE WILL START Trainer "$i
+                        export PADDLE_TRAINER_ID=$i
+                        cmd="${python} ${SC} &> ./log/worker.$i.log &"
+                        eval "unset CUDA_VISIBLE_DEVICES"
+                        eval $cmd
+                        status_check $? "${cmd}" "${status_log}"
+                    done
                 else     # train with multi-machine
                     cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
+                    eval "unset CUDA_VISIBLE_DEVICES"
+                    eval $cmd
+                    status_check $? "${cmd}" "${status_log}"
+
                 fi
                 # run train
-                eval "unset CUDA_VISIBLE_DEVICES"
-                eval $cmd
-                status_check $? "${cmd}" "${status_log}"
 
                 set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
                 # save norm trained models to set pretrain for pact training and fpgm training 
diff --git a/tools/profiler.py b/tools/profiler.py
new file mode 100644
index 000000000..c4e28bc6b
--- /dev/null
+++ b/tools/profiler.py
@@ -0,0 +1,110 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+
+# A global variable to record the number of calling times for profiler
+# functions. It is used to specify the tracing range of training steps.
+_profiler_step_id = 0
+
+# A global variable to avoid parsing from string every time.
+_profiler_options = None
+
+
+class ProfilerOptions(object):
+    '''
+    Use a string to initialize a ProfilerOptions.
+    The string should be in the format: "key1=value1;key2=value2;key3=value3".
+    For example:
+      "profile_path=model.profile"
+      "batch_range=[50, 60]; profile_path=model.profile"
+      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+    ProfilerOptions supports the following keys (unknown keys are ignored):
+      batch_range      - an integer list, e.g. [100, 110].
+      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
+      sorted_key       - a string, the optional values are 'calls', 'total',
+                         'max', 'min' or 'ave'.
+      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
+                         'AllOpDetail'.
+      profile_path     - a string, the path to save the serialized profile data,
+                         which can be used to generate a timeline.
+      exit_on_finished - a boolean.
+    '''
+
+    def __init__(self, options_str):
+        assert isinstance(options_str, str)
+        self._options = {
+            'batch_range': [10, 20],
+            'state': 'All',
+            'sorted_key': 'total',
+            'tracer_option': 'Default',
+            'profile_path': '/tmp/profile',
+            'exit_on_finished': True
+        }
+        self._parse_from_string(options_str)
+
+    def _parse_from_string(self, options_str):
+        '''Override the defaults with the "key=value" pairs in options_str.'''
+        # Ignore empty segments; only the first '=' separates key from value.
+        for kv in filter(None, options_str.replace(' ', '').split(';')):
+            key, value = kv.split('=', 1)
+            if key == 'batch_range':
+                value_list = value.replace('[', '').replace(']', '').split(',')
+                value_list = list(map(int, value_list))
+                if len(value_list) >= 2 and 0 <= value_list[0] < value_list[1]:
+                    self._options[key] = value_list
+            elif key == 'exit_on_finished':
+                self._options[key] = value.lower() in ("yes", "true", "t", "1")
+            elif key in ('state', 'sorted_key', 'tracer_option',
+                         'profile_path'):
+                self._options[key] = value
+
+    def __getitem__(self, name):
+        '''Return the value of option `name`; raise ValueError if unknown.'''
+        if self._options.get(name, None) is None:
+            raise ValueError(
+                "ProfilerOptions does not have an option named %s." % name)
+        return self._options[name]
+
+
+def add_profiler_step(options_str=None):
+    '''
+    Enable the operator-level timing using PaddlePaddle's profiler.
+    The profiler uses an independent variable to count the profiler steps.
+    One call of this function is treated as a profiler step.
+
+    Args:
+      options_str - a string to initialize the ProfilerOptions.
+                    Default is None, and the profiler is disabled.
+    '''
+    if options_str is None:
+        return
+
+    global _profiler_step_id
+    global _profiler_options
+    # Options are parsed on the first enabled call only and then reused.
+    if _profiler_options is None:
+        _profiler_options = ProfilerOptions(options_str)
+    # Start at step batch_range[0]; stop (and maybe exit) at batch_range[1].
+    if _profiler_step_id == _profiler_options['batch_range'][0]:
+        paddle.utils.profiler.start_profiler(
+            _profiler_options['state'], _profiler_options['tracer_option'])
+    elif _profiler_step_id == _profiler_options['batch_range'][1]:
+        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
+                                            _profiler_options['profile_path'])
+        if _profiler_options['exit_on_finished']:
+            sys.exit(0)
+
+    _profiler_step_id += 1
diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index 7579195ad..fb0030d08 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -27,6 +27,7 @@
 import warnings
 import logging
 from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil
+import profiler
 fleet_util = FleetUtil()
 
 __dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -45,12 +46,19 @@ def parse_args():
         type=str,
         required=True,
         help='config file path')
+    parser.add_argument(
+        '--profiler_options',
+        type=str,
+        default=None,
+        help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".'
+    )
     args = parser.parse_args()
     args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
     yaml_helper = YamlHelper()
     config = yaml_helper.load_yaml(args.config_yaml)
     config["yaml_path"] = args.config_yaml
     config["config_abs_dir"] = args.abs_dir
+    config["profiler_options"] = args.profiler_options
     yaml_helper.print_yaml(config)
     return config
 
@@ -59,6 +67,7 @@ class Main(object):
     def __init__(self, config):
         self.metrics = {}
         self.config = config
+        self.profiler_options = config.get("profiler_options")
         self.input_data = None
         self.reader = None
         self.exe = None
@@ -221,6 +230,7 @@ def dataset_train_loop(self, epoch):
         ]
         fetch_vars = [var for _, var in self.metrics.items()]
         print_step = int(config.get("runner.print_interval"))
+        profiler.add_profiler_step(self.profiler_options)
         self.exe.train_from_dataset(
             program=paddle.static.default_main_program(),
             dataset=self.reader,
@@ -235,6 +245,7 @@ def dataloader_train_loop(self, epoch):
         while True:
             try:
                 train_start = time.time()
+                profiler.add_profiler_step(self.profiler_options)
                 # --------------------------------------------------- #
                 fetch_var = self.exe.run(
                     program=paddle.static.default_main_program(),
@@ -280,6 +291,7 @@ def recdataset_train_loop(self, epoch):
         for batch_id, batch_data in enumerate(self.reader()):
             train_reader_cost += time.time() - reader_start
             train_start = time.time()
+            profiler.add_profiler_step(self.profiler_options)
             # --------------------------------------------------- #
             fetch_batch_var = self.exe.run(
                 program=paddle.static.default_main_program(),
@@ -325,6 +337,7 @@ def heter_train_loop(self, epoch):
             while True:
                 try:
                     train_start = time.time()
+                    profiler.add_profiler_step(self.profiler_options)
                     # --------------------------------------------------- #
                     self.exe.run(program=paddle.static.default_main_program())
                     # --------------------------------------------------- #

From 7885ab91c6eabee4ac2985161b7249aaf202c0dd Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 15 Jun 2022 12:50:08 +0000
Subject: [PATCH 02/10] add gpups_1n1c

---
 test_tipc/prepare.sh | 54 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index eeac2648d..07adbaf86 100755
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -428,4 +428,58 @@ elif [ ${model_name} == "iprec" ]; then
         cp -r ./datasets/iprec/whole_data/train/* ./test_tipc/data/train
         cp -r ./datasets/iprec/whole_data/test/* ./test_tipc/data/infer
     fi
+elif [ ${model_name} == "kim" ]; then
+    rm -rf ./test_tipc/data/*
+    mkdir -p ./test_tipc/data/train
+    if [ ${MODE} = "lite_train_lite_infer" ];then
+        cp -r ./models/match/kim/data/sample_data/* ./test_tipc/data/train
+        echo "demo data ready"
+    elif [ ${MODE} = "whole_train_whole_infer" ];then
+        cd ./datasets/kim
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
+        echo "whole data ready"
+    elif [ ${MODE} = "whole_infer" ];then
+        cd ./datasets/kim
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
+        echo "whole data ready"
+    elif [ ${MODE} = "lite_train_whole_infer" ];then
+        cd ./datasets/kim
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/kim/data/whole_data/* ./test_tipc/data/train
+        echo "whole data ready"
+    fi
+elif [ ${model_name} == "fgcnn" ]; then
+    rm -rf ./test_tipc/data/*
+    mkdir -p ./test_tipc/data/train
+    mkdir -p ./test_tipc/data/infer
+    if [ ${MODE} = "lite_train_lite_infer" ];then
+        cp -r ./models/rank/fgcnn/data/trainlite/* ./test_tipc/data/train
+        cp -r ./models/rank/fgcnn/data/testlite/* ./test_tipc/data/infer
+        echo "demo data ready"
+    elif [ ${MODE} = "whole_train_whole_infer" ];then
+        cd ./datasets/criteo_fgcnn
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/criteo_fgcnn/train/train.h5 ./test_tipc/data/train
+        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
+        echo "whole data ready"
+    elif [ ${MODE} = "whole_infer" ];then
+        cd ./datasets/criteo_fgcnn
+        bash run.sh
+        cd ../..
+        cp -r ./datasets/criteo_fgcnn/train/train.h5 ./test_tipc/data/train
+        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
+        echo "whole data ready"
+    elif [ ${MODE} = "lite_train_whole_infer" ];then
+        cd ./datasets/criteo_fgcnn
+        bash run.sh
+        cd ../..
+        cp -r ./models/rank/fgcnn/data/trainlite/* ./test_tipc/data/train
+        cp -r ./datasets/criteo_fgcnn/test/valid.h5 ./test_tipc/data/infer
+    fi
 fi

From 6308f2e987408425b5b27e6ab5f728654614e6b5 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 22 Jun 2022 11:46:23 +0000
Subject: [PATCH 03/10] fix log style

---
 test_tipc/scripts/analysis.py            | 127 ++++++++++++++++-------
 test_tipc/test_train_inference_python.sh |   8 +-
 tools/static_gpubox_trainer.py           |   2 +-
 3 files changed, 95 insertions(+), 42 deletions(-)

diff --git a/test_tipc/scripts/analysis.py b/test_tipc/scripts/analysis.py
index 9a3aae1fe..d17bdf8d7 100644
--- a/test_tipc/scripts/analysis.py
+++ b/test_tipc/scripts/analysis.py
@@ -25,39 +25,62 @@ def parse_args():
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
         "--filename", type=str, help="The name of log which need to analysis.")
+    parser.add_argument("--speed_log_file", type=str, help="json file")
     parser.add_argument(
-        "--speed_log_file", type=str, help="json file")
-    parser.add_argument(
-        "--log_with_profiler", type=str, help="The path of train log with profiler")
+        "--log_with_profiler",
+        type=str,
+        help="The path of train log with profiler")
     parser.add_argument(
         "--profiler_path", type=str, help="The path of profiler timeline log.")
     parser.add_argument(
         "--keyword", type=str, help="Keyword to specify analysis data")
     parser.add_argument(
-        "--separator", type=str, default=None, help="Separator of different field in log")
+        "--separator",
+        type=str,
+        default=None,
+        help="Separator of different field in log")
     parser.add_argument(
-        '--position', type=int, default=None, help='The position of data field')
+        '--position',
+        type=int,
+        default=None,
+        help='The position of data field')
     parser.add_argument(
-        '--range', type=str, default="", help='The range of data field to intercept')
+        '--range',
+        type=str,
+        default="",
+        help='The range of data field to intercept')
     parser.add_argument(
-        '--skip_steps', type=int, default=0, help='The number of steps to be skipped')
+        '--skip_steps',
+        type=int,
+        default=0,
+        help='The number of steps to be skipped')
     parser.add_argument(
-        '--model_mode', type=int, default=-1, help='Analysis mode, default value is -1')
+        '--model_mode',
+        type=int,
+        default=-1,
+        help='Analysis mode, default value is -1')
 
     parser.add_argument(
-        '--model_name', type=str, default="model_name", help='training model_name, transformer_base')
+        '--model_name',
+        type=str,
+        default="model_name",
+        help='training model_name, transformer_base')
     parser.add_argument(
         '--base_batch_size', type=int, help='base_batch size on gpu')
+    parser.add_argument('--fp_item', type=str, help='fp_item:fp16|fp32')
+    parser.add_argument('--run_mode', type=str, default="DP", help='DP|MP|PP')
     parser.add_argument(
-        '--fp_item', type=str, help='fp_item:fp16|fp32')
-    parser.add_argument(
-        '--run_mode', type=str, default="DP", help='DP|MP|PP')
-    parser.add_argument(
-        '--convergence_key', type=str, default="", help="Keyword to specify loss data")
+        '--convergence_key',
+        type=str,
+        default="",
+        help="Keyword to specify loss data")
     parser.add_argument(
         '--speed_unit', type=str, default="images/s", help='IPS unit')
     parser.add_argument(
-        '--device_num', type=str, default='N1C1', help='device_num:N1C1|N1C8|N4C32')
+        '--device_num',
+        type=str,
+        default='N1C1',
+        help='device_num:N1C1|N1C8|N4C32')
     args = parser.parse_args()
     args.separator = None if args.separator == "None" else args.separator
     return args
@@ -73,7 +96,12 @@ def _is_number(num):
 
 
 class TimeAnalyzer(object):
-    def __init__(self, filename, keyword=None, separator=None, position=None, range="-1"):
+    def __init__(self,
+                 filename,
+                 keyword=None,
+                 separator=None,
+                 position=None,
+                 range="-1"):
         if filename is None:
             raise Exception("Please specify the filename!")
 
@@ -100,7 +128,8 @@ def _distil(self):
 
                     # Distil the string from a line.
                     line = line.strip()
-                    line_words = line.split(self.separator) if self.separator else line.split()
+                    line_words = line.split(
+                        self.separator) if self.separator else line.split()
                     if args.position:
                         result = line_words[self.position]
                     else:
@@ -114,17 +143,25 @@ def _distil(self):
                     if not self.range:
                         result = result[0:]
                     elif _is_number(self.range):
-                        result = result[0: int(self.range)]
+                        result = result[0:int(self.range)]
                     else:
-                        result = result[int(self.range.split(":")[0]): int(self.range.split(":")[1])]
+                        result = result[int(self.range.split(":")[0]):int(
+                            self.range.split(":")[1])]
                     self.records.append(float(result))
                 except Exception as exc:
-                    print("line is: {}; separator={}; position={}".format(line, self.separator, self.position))
-
-        print("Extract {} records: separator={}; position={}".format(len(self.records), self.separator, self.position))
-
-    def _get_fps(self, mode, base_batch_size, gpu_num, avg_of_records, unit=None):
-        if mode == -1 :
+                    print("line is: {}; separator={}; position={}".format(
+                        line, self.separator, self.position))
+
+        print("Extract {} records: separator={}; position={}".format(
+            len(self.records), self.separator, self.position))
+
+    def _get_fps(self,
+                 mode,
+                 base_batch_size,
+                 gpu_num,
+                 avg_of_records,
+                 unit=None):
+        if mode == -1:
             assert unit, "Please set the unit when mode is -1."
             fps = gpu_num * avg_of_records
         elif mode == 0:
@@ -152,12 +189,19 @@ def _get_fps(self, mode, base_batch_size, gpu_num, avg_of_records, unit=None):
 
         return fps, unit
 
-    def analysis(self, base_batch_size, gpu_num=1, skip_steps=0, mode=-1, unit=None):
+    def analysis(self,
+                 base_batch_size,
+                 gpu_num=1,
+                 skip_steps=0,
+                 mode=-1,
+                 unit=None):
         if base_batch_size <= 0:
             print("base_batch_size should larger than 0.")
             return 0, ''
 
-        if len(self.records) <= skip_steps:  # to address the condition which item of log equals to skip_steps
+        if len(
+                self.records
+        ) <= skip_steps:  # to address the condition which item of log equals to skip_steps
             print("no records")
             return 0, ''
 
@@ -177,16 +221,20 @@ def analysis(self, base_batch_size, gpu_num=1, skip_steps=0, mode=-1, unit=None)
                     skip_max = self.records[i]
 
         avg_of_records = sum_of_records / float(count)
-        avg_of_records_skipped = sum_of_records_skipped / float(count - skip_steps)
+        avg_of_records_skipped = sum_of_records_skipped / float(count -
+                                                                skip_steps)
 
-        fps, fps_unit = self._get_fps(mode, base_batch_size, gpu_num, avg_of_records, unit)
-        fps_skipped, _ = self._get_fps(mode, base_batch_size, gpu_num, avg_of_records_skipped, unit)
+        fps, fps_unit = self._get_fps(mode, base_batch_size, gpu_num,
+                                      avg_of_records, unit)
+        fps_skipped, _ = self._get_fps(mode, base_batch_size, gpu_num,
+                                       avg_of_records_skipped, unit)
         if mode == -1:
             print("average ips of %d steps, skip 0 step:" % count)
             print("\tAvg: %.3f %s" % (avg_of_records, fps_unit))
             print("\tFPS: %.3f %s" % (fps, fps_unit))
             if skip_steps > 0:
-                print("average ips of %d steps, skip %d steps:" % (count, skip_steps))
+                print("average ips of %d steps, skip %d steps:" %
+                      (count, skip_steps))
                 print("\tAvg: %.3f %s" % (avg_of_records_skipped, fps_unit))
                 print("\tMin: %.3f %s" % (skip_min, fps_unit))
                 print("\tMax: %.3f %s" % (skip_max, fps_unit))
@@ -196,7 +244,8 @@ def analysis(self, base_batch_size, gpu_num=1, skip_steps=0, mode=-1, unit=None)
             print("\tAvg: %.3f steps/s" % avg_of_records)
             print("\tFPS: %.3f %s" % (fps, fps_unit))
             if skip_steps > 0:
-                print("average latency of %d steps, skip %d steps:" % (count, skip_steps))
+                print("average latency of %d steps, skip %d steps:" %
+                      (count, skip_steps))
                 print("\tAvg: %.3f steps/s" % avg_of_records_skipped)
                 print("\tMin: %.3f steps/s" % skip_min)
                 print("\tMax: %.3f steps/s" % skip_max)
@@ -206,7 +255,8 @@ def analysis(self, base_batch_size, gpu_num=1, skip_steps=0, mode=-1, unit=None)
             print("\tAvg: %.3f s/step" % avg_of_records)
             print("\tFPS: %.3f %s" % (fps, fps_unit))
             if skip_steps > 0:
-                print("average latency of %d steps, skip %d steps:" % (count, skip_steps))
+                print("average latency of %d steps, skip %d steps:" %
+                      (count, skip_steps))
                 print("\tAvg: %.3f s/step" % avg_of_records_skipped)
                 print("\tMin: %.3f s/step" % skip_min)
                 print("\tMax: %.3f s/step" % skip_max)
@@ -240,7 +290,8 @@ def get_loss(self):
                 try:
                     result_loss = 0
                     line = line.strip()
-                    line_words = line.split(self.separator) if self.separator else line.split()
+                    line_words = line.split(
+                        self.separator) if self.separator else line.split()
                     for i in range(len(line_words) - 1):
                         if line_words[i] == self.convergence_key:
                             result_loss = line_words[i + 1]
@@ -280,10 +331,11 @@ def get_loss(self):
     print("-----gpu_num:", gpu_num)
     if "pwgan" in args.model_name:
         print("------analysis ", args.model_name)
-        args.keyword="avg_ips:"
+        args.keyword = "avg_ips:"
 
     try:
-        analyzer = TimeAnalyzer(args.filename, args.keyword, args.separator, args.position, args.range)
+        analyzer = TimeAnalyzer(args.filename, args.keyword, args.separator,
+                                args.position, args.range)
         run_info["ips"], run_info["speed_unit"] = analyzer.analysis(
             base_batch_size=args.base_batch_size,
             gpu_num=gpu_num,
@@ -295,6 +347,7 @@ def get_loss(self):
             run_info["convergence_value"] = loss_analyzer.get_loss()
     except Exception:
         traceback.print_exc()
-    print("{}".format(json.dumps(run_info)))  # it's required, for the log file path  insert to the database
+    print("{}".format(json.dumps(run_info))
+          )  # it's required, for the log file path  insert to the database
     with open(args.speed_log_file, "w") as f:
         f.write(json.dumps(run_info))
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index bc13ddf5d..8ca3abf40 100755
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -235,7 +235,7 @@ if [ ${MODE} = "benchmark_train" ]; then
 		cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
 		echo "PADDLE WILL START PSERVER "$cur_port
 		export PADDLE_PORT=${cur_port}
-		python3.7 -u $SC &> ./log/pserver.$i.log &
+		python3.7 -u $SC 
 	done
 
 	# run trainer
@@ -244,7 +244,7 @@ if [ ${MODE} = "benchmark_train" ]; then
 	do
 		echo "PADDLE WILL START Trainer "$i
 		export PADDLE_TRAINER_ID=$i
-		python3.7 -u $SC &> ./log/worker.$i.log
+		python3.7 -u $SC 
 	done
 fi
 if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
@@ -378,7 +378,7 @@ else
                         cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
                         echo "PADDLE WILL START PSERVER "$cur_port
                         export PADDLE_PORT=${cur_port}
-                        cmd="${python} ${SC} &> ./log/pserver.$i.log &"
+                        cmd="${python} ${SC}"
                         eval "unset CUDA_VISIBLE_DEVICES"
                         eval $cmd
                         status_check $? "${cmd}" "${status_log}"
@@ -390,7 +390,7 @@ else
                     do
                         echo "PADDLE WILL START Trainer "$i
                         export PADDLE_TRAINER_ID=$i
-                        cmd="${python} ${SC} &> ./log/worker.$i.log &"
+                        cmd="${python} ${SC}"
                         eval "unset CUDA_VISIBLE_DEVICES"
                         eval $cmd
                         status_check $? "${cmd}" "${status_log}"
diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index fb0030d08..70ec79dfb 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -174,7 +174,7 @@ def run_worker(self):
                 fleet_util.set_zero(self.model.batch_stat_neg.name,
                                     paddle.fluid.global_scope())
                 logger.info(
-                    "Epoch: {}, using time {} second, ips {} {}/sec. auc: {}".
+                    "Epoch: {}, using time: {} second, ips: {} {}/sec. auc: {}".
                     format(epoch, epoch_time, epoch_speed, self.count_method,
                            global_auc))
             else:

From bee9da66aee2d9461b953956c1407591af62b260 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 22 Jun 2022 12:16:11 +0000
Subject: [PATCH 04/10] fix log style

---
 tools/static_gpubox_trainer.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index 70ec79dfb..e9278d748 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -85,7 +85,7 @@ def run(self):
         elif fleet.is_worker():
             self.run_worker()
             fleet.stop_worker()
-            self.record_result()
+            #self.record_result()
         logger.info("Run Success, Exit.")
         logger.info("-" * 100)
 
@@ -111,12 +111,12 @@ def run_worker(self):
         place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
         self.exe = paddle.static.Executor(place)
 
-        with open("./{}_worker_main_program.prototxt".format(
-                fleet.worker_index()), 'w+') as f:
-            f.write(str(paddle.static.default_main_program()))
-        with open("./{}_worker_startup_program.prototxt".format(
-                fleet.worker_index()), 'w+') as f:
-            f.write(str(paddle.static.default_startup_program()))
+        #with open("./{}_worker_main_program.prototxt".format(
+        #        fleet.worker_index()), 'w+') as f:
+        #    f.write(str(paddle.static.default_main_program()))
+        #with open("./{}_worker_startup_program.prototxt".format(
+        #        fleet.worker_index()), 'w+') as f:
+        #    f.write(str(paddle.static.default_startup_program()))
 
         self.exe.run(paddle.static.default_startup_program())
         fleet.init_worker()

From 2e63c3034574239bd99dca2fb38fba71d8051bc7 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Thu, 23 Jun 2022 07:44:19 +0000
Subject: [PATCH 05/10] update v2.3 from qa

---
 test_tipc/doc/benchmark_train.md         |  4 ++--
 test_tipc/test_train_inference_python.sh | 14 +++++++-----
 tools/static_gpubox_trainer.py           | 28 ++++++++++++++++++------
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/test_tipc/doc/benchmark_train.md b/test_tipc/doc/benchmark_train.md
index cc3b17c7c..4dde315e6 100644
--- a/test_tipc/doc/benchmark_train.md
+++ b/test_tipc/doc/benchmark_train.md
@@ -8,7 +8,7 @@
 
 ```shell
 # 运行格式：bash test_tipc/prepare.sh  train_benchmark.txt  mode
-bash test_tipc/prepare.sh test_tipc/configs/dnn/train_benchmark.txt benchmark_train
+bash test_tipc/prepare.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train
 ```
 
 ## 1.2 功能测试
@@ -22,7 +22,7 @@ bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt b
 `test_tipc/benchmark_train.sh`支持根据传入的第三个参数实现只运行某一个训练配置，如下：
 ```shell
 # 运行格式：bash test_tipc/benchmark_train.sh train_benchmark.txt mode
-bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train 
+bash test_tipc/benchmark_train.sh test_tipc/configs/dnn/train_infer_python.txt benchmark_train null_bs8_null_null_N1C8
 ```
 dynamic_bs8_fp32_DP_N1C1为test_tipc/benchmark_train.sh传入的参数，格式如下：
 `${modeltype}_${batch_size}_${fp_item}_${run_mode}_${device_num}`
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index 8ca3abf40..4dfbb27ee 100755
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -223,11 +223,12 @@ if [ ${MODE} = "benchmark_train" ]; then
 	export PADDLE_PSERVER_PORT_ARRAY=(29011)
 
 	# set gpu numbers according to your device
-	export FLAGS_selected_gpus="0,1,2,3,4,5,6,7"
+	#export FLAGS_selected_gpus="0,1,2,3,4,5,6,7"
+	export FLAGS_selected_gpus=${gpu_list}
 
 	# set your model yaml
 	SC="tools/static_gpubox_trainer.py -m models/rank/dnn/config_gpubox.yaml"
-
+    BATCH="-o runner.train_batch_size="$train_batch_value
 	# run pserver
 	export TRAINING_ROLE=PSERVER
 	for((i=0;i<$PADDLE_PSERVER_NUMS;i++))
@@ -235,7 +236,8 @@ if [ ${MODE} = "benchmark_train" ]; then
 		cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
 		echo "PADDLE WILL START PSERVER "$cur_port
 		export PADDLE_PORT=${cur_port}
-		python3.7 -u $SC 
+        cmd="${python} ${SC} ${BATCH}"
+        eval $cmd
 	done
 
 	# run trainer
@@ -244,10 +246,10 @@ if [ ${MODE} = "benchmark_train" ]; then
 	do
 		echo "PADDLE WILL START Trainer "$i
 		export PADDLE_TRAINER_ID=$i
-		python3.7 -u $SC 
+        cmd="${python} ${SC} ${BATCH}"
+        eval $cmd
 	done
-fi
-if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
+elif [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
     GPUID=$3
     if [ ${#GPUID} -le 0 ];then
         env=" "
diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index e9278d748..c7ba88bde 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -40,6 +40,7 @@
 
 def parse_args():
     parser = argparse.ArgumentParser("PaddleRec train script")
+    parser.add_argument("-o", "--opt", nargs='*', type=str)
     parser.add_argument(
         '-m',
         '--config_yaml',
@@ -56,6 +57,19 @@ def parse_args():
     args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
     yaml_helper = YamlHelper()
     config = yaml_helper.load_yaml(args.config_yaml)
+    # modify config from command
+    if args.opt:
+        for parameter in args.opt:
+            parameter = parameter.strip()
+            key, value = parameter.split("=")
+            if type(config.get(key)) is int:
+                value = int(value)
+            if type(config.get(key)) is float:
+                value = float(value)
+            if type(config.get(key)) is bool:
+                value = (True if value.lower() == "true" else False)
+            config[key] = value
+
     config["yaml_path"] = args.config_yaml
     config["config_abs_dir"] = args.abs_dir
     config["profiler_options"] = args.profiler_options
@@ -85,7 +99,7 @@ def run(self):
         elif fleet.is_worker():
             self.run_worker()
             fleet.stop_worker()
-            #self.record_result()
+            self.record_result()
         logger.info("Run Success, Exit.")
         logger.info("-" * 100)
 
@@ -111,12 +125,12 @@ def run_worker(self):
         place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
         self.exe = paddle.static.Executor(place)
 
-        #with open("./{}_worker_main_program.prototxt".format(
-        #        fleet.worker_index()), 'w+') as f:
-        #    f.write(str(paddle.static.default_main_program()))
-        #with open("./{}_worker_startup_program.prototxt".format(
-        #        fleet.worker_index()), 'w+') as f:
-        #    f.write(str(paddle.static.default_startup_program()))
+        with open("./{}_worker_main_program.prototxt".format(
+                fleet.worker_index()), 'w+') as f:
+            f.write(str(paddle.static.default_main_program()))
+        with open("./{}_worker_startup_program.prototxt".format(
+                fleet.worker_index()), 'w+') as f:
+            f.write(str(paddle.static.default_startup_program()))
 
         self.exe.run(paddle.static.default_startup_program())
         fleet.init_worker()

From 0e2d7229983ced8a0833a531a62270285a3b49e3 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 6 Jul 2022 02:28:44 +0000
Subject: [PATCH 06/10] update benchmark from qa

---
 test_tipc/benchmark_train.sh                 |  24 +-
 test_tipc/configs/dnn/train_infer_python.txt |   2 +-
 test_tipc/scripts/analysis.py                | 353 -------------------
 test_tipc/test_train_inference_python.sh     |   5 +-
 4 files changed, 13 insertions(+), 371 deletions(-)
 delete mode 100644 test_tipc/scripts/analysis.py

diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 84935c272..fa87f9f65 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -86,10 +86,8 @@ line_num=`grep -n "train_benchmark_params" $FILENAME  | cut -d ":" -f 1`
 # for train log parser
 batch_size=$(func_parser_value "${lines[line_num]}")
 line_num=`expr $line_num + 1`
-fp_items=$(func_parser_value "${lines[line_num]}")
-line_num=`expr $line_num + 1`
 epoch=$(func_parser_value "${lines[line_num]}")
-
+fp_items=$(func_parser_value "${lines[line_num]}")
 line_num=`expr $line_num + 1`
 profile_option_key=$(func_parser_key "${lines[line_num]}")
 profile_option_params=$(func_parser_value "${lines[line_num]}")
@@ -146,10 +144,6 @@ else
     device_num=${params_list[4]}
     IFS=";"
 
-    if [ ${precision} = "null" ];then
-        precision="fp32"
-    fi
-
     fp_items_list=($precision)
     batch_size_list=($batch_size)
     device_num_list=($device_num)
@@ -169,7 +163,7 @@ for batch_size in ${batch_size_list[*]}; do
                 run_process_type="SingleP"
                 log_path="$SAVE_LOG/profiling_log"
                 mkdir -p $log_path
-                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_profiling"
                 func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id 
                 # set profile_option params
                 tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
@@ -185,8 +179,8 @@ for batch_size in ${batch_size_list[*]}; do
                 speed_log_path="$SAVE_LOG/index"
                 mkdir -p $log_path
                 mkdir -p $speed_log_path
-                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
-                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_speed"
                 func_sed_params "$FILENAME" "${line_profile}" "null"  # sed profile_id as null
                 cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                 echo $cmd
@@ -197,7 +191,7 @@ for batch_size in ${batch_size_list[*]}; do
                 eval "cat ${log_path}/${log_name}"
 
                 # parser log
-                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
                 cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                         --speed_log_file '${speed_log_path}/${speed_log_name}' \
                         --model_name ${_model_name} \
@@ -221,8 +215,8 @@ for batch_size in ${batch_size_list[*]}; do
                 speed_log_path="$SAVE_LOG/index"
                 mkdir -p $log_path
                 mkdir -p $speed_log_path
-                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
-                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_speed"
                 func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id 
                 func_sed_params "$FILENAME" "${line_profile}" "null"  # sed --profile_option as null
                 cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
@@ -233,7 +227,7 @@ for batch_size in ${batch_size_list[*]}; do
                 export model_run_time=$((${job_et}-${job_bt}))
                 eval "cat ${log_path}/${log_name}"
                 # parser log
-                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
                 
                 cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                         --speed_log_file '${speed_log_path}/${speed_log_name}' \
@@ -244,7 +238,7 @@ for batch_size in ${batch_size_list[*]}; do
                         --keyword ips: \
                         --skip_steps 2 \
                         --device_num ${device_num} \
-                        --speed_unit images/s \
+                        --speed_unit samples/s \
                         --convergence_key loss: "
                 echo $cmd
                 eval $cmd
diff --git a/test_tipc/configs/dnn/train_infer_python.txt b/test_tipc/configs/dnn/train_infer_python.txt
index b6b7dd591..c914570b9 100755
--- a/test_tipc/configs/dnn/train_infer_python.txt
+++ b/test_tipc/configs/dnn/train_infer_python.txt
@@ -51,5 +51,5 @@ inference:-u tools/paddle_infer.py --model_name=dnn --reader_file=models/rank/dn
 null:null
 ===========================train_benchmark_params=========================== 
 batchsize:2048
-epoch:3
+epochs:6
 --profiler_options="batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile"
diff --git a/test_tipc/scripts/analysis.py b/test_tipc/scripts/analysis.py
deleted file mode 100644
index d17bdf8d7..000000000
--- a/test_tipc/scripts/analysis.py
+++ /dev/null
@@ -1,353 +0,0 @@
-# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import argparse
-import json
-import os
-import re
-import traceback
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--filename", type=str, help="The name of log which need to analysis.")
-    parser.add_argument("--speed_log_file", type=str, help="json file")
-    parser.add_argument(
-        "--log_with_profiler",
-        type=str,
-        help="The path of train log with profiler")
-    parser.add_argument(
-        "--profiler_path", type=str, help="The path of profiler timeline log.")
-    parser.add_argument(
-        "--keyword", type=str, help="Keyword to specify analysis data")
-    parser.add_argument(
-        "--separator",
-        type=str,
-        default=None,
-        help="Separator of different field in log")
-    parser.add_argument(
-        '--position',
-        type=int,
-        default=None,
-        help='The position of data field')
-    parser.add_argument(
-        '--range',
-        type=str,
-        default="",
-        help='The range of data field to intercept')
-    parser.add_argument(
-        '--skip_steps',
-        type=int,
-        default=0,
-        help='The number of steps to be skipped')
-    parser.add_argument(
-        '--model_mode',
-        type=int,
-        default=-1,
-        help='Analysis mode, default value is -1')
-
-    parser.add_argument(
-        '--model_name',
-        type=str,
-        default="model_name",
-        help='training model_name, transformer_base')
-    parser.add_argument(
-        '--base_batch_size', type=int, help='base_batch size on gpu')
-    parser.add_argument('--fp_item', type=str, help='fp_item:fp16|fp32')
-    parser.add_argument('--run_mode', type=str, default="DP", help='DP|MP|PP')
-    parser.add_argument(
-        '--convergence_key',
-        type=str,
-        default="",
-        help="Keyword to specify loss data")
-    parser.add_argument(
-        '--speed_unit', type=str, default="images/s", help='IPS unit')
-    parser.add_argument(
-        '--device_num',
-        type=str,
-        default='N1C1',
-        help='device_num:N1C1|N1C8|N4C32')
-    args = parser.parse_args()
-    args.separator = None if args.separator == "None" else args.separator
-    return args
-
-
-def _is_number(num):
-    pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$')
-    result = pattern.match(num)
-    if result:
-        return True
-    else:
-        return False
-
-
-class TimeAnalyzer(object):
-    def __init__(self,
-                 filename,
-                 keyword=None,
-                 separator=None,
-                 position=None,
-                 range="-1"):
-        if filename is None:
-            raise Exception("Please specify the filename!")
-
-        if keyword is None:
-            raise Exception("Please specify the keyword!")
-
-        self.filename = filename
-        self.keyword = keyword
-        self.separator = separator
-        self.position = position
-        self.range = range
-        self.records = None
-        self._distil()
-
-    def _distil(self):
-        self.records = []
-        with open(self.filename, "r") as f_object:
-            lines = f_object.readlines()
-            for line in lines:
-                if self.keyword not in line:
-                    continue
-                try:
-                    result = None
-
-                    # Distil the string from a line.
-                    line = line.strip()
-                    line_words = line.split(
-                        self.separator) if self.separator else line.split()
-                    if args.position:
-                        result = line_words[self.position]
-                    else:
-                        # Distil the string following the keyword.
-                        for i in range(len(line_words) - 1):
-                            if line_words[i] == self.keyword:
-                                result = line_words[i + 1]
-                                break
-
-                    # Distil the result from the picked string.
-                    if not self.range:
-                        result = result[0:]
-                    elif _is_number(self.range):
-                        result = result[0:int(self.range)]
-                    else:
-                        result = result[int(self.range.split(":")[0]):int(
-                            self.range.split(":")[1])]
-                    self.records.append(float(result))
-                except Exception as exc:
-                    print("line is: {}; separator={}; position={}".format(
-                        line, self.separator, self.position))
-
-        print("Extract {} records: separator={}; position={}".format(
-            len(self.records), self.separator, self.position))
-
-    def _get_fps(self,
-                 mode,
-                 base_batch_size,
-                 gpu_num,
-                 avg_of_records,
-                 unit=None):
-        if mode == -1:
-            assert unit, "Please set the unit when mode is -1."
-            fps = gpu_num * avg_of_records
-        elif mode == 0:
-            # s/step -> samples/s
-            fps = (base_batch_size * gpu_num) / avg_of_records
-            unit = "samples/s"
-        elif mode == 1:
-            # steps/s -> steps/s
-            fps = avg_of_records
-            unit = "steps/s"
-        elif mode == 2:
-            # s/step -> steps/s
-            fps = 1 / avg_of_records
-            unit = "steps/s"
-        elif mode == 3:
-            # steps/s -> samples/s
-            fps = base_batch_size * gpu_num * avg_of_records
-            unit = "samples/s"
-        elif mode == 4:
-            # s/epoch -> s/epoch
-            fps = avg_of_records
-            unit = "s/epoch"
-        else:
-            ValueError("Unsupported analysis mode.")
-
-        return fps, unit
-
-    def analysis(self,
-                 base_batch_size,
-                 gpu_num=1,
-                 skip_steps=0,
-                 mode=-1,
-                 unit=None):
-        if base_batch_size <= 0:
-            print("base_batch_size should larger than 0.")
-            return 0, ''
-
-        if len(
-                self.records
-        ) <= skip_steps:  # to address the condition which item of log equals to skip_steps
-            print("no records")
-            return 0, ''
-
-        sum_of_records = 0
-        sum_of_records_skipped = 0
-        skip_min = self.records[skip_steps]
-        skip_max = self.records[skip_steps]
-
-        count = len(self.records)
-        for i in range(count):
-            sum_of_records += self.records[i]
-            if i >= skip_steps:
-                sum_of_records_skipped += self.records[i]
-                if self.records[i] < skip_min:
-                    skip_min = self.records[i]
-                if self.records[i] > skip_max:
-                    skip_max = self.records[i]
-
-        avg_of_records = sum_of_records / float(count)
-        avg_of_records_skipped = sum_of_records_skipped / float(count -
-                                                                skip_steps)
-
-        fps, fps_unit = self._get_fps(mode, base_batch_size, gpu_num,
-                                      avg_of_records, unit)
-        fps_skipped, _ = self._get_fps(mode, base_batch_size, gpu_num,
-                                       avg_of_records_skipped, unit)
-        if mode == -1:
-            print("average ips of %d steps, skip 0 step:" % count)
-            print("\tAvg: %.3f %s" % (avg_of_records, fps_unit))
-            print("\tFPS: %.3f %s" % (fps, fps_unit))
-            if skip_steps > 0:
-                print("average ips of %d steps, skip %d steps:" %
-                      (count, skip_steps))
-                print("\tAvg: %.3f %s" % (avg_of_records_skipped, fps_unit))
-                print("\tMin: %.3f %s" % (skip_min, fps_unit))
-                print("\tMax: %.3f %s" % (skip_max, fps_unit))
-                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
-        elif mode == 1 or mode == 3:
-            print("average latency of %d steps, skip 0 step:" % count)
-            print("\tAvg: %.3f steps/s" % avg_of_records)
-            print("\tFPS: %.3f %s" % (fps, fps_unit))
-            if skip_steps > 0:
-                print("average latency of %d steps, skip %d steps:" %
-                      (count, skip_steps))
-                print("\tAvg: %.3f steps/s" % avg_of_records_skipped)
-                print("\tMin: %.3f steps/s" % skip_min)
-                print("\tMax: %.3f steps/s" % skip_max)
-                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
-        elif mode == 0 or mode == 2:
-            print("average latency of %d steps, skip 0 step:" % count)
-            print("\tAvg: %.3f s/step" % avg_of_records)
-            print("\tFPS: %.3f %s" % (fps, fps_unit))
-            if skip_steps > 0:
-                print("average latency of %d steps, skip %d steps:" %
-                      (count, skip_steps))
-                print("\tAvg: %.3f s/step" % avg_of_records_skipped)
-                print("\tMin: %.3f s/step" % skip_min)
-                print("\tMax: %.3f s/step" % skip_max)
-                print("\tFPS: %.3f %s" % (fps_skipped, fps_unit))
-
-        return round(fps_skipped, 3), fps_unit
-
-
-class ExceptionTest(Exception):
-    pass
-
-
-class LossAnalyzer(object):
-    def __init__(self, filename, convergence_key=None, separator=None):
-        if filename is None:
-            raise Exception("Please specify the filename!")
-        if convergence_key is None:
-            raise Exception("Please specify the keyword of loss!")
-        self.filename = filename
-        self.convergence_key = convergence_key
-        self.separator = separator
-
-    def get_loss(self):
-        with open(self.filename, "r") as f_object:
-            lines = f_object.readlines()
-            lines.reverse()
-            result_loss = 0
-            for line in lines:
-                if self.convergence_key not in line:
-                    continue
-                try:
-                    result_loss = 0
-                    line = line.strip()
-                    line_words = line.split(
-                        self.separator) if self.separator else line.split()
-                    for i in range(len(line_words) - 1):
-                        if line_words[i] == self.convergence_key:
-                            result_loss = line_words[i + 1]
-                            result_loss = result_loss.replace(',', '')
-                            raise ExceptionTest()
-                except ExceptionTest:
-                    break
-        print("\tLoss: {}".format(result_loss))
-        return result_loss
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    run_info = dict()
-    run_info["model_branch"] = os.getenv("model_branch")
-    run_info["model_commit"] = os.getenv("model_commit")
-    run_info["model_name"] = args.model_name
-    run_info["batch_size"] = args.base_batch_size
-    run_info["fp_item"] = args.fp_item
-    if re.match(r'DP.-MP.-PP.', args.run_mode) or 'DP_MoE_C' in args.run_mode:
-        run_info["run_mode"] = 'Collective'
-    else:
-        run_info["run_mode"] = args.run_mode
-    run_info["convergence_value"] = 0
-    run_info["convergence_key"] = args.convergence_key
-    run_info["ips"] = 0
-    run_info["speed_unit"] = args.speed_unit
-    run_info["device_num"] = args.device_num
-    run_info["model_run_time"] = os.getenv('model_run_time')
-    run_info["frame_commit"] = os.getenv('frame_commit')
-    run_info["frame_version"] = os.getenv('frame_version')
-    device_num = args.device_num
-    print("---device_num:-", device_num)
-    index_c = device_num.index('C')
-    print("---index_c:-", index_c)
-    gpu_num = int(device_num[index_c + 1:len(device_num)])
-    print("-----gpu_num:", gpu_num)
-    if "pwgan" in args.model_name:
-        print("------analysis ", args.model_name)
-        args.keyword = "avg_ips:"
-
-    try:
-        analyzer = TimeAnalyzer(args.filename, args.keyword, args.separator,
-                                args.position, args.range)
-        run_info["ips"], run_info["speed_unit"] = analyzer.analysis(
-            base_batch_size=args.base_batch_size,
-            gpu_num=gpu_num,
-            skip_steps=args.skip_steps,
-            mode=args.model_mode,
-            unit=args.speed_unit)
-        if args.convergence_key != "":
-            loss_analyzer = LossAnalyzer(args.filename, args.convergence_key)
-            run_info["convergence_value"] = loss_analyzer.get_loss()
-    except Exception:
-        traceback.print_exc()
-    print("{}".format(json.dumps(run_info))
-          )  # it's required, for the log file path  insert to the database
-    with open(args.speed_log_file, "w") as f:
-        f.write(json.dumps(run_info))
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index 4dfbb27ee..726030aab 100755
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -229,6 +229,7 @@ if [ ${MODE} = "benchmark_train" ]; then
 	# set your model yaml
 	SC="tools/static_gpubox_trainer.py -m models/rank/dnn/config_gpubox.yaml"
     BATCH="-o runner.train_batch_size="$train_batch_value
+    EPOCH="-o runner.epochs="$epoch_num
 	# run pserver
 	export TRAINING_ROLE=PSERVER
 	for((i=0;i<$PADDLE_PSERVER_NUMS;i++))
@@ -236,7 +237,7 @@ if [ ${MODE} = "benchmark_train" ]; then
 		cur_port=${PADDLE_PSERVER_PORT_ARRAY[$i]}
 		echo "PADDLE WILL START PSERVER "$cur_port
 		export PADDLE_PORT=${cur_port}
-        cmd="${python} ${SC} ${BATCH}"
+        cmd="${python} ${SC} ${BATCH} ${EPOCH}"
         eval $cmd
 	done
 
@@ -246,7 +247,7 @@ if [ ${MODE} = "benchmark_train" ]; then
 	do
 		echo "PADDLE WILL START Trainer "$i
 		export PADDLE_TRAINER_ID=$i
-        cmd="${python} ${SC} ${BATCH}"
+        cmd="${python} ${SC} ${BATCH} ${EPOCH}"
         eval $cmd
 	done
 elif [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then

From c87a4e5c5e26f915714c0cdb96f39e360492b062 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 6 Jul 2022 02:34:41 +0000
Subject: [PATCH 07/10] update benchmark from qa

---
 test_tipc/benchmark_train.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index fa87f9f65..71e3eadc4 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -87,7 +87,7 @@ line_num=`grep -n "train_benchmark_params" $FILENAME  | cut -d ":" -f 1`
 batch_size=$(func_parser_value "${lines[line_num]}")
 line_num=`expr $line_num + 1`
 epoch=$(func_parser_value "${lines[line_num]}")
-fp_items=$(func_parser_value "${lines[line_num]}")
+fp_items="null"
 line_num=`expr $line_num + 1`
 profile_option_key=$(func_parser_key "${lines[line_num]}")
 profile_option_params=$(func_parser_value "${lines[line_num]}")

From cd40dae4b3604c71ac9305e203eb52f90b1a5f65 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 6 Jul 2022 03:12:49 +0000
Subject: [PATCH 08/10] update benchmark from qa

---
 test_tipc/benchmark_train.sh                 | 4 ++--
 test_tipc/configs/dnn/train_infer_python.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 71e3eadc4..5697eb913 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -202,7 +202,7 @@ for batch_size in ${batch_size_list[*]}; do
                         --skip_steps 2 \
                         --device_num ${device_num} \
                         --speed_unit samples/s \
-                        --convergence_key loss: "
+                        --convergence_key auc: "
                 echo $cmd
                 eval $cmd
                 last_status=${PIPESTATUS[0]}
@@ -239,7 +239,7 @@ for batch_size in ${batch_size_list[*]}; do
                         --skip_steps 2 \
                         --device_num ${device_num} \
                         --speed_unit samples/s \
-                        --convergence_key loss: "
+                        --convergence_key auc: "
                 echo $cmd
                 eval $cmd
                 last_status=${PIPESTATUS[0]}
diff --git a/test_tipc/configs/dnn/train_infer_python.txt b/test_tipc/configs/dnn/train_infer_python.txt
index c914570b9..970ba7bd3 100755
--- a/test_tipc/configs/dnn/train_infer_python.txt
+++ b/test_tipc/configs/dnn/train_infer_python.txt
@@ -51,5 +51,5 @@ inference:-u tools/paddle_infer.py --model_name=dnn --reader_file=models/rank/dn
 null:null
 ===========================train_benchmark_params=========================== 
 batchsize:2048
-epochs:6
+epochs:12
 --profiler_options="batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile"

From df9c9f5fb560efdb4168d0d476474d8c30e9adbe Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Wed, 6 Jul 2022 08:00:18 +0000
Subject: [PATCH 09/10] back to fluid

---
 tools/static_gpubox_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index 2a6757707..1dd05495d 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -28,7 +28,7 @@
 import logging
 
 import profiler
-from paddle.incubate.fleet.utils.fleet_util import FleetUtil
+from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil
 fleet_util = FleetUtil()
 
 __dir__ = os.path.dirname(os.path.abspath(__file__))

From 08be45fa38b41bbe4e7a236892f37dda2147c114 Mon Sep 17 00:00:00 2001
From: wangzhen38 <wangzhen38@baidu.com>
Date: Thu, 7 Jul 2022 03:14:33 +0000
Subject: [PATCH 10/10] update by qa

---
 test_tipc/benchmark_train.sh                 | 4 ++--
 test_tipc/configs/dnn/train_infer_python.txt | 6 ++++--
 tools/static_gpubox_trainer.py               | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 5697eb913..214e3ebf3 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -201,7 +201,7 @@ for batch_size in ${batch_size_list[*]}; do
                         --keyword ips: \
                         --skip_steps 2 \
                         --device_num ${device_num} \
-                        --speed_unit samples/s \
+                        --speed_unit example/s \
                         --convergence_key auc: "
                 echo $cmd
                 eval $cmd
@@ -238,7 +238,7 @@ for batch_size in ${batch_size_list[*]}; do
                         --keyword ips: \
                         --skip_steps 2 \
                         --device_num ${device_num} \
-                        --speed_unit samples/s \
+                        --speed_unit example/s \
                         --convergence_key auc: "
                 echo $cmd
                 eval $cmd
diff --git a/test_tipc/configs/dnn/train_infer_python.txt b/test_tipc/configs/dnn/train_infer_python.txt
index 970ba7bd3..08fc02e68 100755
--- a/test_tipc/configs/dnn/train_infer_python.txt
+++ b/test_tipc/configs/dnn/train_infer_python.txt
@@ -50,6 +50,8 @@ inference:-u tools/paddle_infer.py --model_name=dnn --reader_file=models/rank/dn
 --benchmark:True
 null:null
 ===========================train_benchmark_params=========================== 
-batchsize:2048
-epochs:12
+batch_size:2048
+epoch:50
 --profiler_options="batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile"
+run_mode:PSGPU
+fp_items:null
diff --git a/tools/static_gpubox_trainer.py b/tools/static_gpubox_trainer.py
index 1dd05495d..1ec0d6034 100755
--- a/tools/static_gpubox_trainer.py
+++ b/tools/static_gpubox_trainer.py
@@ -152,6 +152,7 @@ def run_worker(self):
         self.PSGPU.set_slot_vector(gpuslot)
         self.PSGPU.set_slot_dim_vector(gpu_mf_sizes)
         self.PSGPU.init_gpu_ps([int(s) for s in gpus_env.split(",")])
+        gpu_num = len(gpus_env.split(","))
         opt_info = paddle.static.default_main_program()._fleet_opt
         if use_auc is True:
             opt_info['stat_var_names'] = [
@@ -176,6 +177,7 @@ def run_worker(self):
 
             epoch_time = time.time() - epoch_start_time
             epoch_speed = self.example_nums / epoch_time
+            epoch_speed = epoch_speed / gpu_num
             if use_auc is True:
                 global_auc = auc(self.model.stat_pos, self.model.stat_neg,
                                  paddle.static.global_scope(), fleet.util)