From 47006bd1d82c9640c6e982cfa61ef54dcb33c4ff Mon Sep 17 00:00:00 2001
From: gmm <38800877+mmglove@users.noreply.github.com>
Date: Thu, 8 Jun 2023 19:44:48 +0800
Subject: [PATCH] [Fix] fix profile for training (#3312)

---
 paddleseg/utils/train_profiler.py | 46 +++++++++++++++++++++----------
 test_tipc/benchmark_train.sh      | 35 +++++++++++++----------
 2 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/paddleseg/utils/train_profiler.py b/paddleseg/utils/train_profiler.py
index 4b4d53b849..629ef4ef05 100644
--- a/paddleseg/utils/train_profiler.py
+++ b/paddleseg/utils/train_profiler.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import sys
-import paddle
+import paddle.profiler as profiler
 
 # A global variable to record the number of calling times for profiler
 # functions. It is used to specify the tracing range of training steps.
@@ -21,7 +21,7 @@
 
 # A global variable to avoid parsing from string every time.
 _profiler_options = None
-
+_prof = None
 
 class ProfilerOptions(object):
     '''
@@ -31,9 +31,10 @@ class ProfilerOptions(object):
       "profile_path=model.profile"
       "batch_range=[50, 60]; profile_path=model.profile"
       "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+
     ProfilerOptions supports following key-value pair:
       batch_range      - a integer list, e.g. [100, 110].
-      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
+      state            - a string, the optional values are 'CPU', 'GPU' or 'All'. 
       sorted_key       - a string, the optional values are 'calls', 'total',
                          'max', 'min' or 'ave.
       tracer_option    - a string, the optional values are 'Default', 'OpDetail',
@@ -52,11 +53,10 @@ def __init__(self, options_str):
             'sorted_key': 'total',
             'tracer_option': 'Default',
             'profile_path': '/tmp/profile',
-            'exit_on_finished': True
+            'exit_on_finished': True,
+            'timer_only': True
         }
-
-        if options_str != "":
-            self._parse_from_string(options_str)
+        self._parse_from_string(options_str)
 
     def _parse_from_string(self, options_str):
         for kv in options_str.replace(' ', '').split(';'):
@@ -73,6 +73,8 @@ def _parse_from_string(self, options_str):
                     'state', 'sorted_key', 'tracer_option', 'profile_path'
             ]:
                 self._options[key] = value
+            elif key == 'timer_only':
+                self._options[key] = value
 
     def __getitem__(self, name):
         if self._options.get(name, None) is None:
@@ -86,7 +88,6 @@ def add_profiler_step(options_str=None):
     Enable the operator-level timing using PaddlePaddle's profiler.
     The profiler uses a independent variable to count the profiler steps.
     One call of this function is treated as a profiler step.
-
     Args:
       profiler_options - a string to initialize the ProfilerOptions.
                          Default is None, and the profiler is disabled.
@@ -94,18 +95,33 @@ def add_profiler_step(options_str=None):
     if options_str is None:
         return
 
+    global _prof 
     global _profiler_step_id
     global _profiler_options
 
     if _profiler_options is None:
         _profiler_options = ProfilerOptions(options_str)
-
-    if _profiler_step_id == _profiler_options['batch_range'][0]:
-        paddle.utils.profiler.start_profiler(_profiler_options['state'],
-                                             _profiler_options['tracer_option'])
-    elif _profiler_step_id == _profiler_options['batch_range'][1]:
-        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
-                                            _profiler_options['profile_path'])
+    # profile : https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/performance_improving/profiling_model.html#chakanxingnengshujudetongjibiaodan
+    # timer_only=True:  only the model's throughput and time overhead are reported.
+    # timer_only=False: summary() additionally prints statistical tables that present the performance data from different perspectives,
+    #                   and the exported timeline (Chrome tracing format) can be found in the ./profiler_log directory.
+    if _prof is None:
+        _timer_only = str(_profiler_options['timer_only']) == str(True)
+        _prof = profiler.Profiler(
+                   scheduler = (_profiler_options['batch_range'][0], _profiler_options['batch_range'][1]),
+                   on_trace_ready = profiler.export_chrome_tracing('./profiler_log'),
+                   timer_only = _timer_only)
+        _prof.start()
+    else:
+        _prof.step()
+        
+    if _profiler_step_id == _profiler_options['batch_range'][1]:
+        _prof.stop()
+        _prof.summary(
+             op_detail=True,
+             thread_sep=False,
+             time_unit='ms')
+        _prof = None
         if _profiler_options['exit_on_finished']:
             sys.exit(0)
 
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 001006541b..92383d262b 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -110,7 +110,8 @@ repo_name=$(get_repo_name )
 SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
 mkdir -p "${SAVE_LOG}/benchmark_log/"
 status_log="${SAVE_LOG}/benchmark_log/results.log"
-
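+# Benchmark profiling switch PROFILING_TIMER_ONLY: "no" skips the profiling run, "True" (default) keeps timer_only=True, "False" passes timer_only=False to the profiler.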
+PROFILING_TIMER_ONLY=${PROFILING_TIMER_ONLY:-"True"}
 # The number of lines in which train params can be replaced.
 line_python=3
 line_gpuid=4
@@ -195,19 +196,25 @@ for batch_size in ${batch_size_list[*]}; do
             gpu_id=$(set_gpu_id $device_num)
 
             if [ ${#gpu_id} -le 1 ]; then
-                log_path="$SAVE_LOG/profiling_log"
-                mkdir -p $log_path
-                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
-                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
-                # set profile_option params
-                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
-
-                # run test_train_inference_python.sh
-                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
-                echo $cmd
-                eval $cmd
-                eval "cat ${log_path}/${log_name}"
-
+                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id 
+                if [[ ${PROFILING_TIMER_ONLY} != "no" ]];then
+                    echo "run profile"
+                    # timer_only defaults to True in the profiler options, so it only needs to be overridden when False is requested
+                    if [[ ${PROFILING_TIMER_ONLY} = "False" ]];then
+                        profile_option="${profile_option};timer_only=False"
+                    fi
+                    log_path="$SAVE_LOG/profiling_log"
+                    mkdir -p $log_path
+                    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
+                    # set profile_option params
+                    tmp=`sed -i "${line_profile}s/.*/\"${profile_option}\"/" "${FILENAME}"`
+                    # run test_train_inference_python.sh
+                    cmd="timeout 5m bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                    echo $cmd
+                    eval ${cmd}
+                    eval "cat ${log_path}/${log_name}"
+                fi
+                echo "run without profile"
                 # without profile
                 log_path="$SAVE_LOG/train_log"
                 speed_log_path="$SAVE_LOG/index"