Skip to content

Commit

Permalink
[Fix] fix profile for training (#3312)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmglove authored Jun 8, 2023
1 parent 28d8065 commit 47006bd
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 29 deletions.
46 changes: 31 additions & 15 deletions paddleseg/utils/train_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
# limitations under the License.

import sys
import paddle
import paddle.profiler as profiler

# A global variable to record the number of calling times for profiler
# functions. It is used to specify the tracing range of training steps.
_profiler_step_id = 0

# A global variable to avoid parsing from string every time.
_profiler_options = None

_prof = None

class ProfilerOptions(object):
'''
Expand All @@ -31,9 +31,10 @@ class ProfilerOptions(object):
"profile_path=model.profile"
"batch_range=[50, 60]; profile_path=model.profile"
"batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
ProfilerOptions supports following key-value pair:
batch_range - an integer list, e.g. [100, 110].
state - a string, the optional values are 'CPU', 'GPU' or 'All'.
state - a string, the optional values are 'CPU', 'GPU' or 'All'.
sorted_key - a string, the optional values are 'calls', 'total',
'max', 'min' or 'ave'.
tracer_option - a string, the optional values are 'Default', 'OpDetail',
Expand All @@ -52,11 +53,10 @@ def __init__(self, options_str):
'sorted_key': 'total',
'tracer_option': 'Default',
'profile_path': '/tmp/profile',
'exit_on_finished': True
'exit_on_finished': True,
'timer_only': True
}

if options_str != "":
self._parse_from_string(options_str)
self._parse_from_string(options_str)

def _parse_from_string(self, options_str):
for kv in options_str.replace(' ', '').split(';'):
Expand All @@ -73,6 +73,8 @@ def _parse_from_string(self, options_str):
'state', 'sorted_key', 'tracer_option', 'profile_path'
]:
self._options[key] = value
elif key == 'timer_only':
self._options[key] = value

def __getitem__(self, name):
if self._options.get(name, None) is None:
Expand All @@ -86,26 +88,40 @@ def add_profiler_step(options_str=None):
Enable the operator-level timing using PaddlePaddle's profiler.
The profiler uses an independent variable to count the profiler steps.
One call of this function is treated as a profiler step.
Args:
profiler_options - a string to initialize the ProfilerOptions.
Default is None, and the profiler is disabled.
'''
if options_str is None:
return

global _prof
global _profiler_step_id
global _profiler_options

if _profiler_options is None:
_profiler_options = ProfilerOptions(options_str)

if _profiler_step_id == _profiler_options['batch_range'][0]:
paddle.utils.profiler.start_profiler(_profiler_options['state'],
_profiler_options['tracer_option'])
elif _profiler_step_id == _profiler_options['batch_range'][1]:
paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
_profiler_options['profile_path'])
# profile : https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/performance_improving/profiling_model.html#chakanxingnengshujudetongjibiaodan
# timer_only = True: only the model's throughput and time overhead are displayed.
# timer_only = False: calling summary() prints a statistical table that presents performance data from different perspectives.
# timer_only = False: the exported Timeline information can be found in the profiler_log directory.
if _prof is None:
_timer_only = str(_profiler_options['timer_only']) == str(True)
_prof = profiler.Profiler(
scheduler = (_profiler_options['batch_range'][0], _profiler_options['batch_range'][1]),
on_trace_ready = profiler.export_chrome_tracing('./profiler_log'),
timer_only = _timer_only)
_prof.start()
else:
_prof.step()

if _profiler_step_id == _profiler_options['batch_range'][1]:
_prof.stop()
_prof.summary(
op_detail=True,
thread_sep=False,
time_unit='ms')
_prof = None
if _profiler_options['exit_on_finished']:
sys.exit(0)

Expand Down
35 changes: 21 additions & 14 deletions test_tipc/benchmark_train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ repo_name=$(get_repo_name )
SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
mkdir -p "${SAVE_LOG}/benchmark_log/"
status_log="${SAVE_LOG}/benchmark_log/results.log"

# get benchmark profiling params : PROFILING_TIMER_ONLY=no|True|False
PROFILING_TIMER_ONLY=${PROFILING_TIMER_ONLY:-"True"}
# The number of lines in which train params can be replaced.
line_python=3
line_gpuid=4
Expand Down Expand Up @@ -195,19 +196,25 @@ for batch_size in ${batch_size_list[*]}; do
gpu_id=$(set_gpu_id $device_num)

if [ ${#gpu_id} -le 1 ]; then
log_path="$SAVE_LOG/profiling_log"
mkdir -p $log_path
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
# set profile_option params
tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`

# run test_train_inference_python.sh
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
echo $cmd
eval $cmd
eval "cat ${log_path}/${log_name}"

func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
if [[ ${PROFILING_TIMER_ONLY} != "no" ]];then
echo "run profile"
# The default value of profile_option's timer_only parameter is True
if [[ ${PROFILING_TIMER_ONLY} = "False" ]];then
profile_option="${profile_option};timer_only=False"
fi
log_path="$SAVE_LOG/profiling_log"
mkdir -p $log_path
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
# set profile_option params
tmp=`sed -i "${line_profile}s/.*/\"${profile_option}\"/" "${FILENAME}"`
# run test_train_inference_python.sh
cmd="timeout 5m bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
echo $cmd
eval ${cmd}
eval "cat ${log_path}/${log_name}"
fi
echo "run without profile"
# without profile
log_path="$SAVE_LOG/train_log"
speed_log_path="$SAVE_LOG/index"
Expand Down

0 comments on commit 47006bd

Please sign in to comment.