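GPT benchmark fix v2: fetch loss.numpy() every step in place of cuda.synchronize() before timing; fix fp16 `[ ]` test in run_benchmark.sh (#1244)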

ZHUI · web-flow · commit 0a3faeea7a85 · 2021-10-28T14:59:48.000+08:00
diff --git a/examples/language_model/gpt/run_pretrain.py b/examples/language_model/gpt/run_pretrain.py
@@ -252,14 +252,13 @@ def do_train(args):
                     lr_scheduler.step()
                 optimizer.clear_grad()
 
-                paddle.device.cuda.synchronize()
+                loss_numpy = loss.numpy()
                 train_run_cost += time.time() - train_start
 
                 # Profile for model benchmark
                 profiler.add_profiler_step(args.profiler_options)
 
                 if global_step % args.logging_freq == 0:
-                    loss_numpy = loss.numpy()
                     speed = args.logging_freq / (
                         train_reader_cost + train_run_cost)
                     avg_reader_cost = train_reader_cost / args.logging_freq
diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh
@@ -55,7 +55,7 @@ function _train(){
 
     if [ $fp_item = "fp16" ]; then
         use_fp16_cmd="--use_amp true" 
-        if [ $dygraph_name = "dygraph" && $gpt_repo = "gpt3" ]; then
+        if [ $dygraph_name = "dygraph" ] && [ $gpt_repo = "gpt3" ]; then
             use_fp16_cmd="--use_pure_fp16 true"
         fi
     fi