@@ -526,14 +526,14 @@ def _wrap_amp_model(self, args, model):
         if self.args.pipeline_parallel_degree > 1 or (self.args.tensor_parallel_degree > 1 and self.sharding is None):
             self.scaler = paddle.amp.GradScaler(init_loss_scaling=self.args.scale_loss)
             if self.args.amp_master_grad:
-                mix_precision_utils.MixPrecisionScaler(self.scaler)  # retun value has no use
+                mix_precision_utils.MixPrecisionScaler(self.scaler)  # return value has no use
             self.scaler = fleet.distributed_scaler(self.scaler)
         elif self.sharding is not None:
             self.scaler = paddle.amp.GradScaler(init_loss_scaling=self.args.scale_loss)
             if self.amp_dtype == "float16" or self.amp_dtype == "bfloat16":
                 if ShardingOption.SHARD_OP in self.args.sharding:
                     if self.args.amp_master_grad:
-                        mix_precision_utils.MixPrecisionScaler(self.scaler)  # retun value has no use
+                        mix_precision_utils.MixPrecisionScaler(self.scaler)  # return value has no use
                     self.scaler = fleet.distributed_scaler(self.scaler)
                 else:
                     # scaler for stage2 and stage3
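For readers following the AMP wiring in this hunk, here is a minimal sketch of the same GradScaler pattern in isolation. It is not the repository's code: it assumes fleet.init(is_collective=True) has already set up hybrid parallelism and that `model`, `optimizer`, and `inputs` exist. MixPrecisionScaler patches the scaler in place (hence the "return value has no use" comment being fixed above), and fleet.distributed_scaler then, broadly, keeps the loss-scaling state consistent across the parallel ranks.

import paddle
from paddle.distributed import fleet
from paddle.distributed.fleet.utils import mix_precision_utils

scaler = paddle.amp.GradScaler(init_loss_scaling=2**15)
mix_precision_utils.MixPrecisionScaler(scaler)  # patches the scaler in place; return value unused
scaler = fleet.distributed_scaler(scaler)       # coordinates scaling state across ranks

with paddle.amp.auto_cast(dtype="float16"):
    loss = model(inputs)  # hypothetical forward pass
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
optimizer.clear_grad()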
@@ -763,7 +763,7 @@ def create_zcc_manager(self, unwrapped_model, resume_from_checkpoint=None):
         """
         Create zero cost checkpoint manager.
         Has to be called after pipeline model is created.
-        resume_from_checkpoint: if use Flash checkpoing EMA, load previous checkpoint status
+        resume_from_checkpoint: if use Flash checkpoint EMA, load previous checkpoint status
         """
         assert isinstance(
             self.model, PretrainedModel
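The docstring pins down an ordering constraint, so here is a purely hypothetical sketch of the call order it implies; only create_zcc_manager and resume_from_checkpoint come from the hunk header, the wrapping step is a placeholder and this is not the trainer's actual call site.

# The pipeline-wrapped model must exist before the zero cost checkpoint (ZCC) manager is created.
model = self._wrap_model(self.model_wrapped)  # placeholder for the step that builds the pipeline model
self.create_zcc_manager(self.model, resume_from_checkpoint=resume_from_checkpoint)  # pass the unwrapped model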
@@ -1223,7 +1223,7 @@ def _inner_training_loop(
         ) and availiable_no_sync
         # sharding
         # stage1. the same as ddp
-        # stage2. manualy collect gradient on dp group
+        # stage2. manually collect gradient on dp group
 
         dp_master_grad = (
             self.args.world_size > 1 and self.args.amp_master_grad and not self.args.use_hybrid_parallel
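As a hedged aside on what the amp_master_grad flag in the condition above turns on at the Paddle API level, a minimal sketch (not the trainer's code; it assumes `model` and `optimizer` are already built):

from paddle.distributed.fleet.utils import mix_precision_utils

# Keep FP32 master gradients while the forward/backward pass runs in float16.
model = mix_precision_utils.MixPrecisionLayer(model, dtype="float16")
optimizer = mix_precision_utils.MixPrecisionOptimizer(optimizer)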
@@ -1263,15 +1263,15 @@ def fused_allreduce_gradients_no_sync(paramlist, hcg):
                     self._check_loss_valid(tr_loss)
 
                 self.timers and self.timers("forward-backward").stop()
-                # Maunally collect gradients
+                # Manually collect gradients
                 # Case 1: Use recompute and dp
                 # Case 2: Hack dp with master_grad
                 # Case 3: Pipeline or sharding overlap
                 # local_rank != -1 don't means dp in networks.
                 self.timers and self.timers("all-reduce").start()
 
                 # Case 1: Use recompute and dp / sharding stage1,
-                # manualy collect gradient for dp.
+                # manually collect gradient for dp.
                 if (args.recompute or args.use_expert_parallel) and availiable_no_sync:
                     fused_allreduce_gradients_no_sync(list(model.parameters()), None)
 
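The comments in this hunk describe when gradients have to be collected by hand. A minimal sketch of that collection step, assuming fleet.init(...) has run and `model` is the data-parallel replica; note the trainer's wrapper above passes None as the group, which falls back to the default communication group.

from paddle.distributed import fleet
from paddle.distributed.fleet.utils.hybrid_parallel_util import fused_allreduce_gradients

hcg = fleet.get_hybrid_communicate_group()
# All-reduce every parameter's gradient once across the data-parallel group,
# after gradient accumulation has finished and before optimizer.step().
fused_allreduce_gradients(list(model.parameters()), hcg)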
@@ -2041,21 +2041,21 @@ def _load_rng_state(self, checkpoint):
         core.default_cpu_generator().set_state(checkpoint_rng_state["cpu"])
         if core.is_compiled_with_cuda():
             if not len(checkpoint_rng_state["cuda"]) == core.get_cuda_device_count():
-                raise ValueError("Length of gpu state list shoule be equal to the gpu device count")
+                raise ValueError("Length of gpu state list should be equal to the gpu device count")
             for i in range(core.get_cuda_device_count()):
                 core.default_cuda_generator(i).set_state(checkpoint_rng_state["cuda"][i])
 
         if core.is_compiled_with_xpu():
             if not len(checkpoint_rng_state["cuda"]) == core.get_xpu_device_count():
-                raise ValueError("Length of xpu state list shoule be equal to the xpu device count")
+                raise ValueError("Length of xpu state list should be equal to the xpu device count")
             for i in range(core.get_xpu_device_count()):
                 core.default_xpu_generator(i).set_state(checkpoint_rng_state["cuda"][i])
 
         if paddle.device.get_all_custom_device_type() is not None:
             custom_device_type = paddle.device.get_all_custom_device_type()
             for device in custom_device_type:
                 if not len(checkpoint_rng_state["cuda"]) == core.get_custom_device_count(device):
-                    raise ValueError("Length of custom device state list shoule be equal to the custom device count")
+                    raise ValueError("Length of custom device state list should be equal to the custom device count")
                 for i in range(core.get_custom_device_count(device)):
                     core.default_custom_device_generator(paddle.CustomPlace(device, i)).set_state(
                         checkpoint_rng_state["cuda"][i]
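The loader in this hunk reuses the "cuda" key when restoring xpu and custom-device generators as well. For orientation, a minimal sketch of the save side it expects, using standard paddle RNG APIs rather than the trainer's exact helper; the rng_state.pth file name is illustrative.

import random
import numpy as np
import paddle
from paddle.framework import core

checkpoint_rng_state = {
    "python": random.getstate(),
    "numpy": np.random.get_state(),
    "cpu": core.default_cpu_generator().get_state(),
    # One generator state per visible GPU, which is why the loader compares
    # the list length with core.get_cuda_device_count().
    "cuda": paddle.get_cuda_rng_state() if paddle.is_compiled_with_cuda() else [],
}
paddle.save(checkpoint_rng_state, "rng_state.pth")  # illustrative file name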