
Commit 5f782a8

add grpo kk 0.96 (#10519)
1 parent a00471f commit 5f782a8

File tree

1 file changed (+5 −4 lines changed)


llm/config/qwen/grpo_argument.yaml

Lines changed: 5 additions & 4 deletions
@@ -21,6 +21,7 @@ eval_datasets: "ppo-kk/5ppl/test.jsonl" # Path to the evaluation dataset
 prompt_key: "src" # Key for the prompt in the dataset
 response_key: "tgt" # Key for the response in the dataset
 dataloader_drop_last: true # Whether to drop the last incomplete batch in the DataLoader
+dataloader_shuffle: false # Whether to shuffle the train dataset
 balance_batch: true # Whether to balance batch size across dataset_world_size
 use_remove_padding: true # Whether to remove padding tokens in the input
@@ -46,7 +47,7 @@ rollout_quant_type: "" # Quantization type, e.g., "weight_only_int8"
 # training args
 do_train: true # Whether to perform training
 seed: 42 # Random seed for reproducibility
-global_batch_size: 4 # Global batch size for training
+global_batch_size: 8 # Global batch size for training
 global_gen_batch_size: -1 # Global generation batch size for dynamic sampling
 global_mini_batch_size: -1 # Mini-batch size for training
 rollout_n: 8 # Number of rollouts
@@ -65,7 +66,7 @@ adam_beta1: 0.9 # AdamW optimizer beta1
 adam_beta2: 0.999 # AdamW optimizer beta2
 adam_epsilon: 1e-8 # AdamW optimizer epsilon
 max_grad_norm: 1.0 # Maximum gradient norm for clipping
-max_steps: -1 # Maximum number of training steps
+max_steps: 3600 # Maximum number of training steps
 save_steps: 300 # Number of steps between model saves
 save_strategy: "steps" # Strategy for saving models
 ignore_save_lr_and_optim: true # Whether to ignore saving learning rate and optimizer state (leave empty if not specified)
@@ -98,7 +99,7 @@ eval_steps: 20 # Number of steps between evaluations
 
 # device memory optimization args
 use_flash_attention: true # Whether to use fused attention operations
-use_fused_rms_norm: true # Whether to use fused RMS norm operations, which needs to install fused_ln in slm/model_zoo/gpt-3/external_ops
+use_fused_rms_norm: false # Whether to use fused RMS norm operations, which needs to install fused_ln in slm/model_zoo/gpt-3/external_ops
 use_fused_rope: false # Whether to use fused rope operations
 use_fused_head_and_loss_fn: true # Whether to use fused head and loss function
 use_fused_linear: true # Whether to use fused linear operations
@@ -115,4 +116,4 @@ release_grads: true # Whether to release gradients
 offload_optim: false # Whether to offload optimizer to pinned memory
 
 # benchmark args
-skip_profile_timer: false # Whether to skip profiling timer
+skip_profile_timer: false # Whether to skip profiling timer

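Summarized as plain YAML, the keys this commit touches in llm/config/qwen/grpo_argument.yaml end up as the excerpt below (only the changed settings are shown; the rest of the file is untouched, and the inline notes are editorial):

dataloader_shuffle: false   # newly added key: do not shuffle the train dataset
global_batch_size: 8        # raised from 4
max_steps: 3600             # previously -1, i.e. no fixed step limit
use_fused_rms_norm: false   # previously true; the fused op requires fused_ln from slm/model_zoo/gpt-3/external_ops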