Update rp effective seq len sampling
mzio committed Sep 19, 2024
1 parent 90abd34 commit 50aabea
Showing 2 changed files with 70 additions and 1 deletion.
@@ -5,7 +5,7 @@ dataset:
       - redpajama/train.json[50000]
     eval_data:
       - redpajama/train.json
-    num_train_samples: 10000 # (8 * 2500) * (1024) = 20M
+    num_train_samples: 10000 # (8 * 2500) * (2048) = 20M
     max_train_samples: 20000
     max_eval_num: 1000
     max_length: 32768
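Note on the token-count comment above: 8 * 2500 * 2048 = 40,960,000, roughly 41M tokens, while the old chunk size gives 8 * 2500 * 1024 = 20,480,000, roughly 20M. The trailing "= 20M" therefore appears to be a leftover from the 1024 setting rather than a recomputed total.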
@@ -0,0 +1,69 @@
dataset:
  name: redpajama_sample_contig
  dataset_config:
    train_data:
      - redpajama/train.json[50000]
    eval_data:
      - redpajama/train.json
    num_train_samples: 10000 # (8 * 2500) * (2048) = 20M
    max_train_samples: 20000
    max_eval_num: 1000
    max_length: 32768
    min_length: 2048
    chat_template: llama-3
    chunk_size: 2048 # sequence length for distilling
    seed: 42
    cache_dir: '/scr-ssd/mzhang/data/long-llm/long-llm/' # Change this to where you want to save
    load_from_cache_file: true
    esl_model_config: base_llama3_8b
    filter_by_esl: true
    dataloaders_dir: '/scr-ssd/mzhang/projects/lolcats/src/dataloaders'

  pretrained_model_config:
    pretrained_model_name_or_path: 'meta-llama/Meta-Llama-3-8B'
    cache_dir: '/scr-ssd/mzhang/models/llama3' # Set this to where you want to save checkpoint weights
  preprocess_config: null
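filter_by_esl and esl_model_config gate which RedPajama documents get sampled, based on their effective sequence length; the actual logic lives in the dataloaders under dataloaders_dir. As a rough, hypothetical sketch, assuming ESL here boils down to a tokenized-length check against min_length / max_length before chunking (function and variable names below are illustrative, not lolcats' API):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B')

def keep_by_esl(text: str, min_length: int = 2048, max_length: int = 32768) -> bool:
    """Keep a document only if its tokenized length lies in [min_length, max_length]."""
    n_tokens = len(tokenizer(text)['input_ids'])
    return min_length <= n_tokens <= max_length

# Surviving documents would then be split into chunk_size-token (2048)
# segments, matching the distillation sequence length above.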

dataloader:
  batch_size: 1
  num_workers: 2
  drop_last: false
  pin_memory: true
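These four keys map directly onto PyTorch DataLoader arguments; a minimal sketch, assuming a train_dataset built from the config above:

from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_dataset,       # assumed: the chunked RedPajama dataset from above
    batch_size=1,        # one 2048-token chunk per device step
    num_workers=2,       # background workers for loading/collation
    drop_last=False,     # keep the final partial batch
    pin_memory=True,     # page-locked host memory for faster GPU transfers
)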

optimizer:
  optim: adamw_torch_fused
  lr: 1e-4
  weight_decay: 0.0
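adamw_torch_fused is the HuggingFace-style name for PyTorch's fused AdamW kernel; the equivalent direct construction (with model assumed in scope) would be:

import torch

optimizer = torch.optim.AdamW(
    model.parameters(),  # assumed: the Llama 3 8B model loaded above
    lr=1e-4,
    weight_decay=0.0,
    fused=True,          # single fused CUDA kernel for the update step
)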

lr_scheduler:
  lr_scheduler_type: reduce_lr_on_plateau
  mode: min
  factor: 0.1
  patience: 10
  min_lr: 0.00001
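reduce_lr_on_plateau corresponds to torch.optim.lr_scheduler.ReduceLROnPlateau: the learning rate is cut by a factor of 0.1 after 10 evaluations without improvement in the monitored (minimized) metric, with a floor of 1e-5. A sketch:

from torch.optim.lr_scheduler import ReduceLROnPlateau

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
                              patience=10, min_lr=1e-5)
# Stepped with the monitored metric after each evaluation, e.g.:
# scheduler.step(eval_loss)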

trainer: # HuggingFace Trainer-like arguments
  name: default_lm
  bf16: true
  train_split: train
  val_split: validation
  num_train_epochs: 2
  gradient_accumulation_steps: 8
  seed: 42
  batch_size: 1
  load_best_model_at_end: true
  greater_is_better: false
  metric_for_best_model: eval/loss # eval/rouge/geometric_mean
  logging_steps: 100
  evaluation_strategy: steps
  max_steps: -1
  eval_steps: 100
  max_eval_batches: null
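With batch_size 1 and gradient_accumulation_steps 8, each optimizer update sees 8 sequences, i.e. 8 * 2048 = 16,384 tokens at chunk_size 2048. A bare-bones sketch of that accumulation pattern (illustrative only, not the default_lm trainer itself; model, train_loader, and optimizer as above):

import torch

accum = 8  # gradient_accumulation_steps
for step, batch in enumerate(train_loader):
    with torch.autocast('cuda', dtype=torch.bfloat16):  # bf16: true
        loss = model(**batch).loss / accum  # normalize across accumulated steps
    loss.backward()
    if (step + 1) % accum == 0:
        optimizer.step()
        optimizer.zero_grad()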

finetune:
  method: lora
  kwargs:
    r: 8
    lora_alpha: 16 # 32
    lora_dropout: 0 # 0.05
    target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
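The finetune block describes a standard LoRA adapter over the four attention projections; in PEFT terms, a sketch of the equivalent setup (not necessarily how lolcats wires it internally):

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,             # low-rank dimension
    lora_alpha=16,   # scaling; effective scale = lora_alpha / r = 2
    lora_dropout=0.0,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(model, lora_config)  # wraps the base model with adapters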
