From dd4cf33d3ae81629578637cab8be4a120f34dfe7 Mon Sep 17 00:00:00 2001
From: Optimox
Date: Sun, 27 Oct 2024 10:21:32 +0100
Subject: [PATCH] update configs to match parallel PR

---
 recipes/configs/gemma2/27B_full.yaml                | 4 +++-
 recipes/configs/gemma2/27B_lora.yaml                | 4 +++-
 recipes/configs/gemma2/27B_lora_single_device.yaml  | 5 +++--
 recipes/configs/gemma2/27B_qlora_single_device.yaml | 5 +++--
 recipes/configs/gemma2/2B_full.yaml                 | 4 +++-
 recipes/configs/gemma2/2B_lora.yaml                 | 4 +++-
 recipes/configs/gemma2/2B_lora_single_device.yaml   | 5 +++--
 recipes/configs/gemma2/2B_qlora_single_device.yaml  | 5 +++--
 recipes/configs/gemma2/9B_full.yaml                 | 4 +++-
 recipes/configs/gemma2/9B_lora.yaml                 | 4 +++-
 recipes/configs/gemma2/9B_lora_single_device.yaml   | 5 +++--
 recipes/configs/gemma2/9B_qlora_single_device.yaml  | 5 +++--
 12 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/recipes/configs/gemma2/27B_full.yaml b/recipes/configs/gemma2/27B_full.yaml
index eebeefbd4..dee049024 100644
--- a/recipes/configs/gemma2/27B_full.yaml
+++ b/recipes/configs/gemma2/27B_full.yaml
@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -53,6 +54,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -69,4 +71,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-27b-finetune
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/27B_lora.yaml b/recipes/configs/gemma2/27B_lora.yaml
index e78b40633..265895090 100644
--- a/recipes/configs/gemma2/27B_lora.yaml
+++ b/recipes/configs/gemma2/27B_lora.yaml
@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -65,6 +66,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -81,4 +83,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-27b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/27B_lora_single_device.yaml b/recipes/configs/gemma2/27B_lora_single_device.yaml
index 56727e529..e245aafa9 100644
--- a/recipes/configs/gemma2/27B_lora_single_device.yaml
+++ b/recipes/configs/gemma2/27B_lora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -64,7 +65,7 @@ batch_size: 2
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -82,7 +83,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-27b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
diff --git a/recipes/configs/gemma2/27B_qlora_single_device.yaml b/recipes/configs/gemma2/27B_qlora_single_device.yaml
index a1b7fcd37..2f0e7d6ca 100644
--- a/recipes/configs/gemma2/27B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/27B_qlora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -64,7 +65,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -82,7 +83,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-27b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
diff --git a/recipes/configs/gemma2/2B_full.yaml b/recipes/configs/gemma2/2B_full.yaml
index 9386fae4b..e302dd759 100644
--- a/recipes/configs/gemma2/2B_full.yaml
+++ b/recipes/configs/gemma2/2B_full.yaml
@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -55,6 +56,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -71,4 +73,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-finetune
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/2B_lora.yaml b/recipes/configs/gemma2/2B_lora.yaml
index e6ef6e6e9..9a439ee0a 100644
--- a/recipes/configs/gemma2/2B_lora.yaml
+++ b/recipes/configs/gemma2/2B_lora.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -67,6 +68,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -83,4 +85,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/2B_lora_single_device.yaml b/recipes/configs/gemma2/2B_lora_single_device.yaml
index 484f133b4..1a2703fb4 100644
--- a/recipes/configs/gemma2/2B_lora_single_device.yaml
+++ b/recipes/configs/gemma2/2B_lora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -66,7 +67,7 @@ batch_size: 8
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 2
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -84,7 +85,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
diff --git a/recipes/configs/gemma2/2B_qlora_single_device.yaml b/recipes/configs/gemma2/2B_qlora_single_device.yaml
index b5d7c9147..c2525460f 100644
--- a/recipes/configs/gemma2/2B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/2B_qlora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -66,7 +67,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -84,7 +85,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
diff --git a/recipes/configs/gemma2/9B_full.yaml b/recipes/configs/gemma2/9B_full.yaml
index d599970a2..0002b1c3b 100644
--- a/recipes/configs/gemma2/9B_full.yaml
+++ b/recipes/configs/gemma2/9B_full.yaml
@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -53,6 +54,7 @@ loss:
   _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -69,4 +71,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-9b-finetune
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/9B_lora.yaml b/recipes/configs/gemma2/9B_lora.yaml
index 1cf209a24..5b0141e9e 100644
--- a/recipes/configs/gemma2/9B_lora.yaml
+++ b/recipes/configs/gemma2/9B_lora.yaml
@@ -23,6 +23,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -65,6 +66,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -81,4 +83,4 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-9b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
diff --git a/recipes/configs/gemma2/9B_lora_single_device.yaml b/recipes/configs/gemma2/9B_lora_single_device.yaml
index 57d066bb0..197ee121a 100644
--- a/recipes/configs/gemma2/9B_lora_single_device.yaml
+++ b/recipes/configs/gemma2/9B_lora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -64,7 +65,7 @@ batch_size: 8
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 2
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -82,7 +83,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-9b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
diff --git a/recipes/configs/gemma2/9B_qlora_single_device.yaml b/recipes/configs/gemma2/9B_qlora_single_device.yaml
index 3c198bead..80a330310 100644
--- a/recipes/configs/gemma2/9B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/9B_qlora_single_device.yaml
@@ -22,6 +22,7 @@ tokenizer:
 
 # Dataset
 dataset:
+  packed: False # Set to true for great speed ups
   _component_: torchtune.datasets.alpaca_dataset
 seed: null
 shuffle: True
@@ -64,7 +65,7 @@ batch_size: 4
 epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
-compile: False
+compile: False # pytorch compile, set to true for perf/memory improvement
 
 # Training env
 device: cuda
@@ -82,7 +83,7 @@ metric_logger:
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-gemma2-9b-lora
 log_every_n_steps: 1
-log_peak_memory_stats: False
+log_peak_memory_stats: True
 
 # Show case the usage of pytorch profiler
 # Set enabled to False as it's only needed for debugging training
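
Usage note (illustrative, not part of the patch): the newly exposed knobs are opt-in and keep the previous behaviour at the defaults added above. A minimal YAML sketch of a config fragment with the optional speed-ups switched on is shown below; it assumes only the keys and the torchtune.datasets.alpaca_dataset builder that appear in the hunks above.

# Hypothetical opt-in example, values flipped from the defaults added by this patch
dataset:
  _component_: torchtune.datasets.alpaca_dataset
  packed: True                # pack multiple samples per sequence for higher throughput
compile: True                 # compile model/loss with PyTorch for speed and memory gains
log_peak_memory_stats: True   # already enabled by default in this patch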