From ee2713d682f282175039297b087efb1d4a7c4fad Mon Sep 17 00:00:00 2001
From: 1000850000 user
Date: Fri, 13 Sep 2024 01:59:49 +0000
Subject: [PATCH] dtype changes to scenarios.yaml and README.md

Signed-off-by: 1000850000 user
---
 .../framework_plugin_autogptq.py        |  4 +++-
 plugins/fused-ops-and-kernels/README.md |  8 ++++++++
 scripts/benchmarks/scenarios.yaml       | 14 ++++++++------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
index cee1b47..6514b1b 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
@@ -274,7 +274,9 @@ def augmentation(
 
         # some assertions
         assert peft_config is not None, "need peft_config to install PEFT adapters"
-
+        assert (
+            model.dtype == torch.float16 or train_args.fp16
+        ), "need to run in fp16 mixed precision or load model in fp16"
         # call the prepare_model_for_kbit_training. This will no longer be called
         # inside SFTTrainer, because we eventually return None for the peft_config.
         model = prepare_model_for_kbit_training(
diff --git a/plugins/fused-ops-and-kernels/README.md b/plugins/fused-ops-and-kernels/README.md
index 800cfdd..03fc90c 100644
--- a/plugins/fused-ops-and-kernels/README.md
+++ b/plugins/fused-ops-and-kernels/README.md
@@ -16,6 +16,14 @@ Plugin | Description | Depends | Loading | Augmentation | Callbacks
 [fast_quantized_peft](./src/fms_accelerate_foak/framework_plugin_fast_quantized_peft.py) | LoRA fused ops, fast cross-entropy, fast rms, fast RoPE | Contains extracted code | | ✅
 [fast_kernels](./src/fms_accelerate_foak/framework_plugin_fast_kernels.py) | Enhanced version of quantized_peft, that also works for full-FT and non-quant peft | Contains extracted code | | ✅
 
+### Supported DataType Settings
+**Compatibility Matrix with Mixed Precision**
+torch_dtype | Mixed Precision | Full-FT-FOAK | PEFT-FOAK | QPEFT-FOAK
+-- | -- | -- | -- | --
+FLOAT16 | - | ✗ Not Allowed | ✗ | ✗
+FLOAT16 | FP16 | ValueError: <br>Attempting to <br>unscale FP16 gradients. <br>[See here](https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/troubleshooting.md) | **Compatible** | **Compatible**
+BFLOAT16 | - | ✗ | ✗ | ✗
+BFLOAT16 | BF16 | **Compatible** | **Compatible** | [Less Performant](https://github.com/foundation-model-stack/fms-acceleration/issues/84)
 
 ### Code Extracted from Unsloth
 
diff --git a/scripts/benchmarks/scenarios.yaml b/scripts/benchmarks/scenarios.yaml
index 18b63de..7fdf124 100644
--- a/scripts/benchmarks/scenarios.yaml
+++ b/scripts/benchmarks/scenarios.yaml
@@ -49,12 +49,14 @@ scenarios:
         - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
         - 'NousResearch/Llama-2-70b-hf'
       torch_dtype: bfloat16
+      bf16: True
 
   - name: standard-peft
     framework_config:
       - foak-fast-kernels
     arguments:
+      bf16: True
       learning_rate: 2e-4
       torch_dtype: bfloat16
       peft_method: lora
@@ -71,9 +73,9 @@ scenarios:
     framework_config:
       - baseline-peft-bnb
     arguments:
-      bf16: True
+      fp16: True
       learning_rate: 2e-4
-      torch_dtype: bfloat16
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
@@ -89,9 +91,9 @@ scenarios:
       - accelerated-peft-bnb
       - accelerated-peft-bnb-foak
     arguments:
-      bf16: True
+      fp16: True
       learning_rate: 2e-4
-      torch_dtype: bfloat16
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
@@ -110,8 +112,8 @@ scenarios:
       - accelerated-peft-autogptq-foak
     arguments:
       learning_rate: 2e-4
-      bf16: True
-      torch_dtype: bfloat16
+      fp16: True
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
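Note on the `ValueError` row in the new compatibility matrix: it comes from PyTorch's `GradScaler`, which refuses to unscale gradients that are themselves float16, so full fine-tuning a float16 model under fp16 mixed precision cannot work. A minimal sketch of the failing case (assumes a CUDA device; the tiny `nn.Linear` is only a stand-in for a real model):

```python
import torch

# Full FT of a float16 model with fp16 mixed precision: the weight gradients
# are float16, and GradScaler cannot unscale float16 gradients.
model = torch.nn.Linear(8, 8).cuda().half()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(4, 8, device="cuda", dtype=torch.float16)
loss = model(x).float().pow(2).mean()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)  # ValueError: Attempting to unscale FP16 gradients.
```

The PEFT and QPEFT columns stay compatible because the trainable adapter weights are kept in float32 (see the linked troubleshooting guide), so only the frozen base weights remain in float16.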
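On the scenarios.yaml side, the bnb and autogptq scenarios now pair `torch_dtype: float16` with `fp16: True`, which is exactly the condition the new assertion in `augmentation()` enforces (`model.dtype == torch.float16 or train_args.fp16`). A rough sketch of the equivalent Hugging Face settings, with `gpt2` and the output path used purely as placeholders:

```python
import torch
from transformers import AutoModelForCausalLM, TrainingArguments

# Load the base model in float16 (scenarios.yaml: `torch_dtype: float16`).
# "gpt2" is only a placeholder checkpoint for illustration.
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16)

# Enable fp16 mixed precision (scenarios.yaml: `fp16: True`); either this
# flag or the float16 load above satisfies the new assertion.
train_args = TrainingArguments(
    output_dir="benchmark_outputs",  # placeholder path
    fp16=True,
    learning_rate=2e-4,
)
```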