From ee2713d682f282175039297b087efb1d4a7c4fad Mon Sep 17 00:00:00 2001
From: 1000850000 user
Date: Fri, 13 Sep 2024 01:59:49 +0000
Subject: [PATCH] dtype changes to scenarios.yaml and README.md

Signed-off-by: 1000850000 user
---
 .../framework_plugin_autogptq.py        |  4 +++-
 plugins/fused-ops-and-kernels/README.md |  8 ++++++++
 scripts/benchmarks/scenarios.yaml       | 14 ++++++++------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
index cee1b47..6514b1b 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py
@@ -274,7 +274,9 @@ def augmentation(
 
         # some assertions
         assert peft_config is not None, "need peft_config to install PEFT adapters"
-
+        assert (
+            model.dtype == torch.float16 or train_args.fp16
+        ), "need to run in fp16 mixed precision or load model in fp16"
         # call the prepare_model_for_kbit_training. This will no longer be called
         # inside SFTTrainer, because we eventually return None for the peft_config.
         model = prepare_model_for_kbit_training(
diff --git a/plugins/fused-ops-and-kernels/README.md b/plugins/fused-ops-and-kernels/README.md
index 800cfdd..03fc90c 100644
--- a/plugins/fused-ops-and-kernels/README.md
+++ b/plugins/fused-ops-and-kernels/README.md
@@ -16,6 +16,14 @@ Plugin | Description | Depends | Loading | Augmentation | Callbacks
 [fast_quantized_peft](./src/fms_accelerate_foak/framework_plugin_fast_quantized_peft.py) | LoRA fused ops, fast cross-entropy, fast rms, fast RoPE | Contains extracted code | | ✅
 [fast_kernels](./src/fms_accelerate_foak/framework_plugin_fast_kernels.py) | Enhanced version of quantized_peft, that also works for full-FT and non-quant peft | Contains extracted code | | ✅
 
+### Supported DataType Settings
+**Compatibility Matrix with Mixed Precision**
+torch_dtype | Mixed Precision | Full-FT-FOAK | PEFT-FOAK | QPEFT-FOAK
+-- | -- | -- | -- | --
+FLOAT16 | - | ✗ Not Allowed | ✗ | ✗
+FLOAT16 | FP16 | ValueError: <br>Attempting to <br>unscale FP16 gradients. <br>[See here](https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/troubleshooting.md) | **Compatible** | **Compatible**
+BFLOAT16 | - | ✗ | ✗ | ✗
+BFLOAT16 | BF16 | **Compatible** | **Compatible** | [Less Performant](https://github.com/foundation-model-stack/fms-acceleration/issues/84)
 
 ### Code Extracted from Unsloth
 
diff --git a/scripts/benchmarks/scenarios.yaml b/scripts/benchmarks/scenarios.yaml
index 18b63de..7fdf124 100644
--- a/scripts/benchmarks/scenarios.yaml
+++ b/scripts/benchmarks/scenarios.yaml
@@ -49,12 +49,14 @@ scenarios:
         - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
         - 'NousResearch/Llama-2-70b-hf'
       torch_dtype: bfloat16
+      bf16: True
 
   - name: standard-peft
     framework_config:
       - foak-fast-kernels
     arguments:
+      bf16: True
       learning_rate: 2e-4
       torch_dtype: bfloat16
       peft_method: lora
@@ -71,9 +73,9 @@ scenarios:
     framework_config:
       - baseline-peft-bnb
     arguments:
-      bf16: True
+      fp16: True
       learning_rate: 2e-4
-      torch_dtype: bfloat16
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
@@ -89,9 +91,9 @@ scenarios:
       - accelerated-peft-bnb
       - accelerated-peft-bnb-foak
     arguments:
-      bf16: True
+      fp16: True
       learning_rate: 2e-4
-      torch_dtype: bfloat16
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
@@ -110,8 +112,8 @@ scenarios:
       - accelerated-peft-autogptq-foak
     arguments:
       learning_rate: 2e-4
-      bf16: True
-      torch_dtype: bfloat16
+      fp16: True
+      torch_dtype: float16
       peft_method: lora
       r: 16
       lora_alpha: 16
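Note on the `ValueError` row in the new compatibility matrix: it comes from PyTorch's `GradScaler`, which refuses to unscale gradients that are themselves float16, so full fine-tuning a float16 model under fp16 mixed precision cannot work. A minimal sketch of the failing case (assumes a CUDA device; the tiny `nn.Linear` is only a stand-in for a real model):

```python
import torch

# Full FT of a float16 model with fp16 mixed precision: the weight gradients
# are float16, and GradScaler cannot unscale float16 gradients.
model = torch.nn.Linear(8, 8).cuda().half()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(4, 8, device="cuda", dtype=torch.float16)
loss = model(x).float().pow(2).mean()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)  # ValueError: Attempting to unscale FP16 gradients.
```

The PEFT and QPEFT columns stay compatible because the trainable adapter weights are kept in float32 (see the linked troubleshooting guide), so only the frozen base weights remain in float16.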
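On the scenarios.yaml side, the bnb and autogptq scenarios now pair `torch_dtype: float16` with `fp16: True`, which is exactly the condition the new assertion in `augmentation()` enforces (`model.dtype == torch.float16 or train_args.fp16`). A rough sketch of the equivalent Hugging Face settings, with `gpt2` and the output path used purely as placeholders:

```python
import torch
from transformers import AutoModelForCausalLM, TrainingArguments

# Load the base model in float16 (scenarios.yaml: `torch_dtype: float16`).
# "gpt2" is only a placeholder checkpoint for illustration.
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16)

# Enable fp16 mixed precision (scenarios.yaml: `fp16: True`); either this
# flag or the float16 load above satisfies the new assertion.
train_args = TrainingArguments(
    output_dir="benchmark_outputs",  # placeholder path
    fp16=True,
    learning_rate=2e-4,
)
```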