prepare full-foak benchmarks
achew010 committed Sep 6, 2024
1 parent c8459bc commit 1f3d3fb
Showing 6 changed files with 49 additions and 8 deletions.
@@ -14,3 +14,4 @@
 
 # Local
 from .framework_plugin_fast_quantized_peft import FastQuantizedPeftAccelerationPlugin
+from .framework_plugin_fast_kernels import FastKernelsAccelerationPlugin
@@ -85,7 +85,7 @@ def __init__(self, configurations: Dict[str, Dict]):
         )
 
         self._check_config_and_maybe_check_values(
-            key="base_layer",
+            key="base_layer",
             values=["auto_gptq", "bitsandbytes"],
             default="auto_gptq"
         )
@@ -113,11 +113,6 @@ def augmentation(
         modifiable_args: Tuple[LoraConfig],
     ):
 
-        # this seems to be only needed for fused lora?
-        assert (
-            model.dtype == torch.float16 and train_args.fp16
-        ), "need to run in fp16 mixed precision or load model in fp16 when using fused lora"
-
         terms = set()
         for k, v in self.configurations.items():
             if v:
@@ -138,7 +133,7 @@ def augmentation(
 AccelerationPlugin.register_plugin(
     FastKernelsAccelerationPlugin,
     configuration_or_paths=[
-        "training.fused_ops_and_kernels"
+        "training.fused_ops_and_kernels",
         "peft.quantization.fused_ops_and_kernels",
     ],
 )
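As context for the check shown above: judging by its name and arguments, `_check_config_and_maybe_check_values` reads a configuration key, falls back to a default, and validates the value against an allowed set. A hypothetical minimal equivalent (a sketch only, not the actual AccelerationPlugin implementation):

```python
def check_key(config: dict, key: str, values=None, default=None):
    # Hypothetical sketch: read `key` from the plugin configuration,
    # fall back to `default`, and validate against `values` if given.
    val = config.get(key, default)
    if values is not None and val not in values:
        raise ValueError(f"{key} must be one of {values}, got {val!r}")
    return val

# e.g. base_layer must be "auto_gptq" or "bitsandbytes", defaulting to "auto_gptq"
base_layer = check_key({}, "base_layer", values=["auto_gptq", "bitsandbytes"], default="auto_gptq")
```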
7 changes: 6 additions & 1 deletion sample-configurations/CONTENTS.yaml
@@ -67,4 +67,9 @@ framework_configs:
       - accelerated-peft
       - attention-and-distributed-packing
       - fused-ops-and-kernels
-    filename: accelerated-peft-autogptq-foak-padding-free-sample-configuration.yaml
+    filename: accelerated-peft-autogptq-foak-padding-free-sample-configuration.yaml
+
+  - shortname: foak-fast-kernels
+    plugins:
+      - fused-ops-and-kernels
+    filename: foak-fast-kernels-sample-configuration.yaml
31 changes: 31 additions & 0 deletions sample-configurations/foak-fast-kernels-sample-configuration.yaml
@@ -0,0 +1,31 @@
+# FMS Acceleration Plugin Configuration.
+#
+# Each stanza incorporates various configurations for
+# different fine-tuning / training tasks.
+plugins:
+  # Configurations to accelerate training with fused operations and kernels
+  training:
+
+    fused_ops_and_kernels:
+
+      # If specified under the training stanza, the
+      # base_layer and fused_lora settings would be a
+      # misnomer - they belong under peft.quantization.
+      # However, if they are specified here, they will
+      # still be read. This is useful in cases where
+      # the yaml is system-generated and not shown
+      # to a user.
+
+      # activate various unsloth optimizations
+      # there are two versions of the plugin:
+      # - the FastKernel version supports individual kernels
+      # - the FastQuantized version is all-or-nothing
+
+      # fast loss triton kernels
+      fast_loss: True
+
+      # fast rms norm triton kernels
+      fast_rms_layernorm: True
+
+      # fast RoPE embedding triton kernels
+      fast_rope_embeddings: True
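For comparison with the comments above (a hedged sketch, not a file added by this commit): when the same plugin is configured for quantized PEFT, the stanza would sit under `peft.quantization` instead, where `base_layer` and `fused_lora` are no longer misnomers. The keys below are inferred from the `base_layer` check and the `peft.quantization.fused_ops_and_kernels` registration path shown earlier:

```yaml
# Illustrative sketch only, inferred from this commit's code paths.
plugins:
  peft:
    quantization:
      fused_ops_and_kernels:
        # one of: auto_gptq, bitsandbytes (defaults to auto_gptq)
        base_layer: auto_gptq
        # fuse LoRA adapters into the quantized base layer kernels
        fused_lora: True
        fast_loss: True
        fast_rms_layernorm: True
        fast_rope_embeddings: True
```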
6 changes: 6 additions & 0 deletions scripts/benchmarks/scenarios.yaml
@@ -37,6 +37,9 @@
 
 scenarios:
   - name: full-finetuning
+    framework_config:
+      - null
+      - foak-fast-kernels
     arguments:
       learning_rate: 2e-5
       model_name_or_path:
@@ -46,6 +49,9 @@ scenarios:
       torch_dtype: float16
 
   - name: standard-peft
+    framework_config:
+      - null
+      - foak-fast-kernels
     arguments:
       learning_rate: 2e-4
       torch_dtype: float16
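A note on the `framework_config` lists added above: each scenario is presumably run once per listed entry, with `null` denoting the unaccelerated baseline, so full-finetuning and standard-peft are each benchmarked both with and without foak-fast-kernels. A hedged sketch of that expansion (the real harness in scripts/benchmarks may differ):

```python
import yaml

# Hypothetical illustration of the scenario x framework_config matrix.
with open("scripts/benchmarks/scenarios.yaml") as f:
    scenarios = yaml.safe_load(f)["scenarios"]

for sc in scenarios:
    # YAML `null` loads as None, i.e. run the scenario without acceleration
    for fc in sc.get("framework_config", [None]):
        print(sc["name"], "->", fc or "baseline (no acceleration config)")
```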
3 changes: 3 additions & 0 deletions scripts/generate_sample_configurations.py
@@ -147,6 +147,7 @@ def read_configuration(path: str) -> Dict:
 KEY_BNB_NF4_FOAK = "bnb-nf4-foak"
 KEY_AADP_PADDING_FREE = "aadp-padding-free"
 KEY_AADP_MULTIPACK = "aadp-multipack"
+KEY_FAST_KERNELS = "foak-fast-kernels"
 
 CONFIGURATIONS = {
     KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -171,6 +172,7 @@ def read_configuration(path: str) -> Dict:
     ),
     KEY_AADP_PADDING_FREE: "plugins/attention-and-distributed-packing/configs/padding_free.yaml",
     KEY_AADP_MULTIPACK: "plugins/attention-and-distributed-packing/configs/multipack.yaml",
+    KEY_FAST_KERNELS: "plugins/fused-ops-and-kernels/configs/fast_full.yaml",
 }
 
 # list of (tag, combi) tuples
@@ -190,6 +192,7 @@ def read_configuration(path: str) -> Dict:
     ("accelerated-peft-autogptq-foak-padding-free", (KEY_AADP_PADDING_FREE,KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
     ("accelerated-peft-bnb-nf4-foak-padding-free", (KEY_AADP_PADDING_FREE,KEY_BNB_NF4, KEY_BNB_NF4_FOAK)),
     ("aadp-padding-free-multipack", (KEY_AADP_PADDING_FREE, KEY_AADP_MULTIPACK)),
+    ("foak-fast-kernels", (KEY_FAST_KERNELS,)),
 ]
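One Python pitfall worth flagging in the new combination entry: parentheses alone do not make a tuple, so a single-key combination needs a trailing comma, written as `(KEY_FAST_KERNELS,)` above. Without it the "combi" is just a string, and iterating it yields characters rather than configuration keys:

```python
combi_wrong = ("foak-fast-kernels")   # no comma: just the str 'foak-fast-kernels'
combi_right = ("foak-fast-kernels",)  # trailing comma: a 1-element tuple

print(type(combi_wrong).__name__)  # str
print(type(combi_right).__name__)  # tuple
print(list(combi_wrong)[:3])       # ['f', 'o', 'a'] - characters, not keys
print(list(combi_right))           # ['foak-fast-kernels']
```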

