-
Notifications
You must be signed in to change notification settings - Fork 19
Open
Description
Not sure if this is intended behaviour, but the error between the kernelized SiLUActivation and the default PyTorch implementation is huge. This creates erroneous output for GraniteMoeHybrid.
from transformers.activations import SiLUActivation
from transformers.integrations import hub_kernels
import torch
from kernels import (
LayerRepository,
LocalLayerRepository,
use_kernel_mapping,
Mode,
use_kernel_forward_from_hub,
kernelize
)
# Repro script: run a saved problem input through the stock PyTorch SiLU
# and through the hub-kernelized SiLU, so the two outputs can be compared.
if __name__ == "__main__":
    device = torch.device("cuda")
    dtype = torch.bfloat16  # kept from the original repro; not used below

    act = SiLUActivation()
    # Input tensor captured from a failing GraniteMoeHybrid run.
    x = torch.load('error_input.dmp')
    print(x.size())

    # Reference output from the non-kernelized activation.
    y = act(x)

    # Route the "SiLU" layer to the community CUDA kernel for inference.
    silu_kernel = LayerRepository(
        repo_id="kernels-community/activation", layer_name="Silu", version=">=0.1.0"
    )
    kernel_layer_mapping = {
        "SiLU": {"cuda": {Mode.INFERENCE | Mode.TORCH_COMPILE: silu_kernel}},
    }

    with use_kernel_mapping(kernel_layer_mapping):
        act_k = kernelize(act, mode=Mode.INFERENCE, device=device)
        y_ = act_k(x)
    print("Max error:", torch.abs(y - y_).max())

torch.Size([1, 51, 1024])
Fetching 7 files: 100%|██████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 12677.08it/s]
Download complete: : 0.00B [00:00, ?B/s]
Max error: tensor(17.2500, device='cuda:0', dtype=torch.bfloat16)
Download complete: : 0.00B [00:00, ?B/s]
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels