Skip to content

SiLU kernel discrepancy with F.silu #76

@shawntan

Description

@shawntan

Not sure if this is intended behaviour, but the error between the kernelized SiLUActivation and the default PyTorch implementation is huge. This produces erroneous output for GraniteMoeHybrid.

from transformers.activations import SiLUActivation
from transformers.integrations import hub_kernels
import torch
from kernels import (
    LayerRepository,
    LocalLayerRepository,
    use_kernel_mapping, 
    Mode,
    use_kernel_forward_from_hub,
    kernelize
)

if __name__ == "__main__":
    # Reproduce the numerical discrepancy between the eager SiLUActivation
    # and the hub-kernelized Silu layer (kernels-community/activation).
    device = torch.device("cuda")
    dtype = torch.bfloat16

    # Problem input captured from a failing GraniteMoeHybrid run.
    x = torch.load('error_input.dmp')
    # FIX: actually apply the declared device/dtype (previously `dtype` was
    # assigned but never used), so both implementations see identical inputs.
    x = x.to(device=device, dtype=dtype)
    print(x.size())

    act = SiLUActivation()
    # Reference output: stock PyTorch SiLU.
    y = act(x)

    # Map the "SiLU" layer name to the hub kernel for CUDA inference.
    kernel_layer_mapping = {
        "SiLU": {
            "cuda": {
                Mode.INFERENCE | Mode.TORCH_COMPILE: LayerRepository(
                    repo_id="kernels-community/activation", layer_name="Silu", version=">=0.1.0"
                )
            }
        },
    }
    with use_kernel_mapping(kernel_layer_mapping):
        act_k = kernelize(act, mode=Mode.INFERENCE, device=device)
        # Kernelized output on the same input tensor.
        y_ = act_k(x)

    # Worst-case elementwise deviation between the two implementations.
    print("Max error:", torch.abs(y - y_).max())
torch.Size([1, 51, 1024])
Fetching 7 files: 100%|██████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 12677.08it/s]
Download complete: : 0.00B [00:00, ?B/s]
Download complete: : 0.00B [00:00, ?B/s]
Max error: tensor(17.2500, device='cuda:0', dtype=torch.bfloat16)

error_input.dmp

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions