
Commit c830fc1

Adding activation kernels (#40890)
* first commit
* add mode
* revert modeling
* add compile
* rm print
1 parent f6999b0 commit c830fc1

File tree

2 files changed: +31 -0 lines changed


src/transformers/activations.py

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@
 import torch
 from torch import Tensor, nn
 
+from .integrations.hub_kernels import use_kernel_forward_from_hub
 from .utils import logging
 from .utils.import_utils import is_torchdynamo_compiling
 
@@ -38,6 +39,7 @@ def forward(self, input: Tensor) -> Tensor:
         return nn.functional.gelu(input, approximate="tanh")
 
 
+@use_kernel_forward_from_hub("NewGELU")
 class NewGELUActivation(nn.Module):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
@@ -70,6 +72,7 @@ def forward(self, input: Tensor) -> Tensor:
         return self.act(input)
 
 
+@use_kernel_forward_from_hub("FastGELU")
 class FastGELUActivation(nn.Module):
     """
     Applies GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
@@ -79,6 +82,7 @@ def forward(self, input: Tensor) -> Tensor:
         return 0.5 * input * (1.0 + torch.tanh(input * 0.7978845608 * (1.0 + 0.044715 * input * input)))
 
 
+@use_kernel_forward_from_hub("QuickGELU")
 class QuickGELUActivation(nn.Module):
     """
     Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
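
For context, the three decorated modules are eager PyTorch approximations of GELU, and the Hub kernels registered in hub_kernels.py act as drop-in replacements for their forward passes. Only FastGELUActivation's forward is visible in the hunks above; the QuickGELU and NewGELU expressions in the sketch below are the standard approximations and are included here as assumptions, not lines taken from this diff.

import math
import torch

x = torch.randn(8)

# FastGELU, exactly as in the forward() shown in the hunk above:
fast = 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))

# QuickGELU (sigmoid form) and NewGELU (tanh form) -- standard formulas,
# assumed rather than copied from this diff:
quick = x * torch.sigmoid(1.702 * x)
new = 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x**3)))

# Both tanh variants stay very close to the exact GELU for typical activation values.
print(torch.allclose(fast, torch.nn.functional.gelu(x), atol=1e-2))
print(torch.allclose(new, torch.nn.functional.gelu(x), atol=1e-2))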

src/transformers/integrations/hub_kernels.py

Lines changed: 27 additions & 0 deletions
@@ -84,6 +84,33 @@
             )
         },
     },
+    "FastGELU": {
+        "cuda": {
+            Mode.INFERENCE | Mode.TORCH_COMPILE: LayerRepository(
+                repo_id="kernels-community/activation",
+                layer_name="FastGELU",
+                version=">=0.0.4,<0.1.0",
+            )
+        }
+    },
+    "QuickGELU": {
+        "cuda": {
+            Mode.INFERENCE | Mode.TORCH_COMPILE: LayerRepository(
+                repo_id="kernels-community/activation",
+                layer_name="QuickGELU",
+                version=">=0.0.4,<0.1.0",
+            )
+        }
+    },
+    "NewGELU": {
+        "cuda": {
+            Mode.INFERENCE | Mode.TORCH_COMPILE: LayerRepository(
+                repo_id="kernels-community/activation",
+                layer_name="NewGELU",
+                version=">=0.0.4,<0.1.0",
+            )
+        }
+    },
 }
 
 register_kernel_mapping(_KERNEL_MAPPING)
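
With the mapping registered, any module decorated with a matching name (NewGELU, FastGELU, QuickGELU) can have its forward replaced by the corresponding kernels-community/activation CUDA kernel when the model is kernelized. A minimal usage sketch, assuming the kernelize entry point of the external kernels package that hub_kernels.py wraps (the exact signature may differ across kernels versions):

# Assumed usage of the external `kernels` package; treat the call as a sketch.
from kernels import Mode, kernelize
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2").to("cuda")

# Swap the forwards of decorated layers (e.g. NewGELUActivation in GPT-2) for
# the Hub kernels registered in _KERNEL_MAPPING above.
model = kernelize(model, mode=Mode.INFERENCE | Mode.TORCH_COMPILE, device="cuda")

The Mode.INFERENCE | Mode.TORCH_COMPILE key appears to make the same kernel entry apply both to plain inference and to compiled modules, matching the "add compile" step in the commit message.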
