
[Executorch][BE] Rename sdpa_with_kv_cache.py to custom_ops.py #7210

Merged — 1 commit, Dec 6, 2024
2 changes: 1 addition & 1 deletion examples/models/llama/eval_llama_lib.py
@@ -106,7 +106,7 @@ def __init__(

# Note: import this after portable_lib
from executorch.extension.llm.custom_ops import ( # noqa
-    sdpa_with_kv_cache, # usort: skip
+    custom_ops, # usort: skip
)
from executorch.kernels import quantized # noqa

2 changes: 1 addition & 1 deletion examples/models/llama/runner/native.py
@@ -23,7 +23,7 @@
from executorch.examples.models.llama.runner.generation import LlamaRunner

# Note: import this after portable_lib
-from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa # usort: skip
+from executorch.extension.llm.custom_ops import custom_ops # noqa # usort: skip
from executorch.kernels import quantized # noqa


2 changes: 1 addition & 1 deletion examples/models/llama/source_transformation/sdpa.py
@@ -99,7 +99,7 @@ def _replace_sdpa_with_custom_op(module: torch.nn.Module):


def replace_sdpa_with_custom_op(module: torch.nn.Module) -> torch.nn.Module:
-    from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa
+    from executorch.extension.llm.custom_ops import custom_ops # noqa

_replace_sdpa_with_custom_op(module)
return module
2 changes: 1 addition & 1 deletion examples/models/llava/test/test_llava.py
@@ -18,7 +18,7 @@
from executorch.extension.pybindings.portable_lib import (
_load_for_executorch_from_buffer,
)
-from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa # usort: skip
+from executorch.extension.llm.custom_ops import custom_ops # noqa # usort: skip
from executorch.kernels import quantized # noqa # usort: skip

logging.basicConfig(level=logging.INFO)
2 changes: 1 addition & 1 deletion examples/models/llava/test/test_pte.py
@@ -14,7 +14,7 @@
from PIL import Image

# Custom ops has to be loaded after portable_lib.
-from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa # usort: skip
+from executorch.extension.llm.custom_ops import custom_ops # noqa # usort: skip
from executorch.kernels import quantized # noqa # usort: skip

FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
2 changes: 1 addition & 1 deletion extension/llm/README.md
@@ -38,7 +38,7 @@ A sampler class in C++ to sample the logistics given some hyperparameters.
## custom_ops
Contains custom op, such as:
- custom sdpa: implements CPU flash attention and avoids copies by taking the kv cache as one of its arguments.
-- _sdpa_with_kv_cache.py_, _op_sdpa_aot.cpp_: custom op definition in PyTorch with C++ registration.
+- _custom_ops.py_, _op_sdpa_aot.cpp_: custom op definition in PyTorch with C++ registration.
- _op_sdpa.cpp_: the optimized operator implementation and registration of _sdpa_with_kv_cache.out_.

## runner
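Editor's aside (not part of this PR's diff): the call sites changed above all rely on the same ordering — load `portable_lib` first, then the renamed `custom_ops` module, which registers the custom SDPA op under `torch.ops.llama`. A minimal sketch of that pattern, assembled from the imports shown in the hunks above:

```python
# Minimal sketch of the import pattern this PR standardizes on; illustrative
# only, not a file from the diff. custom_ops must be imported after
# portable_lib (see the "import this after portable_lib" notes above); the
# import registers the custom SDPA op with PyTorch.
import torch

from executorch.extension.pybindings.portable_lib import (  # noqa # usort: skip
    _load_for_executorch_from_buffer,
)
from executorch.extension.llm.custom_ops import custom_ops  # noqa # usort: skip
from executorch.kernels import quantized  # noqa # usort: skip

# After the imports, the op should be visible in the torch.ops namespace.
assert torch.ops.llama.sdpa_with_kv_cache.default is not None
```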
extension/llm/custom_ops/sdpa_with_kv_cache.py → extension/llm/custom_ops/custom_ops.py (renamed)
@@ -17,7 +17,6 @@

from torch.library import impl

-# TODO rename this file to custom_ops_meta_registration.py
try:
op = torch.ops.llama.sdpa_with_kv_cache.default
assert op is not None
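The hunk above is truncated after the `try` block. For readers skimming the diff, the renamed module's shape is roughly "use the op if it is already registered, otherwise load the library that registers it"; the sketch below reconstructs that shape, with the fallback branch and the library name being assumptions rather than code from this PR.

```python
# Illustrative reconstruction of the register-or-load pattern; only the
# try/assert portion appears in the diff above. The except branch and the
# library name are assumptions for illustration, not taken from this PR.
import torch

try:
    op = torch.ops.llama.sdpa_with_kv_cache.default
    assert op is not None
except (AssertionError, AttributeError, RuntimeError):
    # Hypothetical fallback: load an AOT-built shared library that registers
    # the op. "libcustom_ops_aot_lib.so" is a placeholder path.
    torch.ops.load_library("libcustom_ops_aot_lib.so")
    op = torch.ops.llama.sdpa_with_kv_cache.default
```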
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/targets.bzl
@@ -81,7 +81,7 @@ def define_common_targets():
runtime.python_library(
name = "custom_ops_aot_py",
srcs = [
"sdpa_with_kv_cache.py",
"custom_ops.py",
],
visibility = [
"//executorch/...",
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/test_sdpa_with_kv_cache.py
@@ -11,7 +11,7 @@
import torch
import torch.nn.functional as F

-from .sdpa_with_kv_cache import custom_ops_lib # noqa
+from .custom_ops import custom_ops_lib # noqa


def _sdpa_with_kv_cache_ref(q, k, v, k_cache, v_cache, attn_mask, start_pos, seq_len):