
Add buck rules in coreml llama transformer #9017


Merged
1 commit merged on Mar 7, 2025
66 changes: 66 additions & 0 deletions examples/apple/coreml/llama/TARGETS
@@ -0,0 +1,66 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_library(
name = "llama_transformer",
srcs = [
"llama_transformer.py",
],
_is_external_target = True,
base_module = "executorch.examples.apple.coreml.llama",
Contributor:

Is it needed?

Contributor Author:

We are adding Buck rules so the internal repo can import the Core ML llama modules from OSS. This will replace the internal kv_io_sliding_llama definition.

visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//caffe2:torch",
"//executorch/examples/models/llama:llama_transformer",
],
)

runtime.python_library(
name = "utils",
srcs = [
"utils.py",
],
_is_external_target = True,
base_module = "executorch.examples.apple.coreml.llama",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//caffe2:torch",
],
)

runtime.python_binary(
name = "export",
srcs = [
"export.py",
],
main_function = "executorch.examples.apple.coreml.llama.export.main",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"fbsource//third-party/pypi/coremltools:coremltools",
":llama_transformer",
":utils",
"//caffe2:torch",
"//executorch/backends/apple/coreml:backend",
"//executorch/backends/apple/coreml:partitioner",
"//executorch/examples/models/llama:source_transformation",
"//executorch/exir/backend:utils",
"//executorch/exir/capture:config",
"//executorch/exir/passes:lib",
"//executorch/exir/passes:quant_fusion_pass",
"//executorch/exir/passes:sym_shape_eval_pass",
"//executorch/exir/program:program",
"//executorch/extension/export_util:export_util",
"//executorch/extension/llm/export:export_lib",
],
)
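As noted in the review thread above, these rules exist so that internal builds can depend on the OSS Core ML llama modules instead of a separate internal definition. A minimal sketch of a hypothetical downstream target (the target name and source file are illustrative, not part of this PR), assuming the same runtime_wrapper load used in this TARGETS file:

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_library(
    # Hypothetical consumer target; only the deps below come from this PR.
    name = "sliding_cache_llama_consumer",
    srcs = ["sliding_cache_llama_consumer.py"],
    deps = [
        "//executorch/examples/apple/coreml/llama:llama_transformer",
        "//executorch/examples/apple/coreml/llama:utils",
    ],
)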
14 changes: 8 additions & 6 deletions examples/apple/coreml/llama/export.py
@@ -6,12 +6,18 @@

import argparse

import sys

import coremltools as ct
import torch
from executorch.backends.apple.coreml.compiler import CoreMLBackend # pyre-ignore
from executorch.backends.apple.coreml.partition import CoreMLPartitioner # pyre-ignore

from executorch.examples.apple.coreml.llama.llama_transformer import (
InputManager,
load_model,
)
from executorch.examples.apple.coreml.llama.utils import (
replace_linear_with_split_linear,
)
from executorch.examples.models.llama.source_transformation.quantize import (
EmbeddingQuantHandler,
)
@@ -24,10 +30,6 @@
from executorch.exir.program._program import to_edge_with_preserved_ops
from executorch.extension.export_util.utils import save_pte_program

sys.path.insert(0, ".")
from llama_transformer import InputManager, load_model
from utils import replace_linear_with_split_linear


def main() -> None:
parser = argparse.ArgumentParser()
4 changes: 2 additions & 2 deletions examples/apple/coreml/llama/llama_transformer.py
@@ -443,7 +443,7 @@ def forward(
if not self.use_cache_list:
k_out = torch.stack(k_out, dim=0)
v_out = torch.stack(v_out, dim=0)
return logits, k_out, v_out
return logits, k_out, v_out # pyre-ignore[7]


def load_model(checkpoint_path, params_path, max_seq_length, use_cache_list):
@@ -614,7 +614,7 @@ def get_inputs(self, tokens: List[int]):
torch.tensor(tokens, dtype=torch.int64),
torch.zeros(self.seq_length - input_length, dtype=torch.int64),
],
axis=-1,
dim=-1,
).reshape(1, -1),
# input_pos
torch.tensor([self.input_pos], dtype=torch.long),
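The second hunk above switches the torch.cat keyword from axis to dim, which is the argument name documented for torch.cat. A minimal standalone sketch of the padding pattern used in get_inputs (the values are illustrative, not taken from the PR):

import torch

tokens = torch.tensor([1, 2, 3], dtype=torch.int64)
pad = torch.zeros(5, dtype=torch.int64)
# Concatenate along the last dimension, then add a batch dimension, as get_inputs does.
padded = torch.cat([tokens, pad], dim=-1).reshape(1, -1)  # shape (1, 8)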
12 changes: 6 additions & 6 deletions examples/apple/coreml/llama/run.py
@@ -5,19 +5,19 @@
# LICENSE file in the root directory of this source tree.

import argparse
import sys

import sentencepiece as spm

import torch
from executorch.examples.apple.coreml.llama.llama_transformer import (
InputManager,
load_model,
)

from executorch.runtime import Runtime


sys.path.insert(0, ".")
from executorch.examples.models.llama.runner.generation import next_token
from executorch.examples.models.llama.tokenizer import tiktoken
from llama_transformer import InputManager, load_model

from executorch.runtime import Runtime


class Tokenizer:
1 change: 1 addition & 0 deletions pyproject.toml
@@ -92,6 +92,7 @@ flatc = "executorch.data.bin:flatc"
# TODO(mnachin T180504136): Do not put examples/models
# into core pip packages. Refactor out the necessary utils
# or core models files into a separate package.
"executorch.examples.apple.coreml.llama" = "examples/apple/coreml/llama"
"executorch.examples.models" = "examples/models"
"executorch.exir" = "exir"
"executorch.extension" = "extension"
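With this pyproject.toml entry, the Core ML llama example modules ship as part of the executorch package, which is what lets export.py and run.py drop the sys.path.insert hack in favor of absolute imports. A minimal usage sketch, assuming executorch is installed with the examples included (the checkpoint and params paths are placeholders):

from executorch.examples.apple.coreml.llama.llama_transformer import (
    InputManager,
    load_model,
)
from executorch.examples.apple.coreml.llama.utils import (
    replace_linear_with_split_linear,
)

# Placeholder paths; point these at real files to actually load a model.
# model = load_model("checkpoint.pth", "params.json", max_seq_length=128, use_cache_list=True)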