
Add buck rules in coreml llama transformer #9017


Merged
1 commit merged on Mar 7, 2025
66 changes: 66 additions & 0 deletions examples/apple/coreml/llama/TARGETS
@@ -0,0 +1,66 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_library(
name = "llama_transformer",
srcs = [
"llama_transformer.py",
],
_is_external_target = True,
base_module = "executorch.examples.apple.coreml.llama",
Contributor:

Is it needed?

Contributor Author:

We are adding Buck rules so the internal repo can import the Core ML llama modules from OSS. This will replace the internal kv_io_sliding_llama definition.

visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//caffe2:torch",
"//executorch/examples/models/llama:llama_transformer",
],
)

runtime.python_library(
name = "utils",
srcs = [
"utils.py",
],
_is_external_target = True,
base_module = "executorch.examples.apple.coreml.llama",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//caffe2:torch",
],
)

runtime.python_binary(
name = "export",
srcs = [
"export.py",
],
main_function = "executorch.examples.apple.coreml.llama.export.main",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"fbsource//third-party/pypi/coremltools:coremltools",
":llama_transformer",
":utils",
"//caffe2:torch",
"//executorch/backends/apple/coreml:backend",
"//executorch/backends/apple/coreml:partitioner",
"//executorch/examples/models/llama:source_transformation",
"//executorch/exir/backend:utils",
"//executorch/exir/capture:config",
"//executorch/exir/passes:lib",
"//executorch/exir/passes:quant_fusion_pass",
"//executorch/exir/passes:sym_shape_eval_pass",
"//executorch/exir/program:program",
"//executorch/extension/export_util:export_util",
"//executorch/extension/llm/export:export_lib",
],
)
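As noted in the review thread above, these rules exist so that internal builds can depend on the OSS Core ML llama modules instead of a separate internal definition. A minimal sketch of a hypothetical downstream target (the target name and source file are illustrative, not part of this PR), assuming the same runtime_wrapper load used in this TARGETS file:

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_library(
    # Hypothetical consumer target; only the deps below come from this PR.
    name = "sliding_cache_llama_consumer",
    srcs = ["sliding_cache_llama_consumer.py"],
    deps = [
        "//executorch/examples/apple/coreml/llama:llama_transformer",
        "//executorch/examples/apple/coreml/llama:utils",
    ],
)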
14 changes: 8 additions & 6 deletions examples/apple/coreml/llama/export.py
@@ -6,12 +6,18 @@

import argparse

import sys

import coremltools as ct
import torch
from executorch.backends.apple.coreml.compiler import CoreMLBackend # pyre-ignore
from executorch.backends.apple.coreml.partition import CoreMLPartitioner # pyre-ignore

from executorch.examples.apple.coreml.llama.llama_transformer import (
InputManager,
load_model,
)
from executorch.examples.apple.coreml.llama.utils import (
replace_linear_with_split_linear,
)
from executorch.examples.models.llama.source_transformation.quantize import (
EmbeddingQuantHandler,
)
@@ -24,10 +30,6 @@
from executorch.exir.program._program import to_edge_with_preserved_ops
from executorch.extension.export_util.utils import save_pte_program

sys.path.insert(0, ".")
from llama_transformer import InputManager, load_model
from utils import replace_linear_with_split_linear


def main() -> None:
parser = argparse.ArgumentParser()
4 changes: 2 additions & 2 deletions examples/apple/coreml/llama/llama_transformer.py
@@ -443,7 +443,7 @@ def forward(
if not self.use_cache_list:
k_out = torch.stack(k_out, dim=0)
v_out = torch.stack(v_out, dim=0)
return logits, k_out, v_out
return logits, k_out, v_out # pyre-ignore[7]


def load_model(checkpoint_path, params_path, max_seq_length, use_cache_list):
@@ -614,7 +614,7 @@ def get_inputs(self, tokens: List[int]):
torch.tensor(tokens, dtype=torch.int64),
torch.zeros(self.seq_length - input_length, dtype=torch.int64),
],
axis=-1,
dim=-1,
).reshape(1, -1),
# input_pos
torch.tensor([self.input_pos], dtype=torch.long),
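The second hunk above switches the torch.cat keyword from axis to dim, which is the argument name documented for torch.cat. A minimal standalone sketch of the padding pattern used in get_inputs (the values are illustrative, not taken from the PR):

import torch

tokens = torch.tensor([1, 2, 3], dtype=torch.int64)
pad = torch.zeros(5, dtype=torch.int64)
# Concatenate along the last dimension, then add a batch dimension, as get_inputs does.
padded = torch.cat([tokens, pad], dim=-1).reshape(1, -1)  # shape (1, 8)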
12 changes: 6 additions & 6 deletions examples/apple/coreml/llama/run.py
@@ -5,19 +5,19 @@
# LICENSE file in the root directory of this source tree.

import argparse
import sys

import sentencepiece as spm

import torch
from executorch.examples.apple.coreml.llama.llama_transformer import (
InputManager,
load_model,
)

from executorch.runtime import Runtime


sys.path.insert(0, ".")
from executorch.examples.models.llama.runner.generation import next_token
from executorch.examples.models.llama.tokenizer import tiktoken
from llama_transformer import InputManager, load_model

from executorch.runtime import Runtime


class Tokenizer:
1 change: 1 addition & 0 deletions pyproject.toml
@@ -92,6 +92,7 @@ flatc = "executorch.data.bin:flatc"
# TODO(mnachin T180504136): Do not put examples/models
# into core pip packages. Refactor out the necessary utils
# or core models files into a separate package.
"executorch.examples.apple.coreml.llama" = "examples/apple/coreml/llama"
"executorch.examples.models" = "examples/models"
"executorch.exir" = "exir"
"executorch.extension" = "extension"
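With this pyproject.toml entry, the Core ML llama example modules ship as part of the executorch package, which is what lets export.py and run.py drop the sys.path.insert hack in favor of absolute imports. A minimal usage sketch, assuming executorch is installed with the examples included (the checkpoint and params paths are placeholders):

from executorch.examples.apple.coreml.llama.llama_transformer import (
    InputManager,
    load_model,
)
from executorch.examples.apple.coreml.llama.utils import (
    replace_linear_with_split_linear,
)

# Placeholder paths; point these at real files to actually load a model.
# model = load_model("checkpoint.pth", "params.json", max_seq_length=128, use_cache_list=True)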