pytorch
diff --git a/‎.ci/scripts/setup-openvino.sh
Lines changed: 28 additions & 0 deletions b/‎.ci/scripts/setup-openvino.sh
Lines changed: 28 additions & 0 deletions
diff --git a/‎.ci/scripts/test_llama.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_llama.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_openvino.sh
Lines changed: 16 additions & 0 deletions b/‎.ci/scripts/test_openvino.sh
Lines changed: 16 additions & 0 deletions
diff --git a/‎.ci/scripts/test_phi_3_mini.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_phi_3_mini.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_qnn_static_llama.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_qnn_static_llama.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/pull.yml
Lines changed: 22 additions & 0 deletions b/‎.github/workflows/pull.yml
Lines changed: 22 additions & 0 deletions
diff --git a/‎.gitmodules
Lines changed: 2 additions & 2 deletions b/‎.gitmodules
Lines changed: 2 additions & 2 deletions
diff --git a/‎.lintrunner.toml
Lines changed: 2 additions & 0 deletions b/‎.lintrunner.toml
Lines changed: 2 additions & 0 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 13 additions & 3 deletions b/‎CMakeLists.txt
Lines changed: 13 additions & 3 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 0 deletions b/‎README.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/apple/coreml/TARGETS
Lines changed: 1 addition & 1 deletion b/‎backends/apple/coreml/TARGETS
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/arm/_passes/__init__.py
Lines changed: 2 additions & 1 deletion b/‎backends/arm/_passes/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 6 additions & 3 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 6 additions & 3 deletions
diff --git a/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 2 additions & 2 deletions b/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/_passes/cast_to_int32_pass.py
Lines changed: 54 additions & 0 deletions b/‎backends/arm/_passes/cast_to_int32_pass.py
Lines changed: 54 additions & 0 deletions
diff --git a/‎backends/arm/_passes/fuse_constant_ops_pass.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/fuse_constant_ops_pass.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/match_arg_ranks_pass.py
Lines changed: 3 additions & 0 deletions b/‎backends/arm/_passes/match_arg_ranks_pass.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/right_shift_support.py
Lines changed: 4 additions & 1 deletion b/‎backends/arm/operator_support/right_shift_support.py
Lines changed: 4 additions & 1 deletion
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 5 additions & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎backends/arm/operators/__init__.py
Lines changed: 1 addition & 1 deletion b/‎backends/arm/operators/__init__.py
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex  # abort on the first failing command and echo each command as it runs
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+git clone https://github.com/openvinotoolkit/openvino.git  # cloned into the current directory
+cd openvino && git checkout releases/2025/1  # pin the checkout to the releases/2025/1 ref
+git submodule update --init --recursive
+sudo ./install_build_dependencies.sh  # installer shipped in the OpenVINO checkout; run as root
+mkdir build && cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON
+make -j"$(nproc)"  # one job per available CPU; quoted so the substitution is never word-split
+
+cd ..
+cmake --install build --prefix dist  # installs the build tree into openvino/dist
+
+source dist/setupvars.sh  # NOTE(review): presumably exports the OpenVINO environment for the steps below - confirm
+cd ../backends/openvino  # sibling of the clone, so the script must be started from the repository root
+pip install -r requirements.txt
+cd scripts
+./openvino_build.sh --enable_python
@@ -269,7 +269,7 @@ $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
 echo "Creating tokenizer.bin"
-$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+$PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 
 RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
 
@@ -55,7 +55,7 @@ cmake --build cmake-out/examples/models/llama -j16 --config Release
 download_stories_model_artifacts
 
 echo "Creating tokenizer.bin"
-$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+$PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 # Export model
 LLAMA_CHECKPOINT=stories110M.pt
 
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex  # abort on the first failing command and echo each command as it runs
+
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+source openvino/dist/setupvars.sh  # relative path: expects openvino/dist under the current directory
+cd backends/openvino/tests
+"${PYTHON_EXECUTABLE:-python}" test_runner.py --test_type ops  # honour the caller's interpreter, defaulting to python
+"${PYTHON_EXECUTABLE:-python}" test_runner.py --test_type models
@@ -56,7 +56,7 @@ cmake_build_phi_3_mini() {
 prepare_tokenizer() {
   echo "Downloading and converting tokenizer.model"
   wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
-  $PYTHON_EXECUTABLE -m executorch.extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 }
 
 # Export phi-3-mini model to pte
 
@@ -30,7 +30,7 @@ pip install graphviz
 # Download stories llama110m artifacts
 download_stories_model_artifacts
 echo "Creating tokenizer.bin"
-$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+$PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 set +e
 # Compile only as weight sharing is not applicable on x86
 
@@ -736,3 +736,25 @@ jobs:
         conda activate "${CONDA_ENV}"
 
         # placeholder for mediatek to add more tests
+
+  test-openvino-linux:
+    name: test-openvino-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main  # reusable workflow, tracked at main rather than a pinned SHA
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-gcc9
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90  # presumably minutes - confirm against the linux_job_v2 workflow inputs
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-openvino.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_openvino.sh
@@ -1,9 +1,9 @@
 [submodule "backends/arm/third-party/ethos-u-core-driver"]
 	path = backends/arm/third-party/ethos-u-core-driver
-	url = https://github.com/pytorch-labs/ethos-u-core-driver-mirror
+	url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
 [submodule "backends/arm/third-party/serialization_lib"]
 	path = backends/arm/third-party/serialization_lib
-	url = https://github.com/pytorch-labs/tosa_serialization_lib-mirror
+	url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers
 
@@ -299,12 +299,14 @@ include_patterns = [
     # TODO(https://github.com/pytorch/executorch/issues/7441): Gradually start enabling all folders.
     # 'backends/**/*.py',
     'backends/arm/**/*.py',
+    'backends/openvino/**/*.py',
     'build/**/*.py',
     'codegen/**/*.py',
     # 'devtools/**/*.py',
     'devtools/visualization/**/*.py',
     'docs/**/*.py',
     # 'examples/**/*.py',
+    'examples/openvino/**/*.py',
     # 'exir/**/*.py',
     # 'extension/**/*.py',
     'kernels/**/*.py',
 
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -161,7 +161,7 @@ if(OPTIMIZE_SIZE)
   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Os")
 else()
   # -O2: Moderate opt.
-  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
+  set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
 endif()
 
 option(EXECUTORCH_BUILD_ANDROID_JNI "Build Android JNI" OFF)
@@ -204,6 +204,8 @@ option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF)
 
 option(EXECUTORCH_BUILD_NEURON "Build the backends/mediatek directory" OFF)
 
+option(EXECUTORCH_BUILD_OPENVINO "Build the Openvino backend" OFF)
+
 option(EXECUTORCH_BUILD_PYBIND "Build the Python Bindings" OFF)
 
 option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF)
@@ -715,6 +717,10 @@ if(EXECUTORCH_BUILD_NEURON)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mediatek)
 endif()
 
+if(EXECUTORCH_BUILD_OPENVINO)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/openvino)
+endif()
+
 if(EXECUTORCH_BUILD_QNN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/qualcomm)
 endif()
@@ -751,7 +757,7 @@ if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/tokenizer)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/tokenizers)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -817,6 +823,10 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs mpsdelegate)
   endif()
 
+  if(EXECUTORCH_BUILD_OPENVINO)
+    list(APPEND _dep_libs openvino_backend)
+  endif()
+
   if(EXECUTORCH_BUILD_XNNPACK)
     # need to explicitly specify XNNPACK and microkernels-prod
     # here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
 
@@ -29,6 +29,7 @@ Platform Support:
   - Arm
   - Cadence
   - MediaTek
+  - OpenVINO
   - Qualcomm
   - Vulkan
   - XNNPACK
 
@@ -72,7 +72,7 @@ runtime.cxx_python_extension(
     headers = glob([
         "runtime/inmemoryfs/**/*.hpp",
     ]),
-    base_module = "",
+    base_module = "executorch.backends.apple.coreml",
     compiler_flags = [
         "-std=c++17",
     ],
 
@@ -7,7 +7,8 @@
 from . import arm_pass_utils  # noqa
 from .annotate_channels_last_dim_order_pass import AnnotateChannelsLastDimOrder  # noqa
 from .annotate_decomposed_matmul import AnnotateDecomposedMatmulPass  # noqa
-from .cast_int64_pass import CastInt64ToInt32Pass  # noqa
+from .cast_int64_pass import CastInt64BuffersToInt32Pass  # noqa
+from .cast_to_int32_pass import CastToInt32Pass  # noqa
 from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass  # noqa
 from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass  # noqa
 from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass  # noqa
 
@@ -10,7 +10,8 @@
 from executorch.backends.arm._passes import (
     AnnotateChannelsLastDimOrder,
     AnnotateDecomposedMatmulPass,
-    CastInt64ToInt32Pass,
+    CastInt64BuffersToInt32Pass,
+    CastToInt32Pass,
     ComputeConstantOpsAOT,
     Conv1dUnsqueezePass,
     ConvertAnyDefaultDimDimsPass,
@@ -80,6 +81,8 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertToClampPass())
         self.add_pass(ConvertMinMaxPass())
         self.add_pass(ConvertAnyDefaultDimDimsPass())
+        if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
+            self.add_pass(CastToInt32Pass())
 
         self.add_pass(ReplaceScalarWithTensorArgPass())
         self.add_pass(AnnotateDecomposedMatmulPass())
@@ -94,7 +97,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(UnsqueezeBeforeRepeatPass())
-        self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSelectPass())
@@ -141,7 +144,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(UnsqueezeBeforeRepeatPass())
-        self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSelectPass())
 
@@ -15,13 +15,13 @@
 logger.setLevel(logging.WARNING)
 
 
-class CastInt64ToInt32Pass(ExportPass):
+class CastInt64BuffersToInt32Pass(ExportPass):
     """
     Cast int64 buffers to int32 if the int64 data is in int32 range.
     """
 
     def __init__(self, exported_program: torch.export.ExportedProgram):
-        super(CastInt64ToInt32Pass, self).__init__()
+        super(CastInt64BuffersToInt32Pass, self).__init__()
         self.exported_program = exported_program
 
     def _assert_within_int32(self, tensor: torch.Tensor, node: torch.fx.Node):
 
@@ -0,0 +1,54 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+class CastToInt32Pass(ExportPass):
+    """Casts the input to int32 if it is not already and casts back the output to the original input dtype."""
+
+    targeted_ops = {  # only these edge ops are rewritten; every other op passes through unchanged
+        exir_ops.edge.aten.bitwise_left_shift.Tensor,
+        exir_ops.edge.aten.bitwise_right_shift.Tensor,
+    }
+
+    def call_operator(self, op, args, kwargs, meta):  # overrides ExportPass.call_operator; returns the (possibly rewritten) op result
+        if op not in self.targeted_ops:
+            return super().call_operator(op, args, kwargs, meta)
+
+        new_args: list = []  # operands handed to the shift op, each one int32
+        did_cast = False  # set once any operand needed a cast, so the result is cast back below
+        for arg in args:
+            if arg.data.dtype != torch.int32:
+                new_args.append(  # replace the operand with a copy converted to int32
+                    super().call_operator(
+                        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+                        (arg,),
+                        {"dtype": torch.int32},
+                        meta,
+                    )
+                )
+                did_cast = True
+            else:
+                new_args.append(arg)
+
+        output = super().call_operator(  # re-emit the targeted op on the int32 operands
+            op,
+            tuple(new_args),
+            {},  # NOTE(review): incoming kwargs are not forwarded; presumably always empty for these ops - confirm
+            meta,
+        )
+
+        if did_cast:
+            output = super().call_operator(
+                exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+                (output,),
+                {"dtype": args[0].data.dtype},  # result dtype follows the first original operand
+                meta,
+            )
+        return output
@@ -161,6 +161,7 @@ def f(node_name_pre_computed):
         exir_ops.edge.aten.arange.start_step,
         exir_ops.edge.aten.eye.default,
         exir_ops.edge.aten.linspace.default,
+        torch.ops.aten.scalar_tensor.default,
     ]
 
     def __init__(self, exported_program: ExportedProgram) -> None:
 
@@ -45,6 +45,9 @@ def __init__(self, exported_program):
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.mul.Tensor,
         exir_ops.edge.aten.div.Tensor,
+        exir_ops.edge.aten.bitwise_right_shift.Tensor,
+        exir_ops.edge.aten.bitwise_left_shift.Tensor,
+        exir_ops.edge.aten.eq.Tensor,
     ]
 
     def _match_op_rank(self, graph_module, node, arg, max_rank):
 
@@ -22,7 +22,10 @@
 
 @register_tosa_support_check
 class RightShiftSupported(SupportedTOSAOperatorCheck):
-    targets = [exir_ops.edge.aten.__rshift__.Scalar]
+    targets = [
+        exir_ops.edge.aten.bitwise_right_shift.Tensor,
+        exir_ops.edge.aten.__rshift__.Scalar,
+    ]
 
     tosa_specs = [
         TosaSpecification.create_from_string("TOSA-0.80+BI"),
 
@@ -158,6 +158,7 @@ def is_node_supported(
             exir_ops.edge.aten.hardswish.default,
             exir_ops.edge.aten.div.Tensor,
             exir_ops.edge.aten.eq.Tensor,
+            exir_ops.edge.aten.eq.Scalar,
             exir_ops.edge.aten.exp.default,
             exir_ops.edge.aten.log.default,
             exir_ops.edge.aten.linear.default,
@@ -205,6 +206,9 @@ def is_node_supported(
             exir_ops.edge.aten.amin.default,
             exir_ops.edge.aten.eye.default,
             exir_ops.edge.aten.linspace.default,
+            exir_ops.edge.aten.bitwise_left_shift.Tensor,
+            exir_ops.edge.aten.__lshift__.Scalar,
+            torch.ops.aten.scalar_tensor.default,
         ]
 
         return supported
@@ -232,6 +236,7 @@ class EthosU55NotSupported(OperatorSupportBase):
         exir_ops.edge.aten.amax.default,  # REDUCE_MAX
         exir_ops.edge.aten.amin.default,  # REDUCE_MIN
         exir_ops.edge.aten.eq.Tensor,
+        exir_ops.edge.aten.eq.Scalar,
         exir_ops.edge.aten.ge.Tensor,
         exir_ops.edge.aten.gt.Tensor,
         exir_ops.edge.aten.le.Tensor,
 
@@ -35,7 +35,7 @@
     op_reciprocal,
     op_repeat,
     op_rescale,
-    op_rshift,
+    op_rshift_tensor,
     op_rsqrt,
     op_sigmoid,
     op_slice,
Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ cmake_build_phi_3_mini() {`
`56`	`56`	`prepare_tokenizer() {`
`57`	`57`	`echo "Downloading and converting tokenizer.model"`
`58`	`58`	`wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"`
`59`		`- $PYTHON_EXECUTABLE -m executorch.extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin`
	`59`	`+ $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin`
`60`	`60`	`}`
`61`	`61`
`62`	`62`	`# Export phi-3-mini model to pte`
Original file line number	Diff line number	Diff line change
`@@ -161,6 +161,7 @@ def f(node_name_pre_computed):`
`161`	`161`	`exir_ops.edge.aten.arange.start_step,`
`162`	`162`	`exir_ops.edge.aten.eye.default,`
`163`	`163`	`exir_ops.edge.aten.linspace.default,`
	`164`	`+ torch.ops.aten.scalar_tensor.default,`
`164`	`165`	`]`
`165`	`166`
`166`	`167`	`def __init__(self, exported_program: ExportedProgram) -> None:`
Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,9 @@ def __init__(self, exported_program):`
`45`	`45`	`exir_ops.edge.aten.sub.Tensor,`
`46`	`46`	`exir_ops.edge.aten.mul.Tensor,`
`47`	`47`	`exir_ops.edge.aten.div.Tensor,`
	`48`	`+ exir_ops.edge.aten.bitwise_right_shift.Tensor,`
	`49`	`+ exir_ops.edge.aten.bitwise_left_shift.Tensor,`
	`50`	`+ exir_ops.edge.aten.eq.Tensor,`
`48`	`51`	`]`
`49`	`52`
`50`	`53`	`def _match_op_rank(self, graph_module, node, arg, max_rank):`