Skip to content

Commit 726bf5e

Browse files
committed
Update on "[ET-VK] De vectorise all vectors in conv2d pw shader to improve perf."
This diff improves the performance of the conv2d pw shader by de-vectorizing all vectors. Differential Revision: [D75423245](https://our.internmc.facebook.com/intern/diff/D75423245/) [ghstack-poisoned]
2 parents 2ee154f + a624f01 commit 726bf5e

File tree

72 files changed

+1587
-1617
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+1587
-1617
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
59d5cf083b4f860dea76fe8936076177f9367f10
1+
01f1cc44cbbfdf6307aa01b803a4ee22f9ade946

.github/workflows/build-presets.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,30 @@ jobs:
6666
./install_requirements.sh > /dev/null
6767
cmake --preset ${{ matrix.preset }}
6868
cmake --build cmake-out -j$(( $(nproc) - 1 ))
69+
70+
windows:
71+
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
72+
strategy:
73+
fail-fast: false
74+
matrix:
75+
preset: [pybind]
76+
with:
77+
job-name: build
78+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
79+
submodules: recursive
80+
timeout: 90
81+
script: |
82+
set -eux
83+
conda init powershell
84+
powershell -Command "& {
85+
\$ErrorActionPreference = 'Stop'
86+
Set-PSDebug -Trace 1
87+
88+
conda create --yes --quiet -n et python=3.12
89+
conda activate et
90+
91+
python install_requirements.py
92+
cmake --preset ${{ matrix.preset }}
93+
\$numCores = [System.Environment]::GetEnvironmentVariable('NUMBER_OF_PROCESSORS') - 1
94+
cmake --build cmake-out -j \$numCores
95+
}"

.github/workflows/trunk.yml

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -692,29 +692,3 @@ jobs:
692692
build-mode: Release
693693
build-tool: cmake
694694
docker-image: executorch-ubuntu-22.04-clang12
695-
696-
unittest-nxp-neutron:
697-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
698-
permissions:
699-
id-token: write
700-
contents: read
701-
with:
702-
runner: linux.2xlarge
703-
docker-image: executorch-ubuntu-22.04-clang12
704-
submodules: 'true'
705-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
706-
timeout: 90
707-
script: |
708-
set -eux
709-
710-
# The generic Linux job chooses to use base env, not the one setup by the image
711-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
712-
conda activate "${CONDA_ENV}"
713-
714-
# Build and install Executorch
715-
PYTHON_EXECUTABLE=python \
716-
CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
717-
.ci/scripts/setup-linux.sh --build-tool "cmake"
718-
719-
# Run pytest
720-
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,6 @@
6767
[submodule "shim"]
6868
path = shim
6969
url = https://github.com/facebook/buck2-shims-meta
70+
[submodule "third-party/json"]
71+
path = third-party/json
72+
url = https://github.com/nlohmann/json.git

backends/apple/coreml/runtime/workspace/executorchcoreml.xcodeproj/project.pbxproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -925,7 +925,7 @@
925925
"$(SRCROOT)/../include/executorch/runtime/core/portable_type/c10",
926926
"$(SRCROOT)/../sdk",
927927
"$(SRCROOT)/../util",
928-
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",
928+
"$(SRCROOT)/../../../../../third-party/json/single_include",
929929
"$(SRCROOT)/../../third-party/coremltools/deps/protobuf/src",
930930
);
931931
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
@@ -957,7 +957,7 @@
957957
"$(SRCROOT)/../include/executorch/runtime/core/portable_type/c10",
958958
"$(SRCROOT)/../sdk",
959959
"$(SRCROOT)/../util",
960-
"$(SRCROOT)/../../third-party/nlohmann_json/single_include",
960+
"$(SRCROOT)/../../../../../third-party/json/single_include",
961961
"$(SRCROOT)/../../third-party/coremltools/deps/protobuf/src",
962962
);
963963
IPHONEOS_DEPLOYMENT_TARGET = 16.0;

backends/apple/coreml/scripts/install_requirements.sh

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,6 @@ mkdir "$COREMLTOOLS_DIR_PATH/build"
4949
cmake -S "$COREMLTOOLS_DIR_PATH" -B "$COREMLTOOLS_DIR_PATH/build"
5050
cmake --build "$COREMLTOOLS_DIR_PATH/build" --parallel --target mlmodel
5151

52-
echo "${green}ExecuTorch: Cloning nlohmann."
53-
git clone https://github.com/nlohmann/json.git "$COREML_DIR_PATH/third-party/nlohmann_json"
54-
STATUS=$?
55-
if [ $STATUS -ne 0 ]; then
56-
echo "${red}ExecuTorch: Failed to clone nlohmann."
57-
exit 1
58-
fi
59-
6052
echo "${green}ExecuTorch: Copying protobuf files."
6153
mkdir -p "$COREML_DIR_PATH/runtime/sdk/format/"
6254
cp -rf "$PROTOBUF_FILES_DIR_PATH" "$COREML_DIR_PATH/runtime/sdk/format/"

backends/arm/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ backends/arm/test/setup_testing.sh
101101
Then you can run the tests with
102102

103103
```
104-
pytest -c /dev/null -v -n auto backends/arm/test --arm_run_corstoneFVP
104+
pytest -c /dev/null -v -n auto backends/arm/test
105105
```
106106

107107
## Passes

backends/arm/_passes/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from .convert_split_to_slice import ConvertSplitToSlicePass # noqa
2121
from .convert_squeezes_to_view import ConvertSqueezesToViewPass # noqa
2222
from .convert_to_clamp import ConvertToClampPass # noqa
23-
from .decompose_batchnorm_pass import DecomposeBatchNormPass # noqa
2423
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
2524
from .decompose_div_pass import DecomposeDivPass # noqa
2625
from .decompose_gelu_pass import DecomposeGeluPass # noqa

backends/arm/_passes/annotate_channels_last_dim_order_pass.py

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# Copyright 2024-2025 Arm Limited and/or its affiliates.
2-
# All rights reserved.
32
#
43
# This source code is licensed under the BSD-style license found in the
54
# LICENSE file in the root directory of this source tree.
@@ -36,7 +35,7 @@
3635
def _transpose_impl(*args, **kwargs):
3736
# Validate length of dim_order array
3837
dim = args[1]
39-
assert len(dim) <= 4
38+
assert len(dim) in (4, 5)
4039
# Pass-through in edge-IR
4140
return args[0]
4241

@@ -45,13 +44,15 @@ class AnnotateChannelsLastDimOrder(ExportPass):
4544
"""
4645
Annotates each node with a tosa_dim_order. tosa_dim_order can be seen as a channels-last dim-order
4746
that in most cases will be (0, 2, 3, 1) for nodes with 4D-shapes. The pass also inserts passthrough_to_tosa._transpose
48-
when a transition between 3D and 4D tensors happen.
47+
when a transition between 3D and 4D/5D tensors happen.
4948
The annotated tosa_dim_order is used to permute the node's shape such that it gives a TOSA-compliant shape.
5049
"""
5150

5251
NHWC_order = (0, 2, 3, 1)
5352
NHWC_inverse_order = (0, 3, 1, 2)
5453
HWCM_order = (2, 3, 0, 1)
54+
NNHWC_order = (0, 1, 3, 4, 2)
55+
NNHWC_inverse_order = (0, 1, 4, 2, 3)
5556

5657
def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
5758
"""
@@ -81,8 +82,12 @@ def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
8182

8283
@staticmethod
8384
def memory_format_differs(shape):
84-
"""Returns true if the shape will have a different memory layout in NCHW and NHWC format"""
85-
if len(shape) >= 4:
85+
"""Returns true if the shape will have a different memory layout in (N)NCHW and (N)NHWC format"""
86+
if len(shape) >= 5:
87+
C = shape[2]
88+
H = shape[3]
89+
W = shape[4]
90+
elif len(shape) == 4:
8691
C = shape[1]
8792
H = shape[2]
8893
W = shape[3]
@@ -98,14 +103,24 @@ def memory_format_differs(shape):
98103
@staticmethod
99104
def is_channel_reshape(input_shape, output_shape):
100105
"""Returns true if the reshape changes the channel dimension"""
101-
if not len(input_shape) == len(output_shape) == 4:
106+
if not (
107+
(len(input_shape) == len(output_shape) and (len(output_shape) in (4, 5)))
108+
or (len(input_shape) == 4 and len(output_shape) == 5)
109+
or (len(input_shape) == 5 and len(output_shape) == 4)
110+
):
102111
return False
103112

104-
C_old = input_shape[1]
105-
C_new = output_shape[1]
113+
C_old = input_shape[-3]
114+
C_new = output_shape[-3]
106115

107-
N_new = output_shape[0]
108-
N_old = input_shape[0]
116+
N_new = (
117+
output_shape[0]
118+
if len(output_shape) == 4
119+
else output_shape[0] * output_shape[1]
120+
)
121+
N_old = (
122+
input_shape[0] if len(input_shape) == 4 else input_shape[0] * input_shape[1]
123+
)
109124

110125
return (N_old != N_new) or (C_old != C_new)
111126

@@ -119,7 +134,11 @@ def insert_input_transpose(node, input_node, graph_module):
119134
torch.ops.passthrough_to_tosa._transpose.default,
120135
args=(
121136
input_node,
122-
list(AnnotateChannelsLastDimOrder.NHWC_inverse_order),
137+
list(
138+
AnnotateChannelsLastDimOrder.NNHWC_inverse_order
139+
if len(get_first_fake_tensor(input_node).size()) == 5
140+
else AnnotateChannelsLastDimOrder.NHWC_inverse_order
141+
),
123142
),
124143
quantize=quantize,
125144
q_params=q_params,
@@ -137,15 +156,28 @@ def insert_output_transpose(node, graph_module):
137156
permute_node = create_node(
138157
graph_module.graph,
139158
torch.ops.passthrough_to_tosa._transpose.default,
140-
args=(node, list(AnnotateChannelsLastDimOrder.NHWC_order)),
159+
args=(
160+
node,
161+
list(
162+
AnnotateChannelsLastDimOrder.NNHWC_order
163+
if len(get_first_fake_tensor(node).size()) == 5
164+
else AnnotateChannelsLastDimOrder.NHWC_order
165+
),
166+
),
141167
)
142168
permute_node.meta["tosa_dim_order"] = (
143-
AnnotateChannelsLastDimOrder.NHWC_order
169+
AnnotateChannelsLastDimOrder.NNHWC_order
170+
if len(get_first_fake_tensor(node).size()) == 5
171+
else AnnotateChannelsLastDimOrder.NHWC_order
172+
)
173+
permute_node.meta["val"] = get_first_fake_tensor(node).permute(
174+
AnnotateChannelsLastDimOrder.NNHWC_order
175+
if len(get_first_fake_tensor(node).size()) == 5
176+
else AnnotateChannelsLastDimOrder.NHWC_order
144177
)
145-
permute_node.meta["val"] = node.meta["val"].permute(
146-
AnnotateChannelsLastDimOrder.NHWC_order
178+
node.meta["tosa_dim_order"] = tuple(
179+
range(len(get_first_fake_tensor(node).size()))
147180
)
148-
node.meta["tosa_dim_order"] = (0, 1, 2, 3)
149181
users = [user for user in node.users if user != permute_node]
150182
for user in users:
151183
user.replace_input_with(node, permute_node)
@@ -159,8 +191,8 @@ def insert_output_transpose(node, graph_module):
159191
def _insert_view_transpose(
160192
input_shape, output_shape, node, input_node, graph_module
161193
):
162-
nchw_to_nhwc = len(input_shape) < 4 and len(output_shape) == 4
163-
nhwc_to_nchw = len(input_shape) == 4 and len(output_shape) < 4
194+
nchw_to_nhwc = len(input_shape) < 4 and len(output_shape) >= 4
195+
nhwc_to_nchw = len(input_shape) >= 4 and len(output_shape) < 4
164196
channel_reshape = AnnotateChannelsLastDimOrder.is_channel_reshape(
165197
output_shape, input_shape
166198
)
@@ -178,11 +210,11 @@ def _insert_view_transpose(
178210

179211
def insert_tosa_transposes(self, graph_module: torch.fx.GraphModule):
180212
"""
181-
Transposes are needed for operators transforming the input to a different rank, as 4D-tensors are assumed to be in NHWC-format, whereas all other are in NCHW format.
213+
Transposes are needed for operators transforming the input to a different rank, as 4D and 5D-tensors are assumed to be in (N)NHWC-format, whereas all other are in (N)NCHW format.
182214
This is relevant for the following cases:
183-
- view: <4D -> 4D
184-
- view: 4D -> <4D
185-
Additionally, a 4D->4D view operation acting on the channel dimension currently needs to be performed in NCHW format, leadning to one extra input and output transpose for this case.
215+
- view: <4D -> >=4D
216+
- view: >=4D -> <4D
217+
Additionally, a 4D/5D->4D/5D view operation acting on the channel dimension currently needs to be performed in (N)NCHW format, leading to one extra input and output transpose for this case.
186218
187219
Transposes can be avoided for shapes where there is no difference in actual memory, e.g for
188220
- H == W == 1
@@ -212,12 +244,13 @@ def call(self, graph_module: torch.fx.GraphModule):
212244
# The weights of TOSA DEPTHWISE_CONV2D have shape (H, W, C, M) which corresponds to
213245
# dim_order = (2, 3, 0, 1) (https://www.mlplatform.org/tosa/tosa_spec.html#_depthwise_conv2d).
214246
dim_order = self.HWCM_order
247+
elif node_data.dim() == 5:
248+
dim_order = self.NNHWC_order # type: ignore[assignment]
215249
else:
216250
dim_order = tuple(range(node_data.dim())) # type: ignore[assignment]
217251
node.meta["tosa_dim_order"] = dim_order
218-
# Take care of cases when:
219-
# 4D (NHWC) -> >4D (NCH)
220-
# 3D (NCH) -> 4D (NHWC)
252+
# Insert TOSA transposes to convert between (N)NCHW and (N)NHWC format.
253+
# See insert_tosa_transposes for insertion conditions.
221254
self.insert_tosa_transposes(graph_module)
222255
graph_module.recompile()
223256
graph_module = super().call(graph_module).graph_module

backends/arm/_passes/arm_pass_manager.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
ConvertSplitToSlicePass,
2525
ConvertSqueezesToViewPass,
2626
ConvertToClampPass,
27-
DecomposeBatchNormPass,
2827
DecomposeCosineSimilarityPass,
2928
DecomposeDivPass,
3029
DecomposeGeluPass,
@@ -85,12 +84,13 @@ def _transform(self, graph_module: GraphModule):
8584
def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
8685
self.add_pass(FuseQuantizedActivationPass())
8786
self.add_pass(RemoveGetItemPass())
88-
self.add_pass(DecomposeBatchNormPass())
8987
self.add_pass(ConvertSplitToSlicePass())
9088
self.add_pass(ConvertMmToBmmPass())
9189
self.add_pass(DecomposeLinearPass())
9290
self.add_pass(DecomposeLinearVectorNormPass())
93-
self.add_pass(DecomposeMeanDimPass())
91+
self.add_pass(
92+
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
93+
)
9494
self.add_pass(ConvertFullLikeToFullPass())
9595
self.add_pass(ConvertToClampPass())
9696
self.add_pass(ConvertMinMaxPass())
@@ -116,7 +116,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
116116
self.add_pass(UnsqueezeBeforeRepeatPass())
117117
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
118118
self.add_pass(DecomposeSumPass())
119-
self.add_pass(Conv1dUnsqueezePass(exported_program))
119+
self.add_pass(Conv1dUnsqueezePass())
120120
self.add_pass(DecomposeSelectPass())
121121
self.add_pass(ConvertSqueezesToViewPass())
122122

@@ -141,10 +141,11 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
141141
self.add_pass(ConvertMmToBmmPass())
142142
self.add_pass(DecomposeLinearPass())
143143
self.add_pass(DecomposeLeakyReLUPass())
144-
self.add_pass(DecomposeBatchNormPass())
145144
self.add_pass(DecomposeLayerNormPass())
146145
self.add_pass(DecomposeVarPass())
147-
self.add_pass(DecomposeMeanDimPass())
146+
self.add_pass(
147+
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
148+
)
148149
self.add_pass(DecomposeNotEqualPass())
149150
self.add_pass(DecomposeDivPass())
150151
self.add_pass(DecomposeSoftmaxPass())
@@ -169,7 +170,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
169170
self.add_pass(UnsqueezeBeforeRepeatPass())
170171
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
171172
self.add_pass(DecomposeSumPass())
172-
self.add_pass(Conv1dUnsqueezePass(exported_program))
173+
self.add_pass(Conv1dUnsqueezePass())
173174
self.add_pass(DecomposeSelectPass())
174175
self.add_pass(ConvertSqueezesToViewPass())
175176

@@ -209,7 +210,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
209210
self.add_pass(ScalarsToAttributePass())
210211
self.add_pass(DecomposeLayerNormPass())
211212
self.add_pass(DecomposeVarPass())
212-
self.add_pass(DecomposeMeanDimPass())
213+
self.add_pass(DecomposeMeanDimPass(graph_module, self.tosa_spec))
213214
self.add_pass(DecomposeNotEqualPass())
214215
self.add_pass(DecomposeCosineSimilarityPass())
215216
self.add_pass(DecomposeDivPass())

0 commit comments

Comments
 (0)