
Commit d886022

Merge branch 'main' into add-gt-lt-scalar-ops
2 parents 7265c73 + c9c5481 commit d886022

179 files changed: +1305, -2633 lines (only a subset of the changed files is shown below)


.ci/scripts/build_android_instrumentation.sh

Lines changed: 3 additions & 3 deletions

@@ -12,10 +12,10 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
 fi
 which "${PYTHON_EXECUTABLE}"

-mkdir -p "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
-cp extension/module/test/resources/add.pte "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
+mkdir -p extension/android/executorch_android/src/androidTest/resources
+cp extension/module/test/resources/add.pte extension/android/executorch_android/src/androidTest/resources

-pushd "${BUILD_AAR_DIR}"
+pushd extension/android
 ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
 ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
 popd

.ci/scripts/test_ios_ci.sh

Lines changed: 5 additions & 1 deletion

@@ -7,7 +7,7 @@

 set -e

-APP_PATH="examples/demo-apps/apple_ios/ExecuTorchDemo/ExecuTorchDemo"
+APP_PATH="executorch-examples/apple/ExecuTorchDemo/ExecuTorchDemo"
 MODEL_NAME="mv3"
 SIMULATOR_NAME="executorch"

@@ -34,6 +34,10 @@ say() {
   echo -e "\033[1m\n\t** $1 **\n\033[0m"
 }

+say "Cloning the Demo App"
+
+git clone --depth 1 https://github.com/pytorch-labs/executorch-examples.git
+
 say "Installing CoreML Backend Requirements"

 ./backends/apple/coreml/scripts/install_requirements.sh

.github/workflows/_android.yml

Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ jobs:

          mkdir -p ${ARTIFACTS_DIR_NAME}/library_test_dir
          bash .ci/scripts/build_android_instrumentation.sh
-         cp ${BUILD_AAR_DIR}/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"
+         cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"

          mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
          bash examples/models/llama/install_requirements.sh

.github/workflows/android-release-artifacts.yml

Lines changed: 24 additions & 1 deletion

@@ -7,6 +7,10 @@ on:
         description: Version name to be uploaded for AAR release
         required: false
         type: string
+      upload_to_maven:
+        description: Upload the AAR to maven staging repository
+        required: false
+        type: boolean

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -31,11 +35,14 @@ jobs:
   build-aar:
     name: build-aar
     needs: check-if-aar-exists
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.7
+    secrets: inherit
     permissions:
       id-token: write
       contents: read
     with:
+      secrets-env: EXECUTORCH_MAVEN_SIGNING_KEYID EXECUTORCH_MAVEN_SIGNING_PASSWORD EXECUTORCH_MAVEN_CENTRAL_PASSWORD EXECUTORCH_MAVEN_CENTRAL_USERNAME EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
@@ -52,6 +59,16 @@
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
       export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

+      mkdir -p ~/.gradle
+      touch ~/.gradle/gradle.properties
+      echo "signing.keyId=${SECRET_EXECUTORCH_MAVEN_SIGNING_KEYID}" >> ~/.gradle/gradle.properties
+      echo "signing.password=${SECRET_EXECUTORCH_MAVEN_SIGNING_PASSWORD}" >> ~/.gradle/gradle.properties
+      echo "mavenCentralUsername=${SECRET_EXECUTORCH_MAVEN_CENTRAL_USERNAME}" >> ~/.gradle/gradle.properties
+      echo "mavenCentralPassword=${SECRET_EXECUTORCH_MAVEN_CENTRAL_PASSWORD}" >> ~/.gradle/gradle.properties
+      echo "signing.secretKeyRingFile=/tmp/secring.gpg" >> ~/.gradle/gradle.properties
+
+      echo -n "$SECRET_EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS" | base64 -d > /tmp/secring.gpg
+
       # Build AAR Package
       mkdir aar-out
       export BUILD_AAR_DIR=aar-out
@@ -61,6 +78,12 @@

       shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch.aar"

+      # Publish to maven staging
+      UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
+      if [[ "$UPLOAD_TO_MAVEN" == "true" ]]; then
+        (cd aar-out; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
+      fi
+
   upload-release-aar:
     name: upload-release-aar
     needs: build-aar

CONTRIBUTING.md

Lines changed: 6 additions & 10 deletions

@@ -1,7 +1,6 @@
 Thank you for your interest in contributing to ExecuTorch! We want to make
 it easy to contribute to this project.

-

 ## Dev Install

@@ -91,7 +90,7 @@ executorch
 │ └── <a href="runtime/platform">platform</a> - Layer between architecture specific code and portable C++.
 ├── <a href="schema">schema</a> - ExecuTorch PTE file format flatbuffer schemas.
 ├── <a href="scripts">scripts</a> - Utility scripts for building libs, size management, dependency management, etc.
-├── <a href="shim">shim</a> - Compatibility layer between OSS and Internal builds.
+├── <a href="shim_et">shim_et</a> - Compatibility layer between OSS and Internal builds.
 ├── <a href="test">test</a> - Broad scoped end-to-end tests.
 ├── <a href="third-party">third-party</a> - Third-party dependencies.
 ├── <a href="tools">tools</a> - Tools for building ExecuTorch from source, for different built tools (CMake, Buck).
@@ -192,9 +191,6 @@ in the Github repo.

 ## Coding Style

-Goal: Encourage standards that make it easier to read, edit, maintain, and debug
-the ExecuTorch code.
-
 ### lintrunner

 We use [`lintrunner`](https://pypi.org/project/lintrunner/) to help make sure the
@@ -259,7 +255,7 @@ toolchains, and having access to relatively modern C++ features.

 #### C/C++ standard library usage

-**Restricted usage of the C++ standard library.**
+**Restricted usage of the C++ standard library**

 Rationale: ExecuTorch is intended to be portable to bare-metal systems that lack
 certain features, like dynamic memory, threading, and locking, required by parts
@@ -280,7 +276,7 @@ careful to also manually destroy objects initialized in this way.

 #### C++ language features

-**Exceptions: Do not use.**
+**Exceptions: Do not use**
 - Rationale: Exceptions are not widely supported on some classes of
   microcontrollers and DSPs, and they can significantly increase binary size.

@@ -289,12 +285,12 @@ must work with threading**
 - Rationale: The core runtime must work on systems that do not have threading
   support.

-**RTTI, dynamic_cast, and `<typeid>`: Do not use.**
+**RTTI, dynamic_cast, and `<typeid>`: Do not use**
 - Rationale: RTTI adds extra data to every virtual class. ExecuTorch doesn't
   have a strong need for `dynamic_cast` and friends, so it's better to reduce
   the binary size.

-**Templates and template metaprogramming: Be careful and avoid if possible.**
+**Templates and template metaprogramming: Be careful and avoid if possible**
 - Rationale: Most templating results in code generation, and is one of the most
   common sources of binary bloat. Some use of templates is fine (e.g. an
   `ArrayRef<T>`, or code that handles multiple `ScalarType` types), but for the
@@ -359,7 +355,7 @@ docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
 for basics.

 1. Push your branch to your fork of `pytorch/executorch`. Most people do not
-   have permission to push a branch directoy to the upstream repo.
+   have permission to push a branch directory to the upstream repo.
 1. Create your PR
    - Use the `main` branch as the base.
    - Give the PR a clear and descriptive title. It will become the title of the

README.md

Lines changed: 2 additions & 2 deletions

@@ -49,9 +49,9 @@ Key value propositions of ExecuTorch are:
 ## Getting Started
 To get started you can:

-- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) on getting things running locally and deploy a model to a device
+- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) to start playing around right away
-- Jump straight into LLMs use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)

 ## Feedback and Engagement

backends/cadence/aot/quantizer/quantizer.py

Lines changed: 22 additions & 22 deletions

@@ -43,7 +43,7 @@
 from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer


-act_qspec_asym8u = QuantizationSpec(
+act_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -52,7 +52,7 @@
     observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
 )

-wgt_qspec_asym8u = QuantizationSpec(
+wgt_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -61,7 +61,7 @@
     observer_or_fake_quant_ctr=MinMaxObserver,
 )

-wgt_qspec_asym8s = QuantizationSpec(
+wgt_qspec_sym8s = QuantizationSpec(
     dtype=torch.int8,
     quant_min=-128,
     quant_max=127,
@@ -72,17 +72,17 @@

 bias_qspec: Optional[QuantizationSpec] = None

-qconfig_A8uW8u = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8u,
+qconfig_A8W8 = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_asym8s,
     None,
 )

-qconfig_A8uW8s = QuantizationConfig(
-    act_qspec_asym8u,
-    act_qspec_asym8u,
-    wgt_qspec_asym8s,
+qconfig_A8W8sym = QuantizationConfig(
+    act_qspec_asym8s,
+    act_qspec_asym8s,
+    wgt_qspec_sym8s,
     None,
 )

@@ -189,15 +189,15 @@ def get_supported_operators(cls) -> List[OperatorConfig]:

 def get_cadence_default_quantizers() -> List[Quantizer]:
     return [
-        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(BmmPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8uW8s),
-        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(LinearPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8uW8u),
-        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8uW8u),
+        CadenceAtenQuantizer(AddmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(BmmPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8W8sym),
+        CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(LinearPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(MatmulPattern(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern0(), qconfig_A8W8),
+        CadenceAtenQuantizer(ReluPattern1(), qconfig_A8W8),
     ]


@@ -244,6 +244,6 @@ class CadenceWakeWordQuantizer(CadenceQuantizer):
     def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
         if quantizers is None:
             quantizers = get_cadence_default_quantizers()
-        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8uW8u))
-        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8uW8u))
+        quantizers.append(CadenceAtenQuantizer(AddPattern(), qconfig_A8W8))
+        quantizers.append(CadenceAtenQuantizer(CatPattern(), qconfig_A8W8))
         super().__init__(quantizers)
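The rename fixes the names, not the behavior: every spec in this file already uses dtype=torch.int8 with range [-128, 127], so the old "8u" (unsigned) suffixes were misleading. As the configs show, the four positional specs are input activation, output activation, weight, and bias (None leaves bias unquantized); the conv configs pair asymmetric activations with symmetric weights. A standalone sketch of the two weight schemes the new names distinguish (hypothetical helpers for illustration, not ExecuTorch API):

import torch

# "asym8s": asymmetric signed int8 -- a zero_point shifts the range so an
# arbitrary float interval maps onto [-128, 127].
def quantize_asym8s(x: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
    return torch.clamp(torch.round(x / scale) + zero_point, -128, 127).to(torch.int8)

# "sym8s": symmetric signed int8 -- the zero_point is fixed at 0, the scheme
# qconfig_A8W8sym applies to conv weights above.
def quantize_sym8s(x: torch.Tensor, scale: float) -> torch.Tensor:
    return torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)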

backends/qualcomm/_passes/decompose_einsum.py

Lines changed: 3 additions & 0 deletions

@@ -8,6 +8,8 @@
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx.experimental.proxy_tensor import make_fx

+from .utils import copy_nn_module_stack
+

 class DecomposeEinsum(ExportPass):
     """
@@ -36,6 +38,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
                 remap[f"arg1_{i+1}"] = arg

             for decomposed_node in decomposed_module.graph.nodes:
+                copy_nn_module_stack(node, decomposed_node)
                 # This is the arg[0] equation string, which is not required anymore after decomposition
                 if "arg0" in decomposed_node.name:
                     continue

backends/qualcomm/_passes/decompose_linalg_vector_norm.py

Lines changed: 3 additions & 0 deletions

@@ -8,6 +8,8 @@
 from executorch.exir import to_edge
 from executorch.exir.pass_base import ExportPass, PassResult

+from .utils import copy_nn_module_stack
+

 class LinalgVectorNorm(torch.nn.Module):
     def __init__(self, exp, dim, keepdim):
@@ -62,6 +64,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
             remap = {"x": node.args[0]}

             for decomposed_node in decomposed_module.graph.nodes:
+                copy_nn_module_stack(node, decomposed_node)
                 # no need to copy existent 'output'
                 if decomposed_node.op == "output":
                     for user in node.users.copy():
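Both Qualcomm passes now call copy_nn_module_stack from backends/qualcomm/_passes/utils.py, which this commit does not show. A minimal sketch of what such a helper plausibly does, assuming it propagates the FX nn_module_stack metadata from the original node to every node produced by the decomposition so module provenance survives the rewrite:

import torch.fx

# Hypothetical sketch, not the actual utils.py implementation: copy the
# "nn_module_stack" entry of node.meta, which records the chain of nn.Module
# instances a node originated from, onto the freshly created node.
def copy_nn_module_stack(src_node: torch.fx.Node, dst_node: torch.fx.Node) -> None:
    stack = src_node.meta.get("nn_module_stack")
    if stack is not None:
        dst_node.meta["nn_module_stack"] = stack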

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 0 deletions

@@ -47,6 +47,7 @@ class LayoutTransform(ExportPass):
     layout_agnostic_ops = {
         exir_ops.edge.aten.abs.default,
         exir_ops.edge.aten.add.Tensor,
+        exir_ops.edge.aten.amax.default,
         exir_ops.edge.aten.bitwise_or.Tensor,
         exir_ops.edge.aten.bmm.default,
         exir_ops.edge.aten.bitwise_and.Tensor,
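Adding aten.amax to layout_agnostic_ops declares that the reduction behaves the same under any memory format, provided the pass remaps its dim argument when it permutes tensors to channel-last. A standalone illustration of that property (plain PyTorch, not pass code):

import torch

x_nchw = torch.randn(2, 3, 4, 5)      # NCHW activation
x_nhwc = x_nchw.permute(0, 2, 3, 1)   # the same data viewed channel-last

# Reducing over channels: dim=1 in NCHW corresponds to dim=3 in NHWC.
# The results agree, so amax needs only an axis remap, not a data relayout.
assert torch.equal(torch.amax(x_nchw, dim=1), torch.amax(x_nhwc, dim=3))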
