Skip to content

Commit f6e2887

Browse files
committed
Update base for Update on "Dtype selective build: fail if not xplat, if portable/optimized not in kernel_deps"
#10985 Try to make user error harder for dtype selective build. Emit a warning for now, as there are too many failures when set to failure :( For example: ``` buck2 build //xplat/sgr/resources/tests/handwriting:pkg buck2 build fbsource//xplat/sgr/resources/mwa:main_pkg_libAndroid ``` Differential Revision: [D75027794](https://our.internmc.facebook.com/intern/diff/D75027794/) [ghstack-poisoned]
2 parents bbf5ae5 + 7d194cf commit f6e2887

File tree

143 files changed

+965
-711
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+965
-711
lines changed

.github/workflows/_link_check.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ jobs:
1818
timeout: 120
1919
script: |
2020
./scripts/lint_urls.sh $(
21-
{ [ "${{ github.event_name }}" = "pull_request" ] \
22-
&& git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
23-
|| \
24-
{ [ "${{ github.event_name }}" = "push" ] \
25-
&& git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
21+
if [ "${{ github.event_name }}" = "pull_request" ]; then
22+
echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
23+
else
24+
echo "${{ github.event.before }}" "${{ github.sha }}"
25+
fi
2626
) || {
2727
echo
2828
echo "URL lint failed."
@@ -43,11 +43,11 @@ jobs:
4343
timeout: 60
4444
script: |
4545
./scripts/lint_xrefs.sh $(
46-
{ [ "${{ github.event_name }}" = "pull_request" ] \
47-
&& git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
48-
|| \
49-
{ [ "${{ github.event_name }}" = "push" ] \
50-
&& git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
46+
if [ "${{ github.event_name }}" = "pull_request" ]; then
47+
echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
48+
else
49+
echo "${{ github.event.before }}" "${{ github.sha }}"
50+
fi
5151
) || {
5252
echo
5353
echo "Xref lint failed."

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ jobs:
367367
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
368368
369369
mkdir -p aar-out
370-
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh
370+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
371371
mkdir -p extension/benchmark/android/benchmark/app/libs
372372
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
373373
pushd extension/benchmark/android/benchmark

.github/workflows/pull.yml

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -343,34 +343,6 @@ jobs:
343343
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
344344
PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
345345
346-
test-pybind-build-linux:
347-
name: test-pybind-build-linux
348-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
349-
permissions:
350-
id-token: write
351-
contents: read
352-
strategy:
353-
fail-fast: false
354-
with:
355-
runner: linux.2xlarge
356-
docker-image: executorch-ubuntu-22.04-clang12
357-
submodules: 'recursive'
358-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
359-
timeout: 90
360-
script: |
361-
# The generic Linux job chooses to use base env, not the one setup by the image
362-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
363-
conda activate "${CONDA_ENV}"
364-
365-
# build module for executorch.extension.pybindings.portable_lib
366-
BUILD_TOOL="cmake"
367-
PYTHON_EXECUTABLE=python \
368-
CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON" \
369-
bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
370-
371-
# see if we can import the module successfully
372-
python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
373-
374346
test-binary-size-linux-gcc:
375347
name: test-binary-size-linux-gcc
376348
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/trunk.yml

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -289,30 +289,6 @@ jobs:
289289
# Build and test coreml delegate
290290
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
291291
292-
test-pybind-build-macos:
293-
name: test-pybind-build-macos
294-
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
295-
strategy:
296-
matrix:
297-
include:
298-
- build-tool: cmake
299-
fail-fast: false
300-
with:
301-
runner: macos-m1-stable
302-
python-version: '3.11'
303-
submodules: 'recursive'
304-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
305-
timeout: 180
306-
script: |
307-
bash .ci/scripts/setup-conda.sh
308-
309-
# build module for executorch.extension.pybindings.portable_lib
310-
BUILD_TOOL=${{ matrix.build-tool }}
311-
CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
312-
313-
# see if we can import the module successfully
314-
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
315-
316292
test-static-llama-ane:
317293
name: test-static-llama-ane
318294
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,6 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
189189
// the end of the execution of the Ethos-U custom delegate
190190
char* ethosu_scratch =
191191
static_cast<char*>(temp_allocator->allocate(handles.scratch_data_size));
192-
extern size_t ethosu_fast_scratch_size;
193-
extern unsigned char* ethosu_fast_scratch;
194192
ET_LOG(
195193
Debug,
196194
"EthosUBackend::execute: Running program data:\n cmd %p %zu\n weight %p %zu\n scratch %p %zu\n fast scratch %p %zu\n",
@@ -200,8 +198,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
200198
handles.weight_data_size,
201199
ethosu_scratch,
202200
handles.scratch_data_size,
203-
ethosu_fast_scratch,
204-
ethosu_fast_scratch_size);
201+
nullptr,
202+
0);
205203

206204
// Write argument values (from EValue tensor) into Ethos-U scratch
207205
// TODO(MLETORCH-123): Optimise into direct write from Vela into the SRAM
@@ -311,12 +309,9 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
311309
static_cast<uint64_t>(
312310
reinterpret_cast<uintptr_t>((handles.weight_data))),
313311
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ethosu_scratch)),
314-
static_cast<uint64_t>(
315-
reinterpret_cast<uintptr_t>(ethosu_fast_scratch))};
312+
0};
316313
size_t bases_size[ETHOSU_NUM_BASE_ADDRS] = {
317-
handles.weight_data_size,
318-
handles.scratch_data_size,
319-
ethosu_fast_scratch_size};
314+
handles.weight_data_size, handles.scratch_data_size, 0};
320315
int result = 0;
321316
EXECUTORCH_PROF_START(
322317
event_tracer, event_tracer_local_scope, "+EthosUBackend::execute()NPU");

backends/arm/scripts/build_executor_runner.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ then
103103
memory_mode="Shared_Sram"
104104
if [[ ${target} =~ "ethos-u85" ]]
105105
then
106-
memory_mode="Dedicated_Sram_384KB"
106+
memory_mode="Sram_Only"
107107
fi
108108
fi
109109

backends/arm/test/test_arm_baremetal.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ test_pytest_models() { # Test ops and other things
8787
backends/arm/scripts/build_executorch.sh
8888

8989
# Run arm baremetal pytest tests without FVP
90-
pytest --verbose --color=yes --numprocesses=auto backends/arm/test/models
90+
pytest --verbose --color=yes backends/arm/test/models
9191
echo "${TEST_SUITE_NAME}: PASS"
9292
}
9393

@@ -122,7 +122,7 @@ test_pytest_models_ethosu_fvp() { # Same as test_pytest but also sometime verify
122122
backends/arm/test/setup_testing.sh
123123

124124
# Run arm baremetal pytest tests with FVP
125-
pytest --verbose --color=yes --numprocesses=auto backends/arm/test/models --arm_run_corstoneFVP
125+
pytest --verbose --color=yes backends/arm/test/models --arm_run_corstoneFVP
126126
echo "${TEST_SUITE_NAME}: PASS"
127127
}
128128

@@ -210,7 +210,10 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
210210
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
211211
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
212212
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
213-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
213+
# Temporarily do not test inception_v4 on Ethos-U85. To support inception_v4 properly on Ethos-U85, we need to run the model in Dedicated_Sram memory mode with
214+
# 384KB (or another amount lower than 2MB) of SRAM passed as the fast scratch area. The PR adding support for Dedicated_Sram (https://github.com/pytorch/executorch/pull/10714)
215+
# was reverted due to a change required in an internal variant of examples/arm/executor_runner/arm_executor_runner.cpp.
216+
# python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
214217

215218
echo "${TEST_SUITE_NAME}: PASS"
216219
}

backends/arm/test/test_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def get_args():
8181
if "u55" in args.target:
8282
args.memory_mode = "Shared_Sram"
8383
elif "u85" in args.target:
84-
args.memory_mode = "Dedicated_Sram_384KB"
84+
args.memory_mode = "Sram_Only"
8585
else:
8686
raise RuntimeError(f"Invalid target name {args.target}")
8787

backends/cadence/aot/replace_ops.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -283,31 +283,6 @@ def call_operator(self, op, args, kwargs, meta):
283283
return super().call_operator(op, args, kwargs, meta)
284284

285285

286-
@register_cadence_pass(CadencePassAttribute(opt_level=0))
287-
class ReplaceTCopyWithTransposePass(ExportPass):
288-
"""
289-
Replace t_copy with transpose_copy.int. If the input is 1D, the t_copy is
290-
a nop. t_copy is not supported, so this is an opt_level=0 pass.
291-
"""
292-
293-
def call_operator(self, op, args, kwargs, meta):
294-
if get_edge_overload_packet(op) != exir_ops.edge.aten.t_copy:
295-
return super().call_operator(op, args, kwargs, meta)
296-
297-
# Get the input tensor shape
298-
in_tensor = args[0].to_tensor() if isinstance(args[0], ProxyValue) else args[0]
299-
300-
# If the input is a 1D tensor, this t_copy is a nop, so return the input
301-
if in_tensor.dim() <= 1:
302-
return args[0]
303-
304-
assert in_tensor.dim() == 2, "t_copy expects a tensor with <= 2 dimensions"
305-
transpose_args = (args[0], 0, 1)
306-
return super().call_operator(
307-
exir_ops.edge.aten.transpose_copy.int, transpose_args, kwargs, meta
308-
)
309-
310-
311286
@register_cadence_pass(CadencePassAttribute(opt_level=0))
312287
class ReplaceMMWithAddMMPass(ExportPass):
313288
"""
@@ -2407,7 +2382,6 @@ class CadenceReplaceOpsInGraph:
24072382
passes = [
24082383
ReplaceEmptyTensorsWithFullPass,
24092384
ReplaceFunctionallyEquivalentOpTargets,
2410-
ReplaceTCopyWithTransposePass,
24112385
ReplacePermuteWithTransposePass,
24122386
ReplaceScalarWithTensorArgPass,
24132387
ReplaceConvolutionOptionalArgsWithConcreteArgsPass,

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,12 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
295295
args=(permute, 4.5, 6, 0, 127, torch.int8),
296296
)
297297
builder.output(dequant)
298-
graph_module = FuseQuantDequantToRequantizePass(
298+
original_graph = builder.get_graph_module()
299+
converted_graph = FuseQuantDequantToRequantizePass(
299300
force_quant_dequant_fusion=False
300-
)(builder.get_graph_module()).graph_module
301+
)(original_graph).graph_module
301302
self.check_op_counts(
302-
graph_module,
303+
converted_graph,
303304
expected_op_counts={
304305
# Verify that no dequant/quant pair was replaced with requantize.
305306
# quantize -> permute -> dequantize should not be replaced with requantize.
@@ -310,30 +311,28 @@ def test_no_replace_quant_permute_dequant_with_requantize(self):
310311
)
311312

312313
def test_replace_quant_view_dequant_with_requantize(self):
313-
class M(torch.nn.Module):
314-
def __init__(self):
315-
super().__init__()
316-
317-
def forward(self, x):
318-
x = torch.ops.quantized_decomposed.quantize_per_tensor(
319-
x, 1.2, 3, 0, 127, torch.int8
320-
)
321-
x = x.view(-1)
322-
x = torch.ops.quantized_decomposed.dequantize_per_tensor(
323-
x, 4.5, 6, 0, 127, torch.int8
324-
)
325-
return x
326-
327-
inputs = torch.randn(2, 12, 1, 6)
328-
model = M()
329-
graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module
330-
graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
331-
314+
builder = GraphBuilder()
315+
x = builder.placeholder("x", torch.randn(2, 12, 1, 6, dtype=torch.float32))
316+
quant = builder.call_operator(
317+
op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
318+
args=(x, 1.2, 3, 0, 127, torch.int8),
319+
)
320+
view = builder.call_operator(
321+
op=exir_ops.edge.aten.view_copy.default, args=(quant, [-1])
322+
)
323+
dequant = builder.call_operator(
324+
op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
325+
args=(view, 4.5, 6, 0, 127, torch.int8),
326+
)
327+
builder.output(dequant)
328+
original_graph = builder.get_graph_module()
329+
converted_graph = FuseQuantDequantToRequantizePass()(
330+
original_graph
331+
).graph_module
332332
self.check_op_counts(
333-
graph_module,
333+
converted_graph,
334334
expected_op_counts={
335-
# Verify that no dequant/quant pair was replaced with requantize.
336-
# quantize -> permute -> dequantize should not be replaced with requantize.
335+
# Verify that dequant/quant pair was replaced with requantize.
337336
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
338337
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
339338
exir_ops.edge.cadence.requantize.default: 1,

backends/cadence/aot/tests/test_replace_ops_passes.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
4949
ReplaceSplitWithSlicePass,
5050
ReplaceSqueezeAndUnsqueezeWithViewPass,
51-
ReplaceTCopyWithTransposePass,
5251
ReplaceTransposedConvWithLinearPass,
5352
ReplaceTrivialConvWithLinear,
5453
ReplaceWhereWithFullArgsWithWhereScalar,
@@ -368,37 +367,6 @@ def forward(self, x: torch.Tensor):
368367
0,
369368
)
370369

371-
@parameterized.expand(
372-
[
373-
[(16, 32)],
374-
[(1, 240)],
375-
[(4, 16)],
376-
]
377-
)
378-
@torch.no_grad()
379-
def test_replace_t_copy_with_transpose(self, shape: Tuple[int]):
380-
class TCopy(torch.nn.Module):
381-
def forward(self, x: torch.Tensor):
382-
return exir_ops.edge.aten.t_copy(x)
383-
384-
w = torch.randn(shape)
385-
inputs = (w,)
386-
p1 = ReplaceTCopyWithTransposePass()
387-
p2 = ReplacePermuteWithTransposePass()
388-
model = TCopy()
389-
graph_module = export_to_edge(model, inputs).exported_program().graph_module
390-
graph_after_passes = cast(
391-
PassResult, p2(cast(PassResult, p1(graph_module)).graph_module)
392-
).graph_module
393-
self.assertEqual(
394-
count_node(graph_after_passes, exir_ops.edge.aten.transpose_copy.int),
395-
1,
396-
)
397-
self.assertEqual(
398-
count_node(graph_after_passes, exir_ops.edge.aten.t_copy),
399-
0,
400-
)
401-
402370
@parameterized.expand(
403371
[
404372
[(1, 8, 33), 8, 16, 3],

0 commit comments

Comments
 (0)