
Commit ecd1cb7

Update on "Dtype selective build: check if portable/optimized in deps"

When dtype selective build is enabled:
- Show a warning if kernel_deps does not contain portable/optimized.
- Error out if deps contains portable/optimized and it is also in kernel_deps.

Differential Revision: [D74922471](https://our.internmc.facebook.com/intern/diff/D74922471/)

[ghstack-poisoned]
2 parents afe6a6a + 5e6c581 commit ecd1cb7
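The commit message above describes a warn-vs-error split in the library build macros. A minimal Python sketch of that gating logic follows; the argument names (deps, kernel_deps, dtype_selective_build) and the target prefixes are illustrative assumptions, not the actual ExecuTorch build-rule API.

# Sketch of the check described in the commit message.
# All names here are assumptions for illustration only.
PORTABLE_OPTIMIZED_PREFIXES = (
    "//executorch/kernels/portable",   # assumed label prefix
    "//executorch/kernels/optimized",  # assumed label prefix
)

def _has_portable_or_optimized(targets):
    # True if any target label looks like a portable/optimized kernel library.
    return any(t.startswith(p) for t in targets for p in PORTABLE_OPTIMIZED_PREFIXES)

def check_dtype_selective_build(deps, kernel_deps, dtype_selective_build):
    if not dtype_selective_build:
        return
    if not _has_portable_or_optimized(kernel_deps):
        # Warning case from the commit message.
        print("WARNING: dtype selective build is enabled, but kernel_deps "
              "does not contain a portable/optimized kernel library")
    if _has_portable_or_optimized(deps) and _has_portable_or_optimized(kernel_deps):
        # Error case from the commit message.
        raise ValueError("portable/optimized kernels appear in both deps and "
                         "kernel_deps; list them in kernel_deps only")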

175 files changed: +2298 −1127 lines


.github/workflows/_link_check.yml

Lines changed: 10 additions & 10 deletions
@@ -18,11 +18,11 @@ jobs:
       timeout: 120
       script: |
         ./scripts/lint_urls.sh $(
-          { [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
-          || \
-          { [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
+          else
+            echo "${{ github.event.before }}" "${{ github.sha }}"
+          fi
         ) || {
           echo
           echo "URL lint failed."
@@ -43,11 +43,11 @@ jobs:
       timeout: 60
       script: |
         ./scripts/lint_xrefs.sh $(
-          { [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
-          || \
-          { [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
+          else
+            echo "${{ github.event.before }}" "${{ github.sha }}"
+          fi
         ) || {
           echo
           echo "Xref lint failed."

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
@@ -367,7 +367,7 @@ jobs:
           PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

           mkdir -p aar-out
-          PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh
+          PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
           mkdir -p extension/benchmark/android/benchmark/app/libs
           cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
           pushd extension/benchmark/android/benchmark

.github/workflows/pull.yml

Lines changed: 0 additions & 28 deletions
@@ -343,34 +343,6 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
         PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2

-  test-pybind-build-linux:
-    name: test-pybind-build-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON" \
-        bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-binary-size-linux-gcc:
     name: test-binary-size-linux-gcc
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

.github/workflows/trunk.yml

Lines changed: 0 additions & 24 deletions
@@ -289,30 +289,6 @@ jobs:
         # Build and test coreml delegate
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh

-  test-pybind-build-macos:
-    name: test-pybind-build-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        bash .ci/scripts/setup-conda.sh
-
-        # build module for executorch.extension.pybindings.portable_lib
-        BUILD_TOOL=${{ matrix.build-tool }}
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-
-        # see if we can import the module successfully
-        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
-
   test-static-llama-ane:
     name: test-static-llama-ane
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

backends/arm/operators/op_avg_pool2d.py

Lines changed: 29 additions & 0 deletions
@@ -17,6 +17,7 @@
     register_node_visitor,
 )
 from executorch.backends.arm.operators.operator_validation_utils import (
+    adjust_pooling_pad_if_needed,
     validate_num_inputs,
     validate_same_dtype,
 )
@@ -63,6 +64,20 @@ def _build_generic_avgpool2d(
         except IndexError:
             pad_size_list = [0, 0, 0, 0]

+        # Adjust the padding as necessary
+        pad_size_list[1] = adjust_pooling_pad_if_needed(
+            input_tensor.shape[2],
+            kernel_size_list[0],
+            stride_size_list[0],
+            pad_size_list[1],
+        )
+        pad_size_list[3] = adjust_pooling_pad_if_needed(
+            input_tensor.shape[3],
+            kernel_size_list[1],
+            stride_size_list[1],
+            pad_size_list[3],
+        )
+
         attr = ts.TosaSerializerAttribute()
         attr.PoolAttribute(
             kernel=kernel_size_list,
@@ -192,6 +207,20 @@ def _build_generic_avgpool2d(
         except IndexError:
             pad_size_list = [0, 0, 0, 0]

+        # Adjust the padding as necessary
+        pad_size_list[1] = adjust_pooling_pad_if_needed(
+            input_tensor.shape[2],
+            kernel_size_list[0],
+            stride_size_list[0],
+            pad_size_list[1],
+        )
+        pad_size_list[3] = adjust_pooling_pad_if_needed(
+            input_tensor.shape[3],
+            kernel_size_list[1],
+            stride_size_list[1],
+            pad_size_list[3],
+        )
+
         attr = ts.TosaSerializerAttribute()
         attr.AvgPool2dAttribute(
             kernel=kernel_size_list,

backends/arm/operators/op_max_pool2d.py

Lines changed: 5 additions & 22 deletions
@@ -17,31 +17,14 @@
     register_node_visitor,
 )
 from executorch.backends.arm.operators.operator_validation_utils import (
+    adjust_pooling_pad_if_needed,
     validate_num_inputs,
     validate_same_dtype,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_specification import TosaSpecification


-# Similarly to Conv2d, the TOSA spec requires that following is exactly divisible:
-# `(input + 2 * pad - kernel_size) / stride`
-# PyTorch however, does not require this, so as needed, we must adjust the padding.
-def adjust_pad_if_needed(
-    input_size: int, kernel_size: int, stride: int, pad: int
-) -> int:
-    if pad == 0:
-        return pad
-
-    mod_remainder = (input_size + 2 * pad - kernel_size) % stride
-
-    # No need to adjust
-    if mod_remainder == 0:
-        return pad
-
-    return pad - mod_remainder
-
-
 @register_node_visitor
 class MaxPool2dVisitor_0_80(NodeVisitor):
     target = "aten.max_pool2d.default"
@@ -82,13 +65,13 @@ def define_node(
         pad_size_list = [0, 0, 0, 0]

         # Adjust the padding as necessary
-        pad_size_list[1] = adjust_pad_if_needed(
+        pad_size_list[1] = adjust_pooling_pad_if_needed(
             input_tensor.shape[2],
             kernel_size[0],
             stride[0],
             pad_size_list[1],
         )
-        pad_size_list[3] = adjust_pad_if_needed(
+        pad_size_list[3] = adjust_pooling_pad_if_needed(
             input_tensor.shape[3],
             kernel_size[1],
             stride[1],
@@ -167,13 +150,13 @@ def define_node(
         pad_size_list = [0, 0, 0, 0]

         # Adjust the padding as necessary
-        pad_size_list[1] = adjust_pad_if_needed(
+        pad_size_list[1] = adjust_pooling_pad_if_needed(
             input_tensor.shape[2],
             kernel_size[0],
             stride[0],
             pad_size_list[1],
         )
-        pad_size_list[3] = adjust_pad_if_needed(
+        pad_size_list[3] = adjust_pooling_pad_if_needed(
             input_tensor.shape[3],
             kernel_size[1],
             stride[1],

backends/arm/operators/operator_validation_utils.py

Lines changed: 37 additions & 0 deletions
@@ -99,3 +99,40 @@ def validate_same_dtype(op_name: str, tensors: List[Any]):
                 f"{op_name}: Expected all tensors to have dtype {reference_dtype}, but "
                 f"found inconsistent dtype {tensor.dtype}."
             )
+
+
+def adjust_pooling_pad_if_needed(
+    input_size: int, kernel_size: int, stride: int, pad: int
+) -> int:
+    """
+    Calculates the padding that needs to be removed to a pooling window to make it
+    divisible by the kernels stride. All inputs should correspond to the same dimension.
+
+    Parameters:
+    -----------
+    input_size : int
+        The size of the input to the operator.
+
+    kernel_size : int
+        The size of the kernel.
+
+    stride : int
+        The size of the stride.
+
+    pad : int
+        The amount of padding.
+
+    Output:
+    -------
+    An int, representing the padding to remove to make the window divisible.
+    """
+    if pad == 0:
+        return pad
+
+    mod_remainder = (input_size + 2 * pad - kernel_size) % stride
+
+    # No need to adjust
+    if mod_remainder == 0:
+        return pad
+
+    return pad - mod_remainder
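For intuition about the helper added above: as the comment removed from op_max_pool2d.py notes, TOSA requires `(input + 2 * pad - kernel_size)` to be exactly divisible by `stride`, so the helper trims the padding by the leftover remainder. Below is a standalone restatement of the function from this diff with two worked values; it is for illustration only, the real helper lives in operator_validation_utils.py.

# Restatement of adjust_pooling_pad_if_needed from the diff above, for illustration.
def adjust_pooling_pad_if_needed(input_size: int, kernel_size: int, stride: int, pad: int) -> int:
    if pad == 0:
        return pad
    mod_remainder = (input_size + 2 * pad - kernel_size) % stride
    if mod_remainder == 0:
        return pad
    return pad - mod_remainder

# (7 + 2*1 - 2) % 2 == 1, so one element of padding is dropped: returns 0.
print(adjust_pooling_pad_if_needed(input_size=7, kernel_size=2, stride=2, pad=1))  # 0
# (8 + 2*1 - 2) % 2 == 0, already divisible, padding is unchanged: returns 1.
print(adjust_pooling_pad_if_needed(input_size=8, kernel_size=2, stride=2, pad=1))  # 1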

backends/arm/quantizer/quantization_annotator.py

Lines changed: 79 additions & 22 deletions
@@ -10,6 +10,7 @@

 import torch
 import torch.fx
+import torch.nn.functional as F
 from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.tosa_utils import get_node_debug_info
 from torch.ao.quantization.quantizer import QuantizationSpecBase, SharedQuantizationSpec
@@ -142,29 +143,33 @@ def _match_pattern(

     Each 'pattern' element is composed of a list of disjunctive nodes types.
     """
-    assert len(pattern) == 2, "Only two-nodes patterns supported currently"
-
-    if node.target in pattern[0]:
-        assert len(node.users) != 0
-        parent = node
-        child = next(iter(node.users))
-    elif node.target in pattern[1]:
-        assert len(node.args) != 0
-        parent = node.args[0]  # type: ignore[assignment]
-        child = node
-    else:
-        return False
-
-    if len(parent.users) != 1:
-        return False
-
-    if parent.target not in pattern[0] or child.target not in pattern[1]:
-        return False
-
+    assert len(pattern) > 0, "No pattern provided"
     if filter_fn is not None:
-        return filter_fn(parent) and filter_fn(child)
-
-    return True
+        if not filter_fn(node):
+            return False
+    if len(pattern) == 1:
+        # Base case where it has passed the filter_fn. Simply look if node.target is in pattern.
+        return node.target in pattern[0]
+    if node.target not in [op for sub_pattern in pattern for op in sub_pattern]:
+        # node.target not in pattern. No need to look at the rest of the pattern.
+        return False
+    # Find the index of this node's target in pattern
+    idx = [node.target in sub_pattern for sub_pattern in pattern].index(True)
+    left_pattern = pattern[:idx]
+    # Exclude idx as this contains node.target which we have already matched
+    right_pattern = pattern[idx + 1 :]
+    left_condition = True
+    right_condition = True
+    # Recursively look at the rest of the pattern by calling this function for
+    # node's input and user node with updated patterns.
+    if len(left_pattern) > 0:
+        parent = node.all_input_nodes[0]
+        if len(parent.users) != 1:
+            return False
+        left_condition = _match_pattern(parent, left_pattern, filter_fn)
+    if len(right_pattern) > 0:
+        right_condition = _match_pattern(list(node.users)[0], right_pattern, filter_fn)
+    return left_condition and right_condition


 _one_to_one = [
@@ -274,6 +279,58 @@ def any_or_hardtanh_min_zero(n: Node):
         return n.target != torch.ops.aten.hardtanh.default or n.args[1] == 0

     if _match_pattern(
+        node,
+        [
+            [
+                torch.ops.aten.conv1d.default,
+                torch.ops.aten.conv2d.default,
+                torch.ops.aten.conv2d.padding,
+            ],
+            [torch.ops.aten.batch_norm.default, F.batch_norm],
+            [torch.ops.aten.relu.default, torch.ops.aten.hardtanh.default],
+        ],
+        filter_fn=any_or_hardtanh_min_zero,
+    ):
+        if node.target in (
+            torch.ops.aten.conv1d.default,
+            torch.ops.aten.conv2d.default,
+            torch.ops.aten.conv2d.padding,
+        ):
+            quant_properties.quant_inputs = [
+                _QuantProperty(0, input_act_qspec),
+                _QuantProperty(1, weight_qspec, mark_annotated=True),
+                _QuantProperty(2, bias_qspec, optional=True, mark_annotated=True),
+            ]
+        elif node.target in (
+            torch.ops.aten.relu.default,
+            torch.ops.aten.hardtanh.default,
+        ):
+            quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
+
+    elif _match_pattern(
+        node,
+        [
+            [
+                torch.ops.aten.conv1d.default,
+                torch.ops.aten.conv2d.default,
+                torch.ops.aten.conv2d.padding,
+            ],
+            [torch.ops.aten.batch_norm.default, F.batch_norm],
+        ],
+    ):
+        if node.target in (
+            torch.ops.aten.conv1d.default,
+            torch.ops.aten.conv2d.default,
+            torch.ops.aten.conv2d.padding,
+        ):
+            quant_properties.quant_inputs = [
+                _QuantProperty(0, input_act_qspec),
+                _QuantProperty(1, weight_qspec, mark_annotated=True),
+                _QuantProperty(2, bias_qspec, optional=True, mark_annotated=True),
+            ]
+        elif node.target in [torch.ops.aten.batch_norm.default, F.batch_norm]:
+            quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
+    elif _match_pattern(
         node,
         [
             [