Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Target] Automatically detect system triple when not specified by the user #16513

Merged
merged 6 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions python/tvm/relay/op/strategy/arm_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
pt, pl, pb, pr = topi.nn.get_pad_tuple(padding, (kh, kw))
is_winograd_applicable = (
"float" in data.dtype
and "custom" not in data.dtype
and "float" in kernel.dtype
and "custom" not in kernel.dtype
and kh == 3
and kw == 3
and stride_h == 1
Expand Down Expand Up @@ -315,8 +317,20 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
name="depthwise_conv2d_nchw.x86",
)
elif layout == "NHWC":
assert kernel_layout == "HWOI"
if target.features.is_aarch64 and target.features.has_asimd:
if kernel_layout != "HWOI":
logger.warning(
"""
depthwise_conv2d with layout NHWC and HWOI
kernel layout is not optimized for arm_cpu target.
"""
)
strategy.add_implementation(
wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc, need_kernel_layout=True),
wrap_topi_schedule(conv2d_generic.schedule_depthwise_conv2d_nhwc),
name="depthwise_conv2d_nhwc.generic",
)

elif target.features.is_aarch64 and target.features.has_asimd:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
Expand Down
4 changes: 2 additions & 2 deletions python/tvm/topi/arm_cpu/injective.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# under the License.
# pylint: disable=invalid-name, unused-variable
"""Schedule for pooling operators"""
import numpy as np
import tvm
from tvm import te
from ..utils import is_empty_shape
Expand Down Expand Up @@ -69,7 +68,8 @@ def schedule_injective(outs):
if list(s[x].op.axis):
# do not vectorize for broadcast
dtype = "uint16" if x.dtype == "bfloat16" else x.dtype
(io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // np.dtype(dtype).itemsize)
itemsize = max(1, tvm.DataType(dtype).bits // 8)
(io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // itemsize)
s[x].vectorize(ii)
tvm.te.schedule.AutoInlineInjective(s)

Expand Down
18 changes: 18 additions & 0 deletions src/target/parsers/cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,25 @@ namespace target {
namespace parsers {
namespace cpu {

Optional<String> DetectSystemTriple() {
auto pf = tvm::runtime::Registry::Get("target.llvm_get_system_triple");
if (pf->defined()) {
return (*pf)();
}
return {};
}

TargetJSON ParseTarget(TargetJSON target) {
String kind = Downcast<String>(target.Get("kind"));
Optional<String> mtriple = Downcast<Optional<String>>(target.Get("mtriple"));
Optional<String> mcpu = Downcast<Optional<String>>(target.Get("mcpu"));

// Try to fill in the blanks by detecting target information from the system
if (kind == "llvm" && !mtriple.defined() && !mcpu.defined()) {
String system_triple = DetectSystemTriple().value_or("");
target.Set("mtriple", system_triple);
}
lhutton1 marked this conversation as resolved.
Show resolved Hide resolved

if (mprofile::IsArch(target)) {
return mprofile::ParseTarget(target);
}
Expand Down
17 changes: 16 additions & 1 deletion tests/cpp/target_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,10 +494,25 @@ TEST(TargetCreation, DeduplicateKeys) {
ICHECK_EQ(target->keys.size(), 2U);
ICHECK_EQ(target->keys[0], "cpu");
ICHECK_EQ(target->keys[1], "arm_cpu");
ICHECK_EQ(target->attrs.size(), 1U);
ICHECK_EQ(target->attrs.size(), 2U);
ICHECK_EQ(target->GetAttr<String>("device"), "arm_cpu");
}

TEST(TargetCreation, DetectSystemTriple) {
Map<String, ObjectRef> config = {
{"kind", String("llvm")},
};

Target target = Target(config);
ICHECK_EQ(target->kind, TargetKind::Get("llvm").value());

Optional<String> mtriple = target->GetAttr<String>("mtriple");
auto pf = tvm::runtime::Registry::Get("target.llvm_get_system_triple");
if (!pf->defined()) {
GTEST_SKIP() << "LLVM is not available, skipping test";
}
}

TEST(TargetKindRegistry, ListTargetKinds) {
Array<String> names = TargetKindRegEntry::ListTargetKinds();
ICHECK_EQ(names.empty(), false);
Expand Down
19 changes: 14 additions & 5 deletions tests/python/auto_scheduler/test_auto_scheduler_search_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,11 @@ def test_search_task_record():
assert new_task.task_input_names[1] == "test_input_1"

# Log with version 0.5
v5_log = """["[\\\"matmul_auto_scheduler_test\\\", 64, 64, 64]", "llvm -keys=cpu", [6, 64, 64, 0, 0, 0, 0, 0], "", 1]"""
v5_log = (
"""["[\\\"matmul_auto_scheduler_test\\\", 64, 64, 64]", """
f'"{str(tvm.target.Target(target))}"'
""", [6, 64, 64, 0, 0, 0, 0, 0], "", 1]"""
)
new_task = auto_scheduler._ffi_api.DeserializeSearchTask(v5_log)
assert task.workload_key == new_task.workload_key
assert str(task.target) == str(new_task.target)
Expand All @@ -125,12 +129,13 @@ def test_search_task_record():

def test_recover_measure_input_with_task_input():
auto_scheduler.search_task.TASK_INPUT_BUFFER_TABLE.clear()
target = "llvm"

# Since this file is tests for search_task, we only check the search_task here

# Log with no task input
task = auto_scheduler.SearchTask(
func=matmul_auto_scheduler_test, args=(512, 512, 512), target="llvm"
func=matmul_auto_scheduler_test, args=(512, 512, 512), target=target
)
inp = auto_scheduler.measure.MeasureInput(task, task.compute_dag.init_state)
res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)
Expand All @@ -147,7 +152,7 @@ def test_recover_measure_input_with_task_input():
task = auto_scheduler.SearchTask(
func=matmul_auto_scheduler_test,
args=(512, 512, 512),
target="llvm",
target=target,
task_inputs={
"test_input_0": test_input_0,
},
Expand All @@ -170,7 +175,7 @@ def test_recover_measure_input_with_task_input():
task = auto_scheduler.SearchTask(
func=matmul_auto_scheduler_test,
args=(512, 512, 512),
target="llvm",
target=target,
task_inputs={
"test_input_0": test_input_0,
"test_input_1": test_input_1,
Expand All @@ -191,7 +196,11 @@ def test_recover_measure_input_with_task_input():
assert new_task.task_input_names[1] == "test_input_1"

# Log with version 0.5
v5_log = """{"i": [["[\\\"matmul_auto_scheduler_test\\\", 512, 512, 512]", "llvm -keys=cpu", [6, 64, 64, 0, 0, 0, 0, 0], "", 1], [[], []]], "r": [[0.1], 0, 0.2, 1], "v": "v0.6"}"""
v5_log = (
"""{"i": [["[\\\"matmul_auto_scheduler_test\\\", 512, 512, 512]", """
f'"{str(tvm.target.Target(target))}"'
""", [6, 64, 64, 0, 0, 0, 0, 0], "", 1], [[], []]], "r": [[0.1], 0, 0.2, 1], "v": "v0.6"}"""
)
measure_log = auto_scheduler.measure_record.load_record_from_string(v5_log)
new_task = measure_log[0].task
assert task.workload_key == new_task.workload_key
Expand Down
7 changes: 7 additions & 0 deletions tests/python/autotvm/test_autotvm_graph_tuner_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def _create_data(target, dshape, dtype, layout):
return net, records, ltf_records, ltf_keys, tasks


@tvm.testing.requires_x86
def test_graph_tuner_layout_transform():
log_file = "%s/test_tuner.log" % (os.getcwd())
target = "llvm"
Expand Down Expand Up @@ -188,6 +189,7 @@ def test_graph_tuner_layout_transform():
)


@tvm.testing.requires_x86
def test_graph_tuner_layout_transform_runner():
log_file = "%s/test_tuner.log" % (os.getcwd())
target = "llvm"
Expand Down Expand Up @@ -231,6 +233,7 @@ def test_graph_tuner_layout_transform_runner():
)


@tvm.testing.requires_x86
def test_DPTuner_run():
log_file = "%s/test_tuner.log" % (os.getcwd())
target = "llvm"
Expand Down Expand Up @@ -295,6 +298,7 @@ def test_DPTuner_run():
assert os.path.isfile(log_file), "No log file with name %s exists." % log_file


@tvm.testing.requires_x86
def test_PBQPTuner_run():
target = "llvm"
dtype = "float32"
Expand Down Expand Up @@ -355,6 +359,7 @@ def test_PBQPTuner_run():
)


@tvm.testing.requires_x86
def test_many_sub_graphs():
target = "llvm"
dtype = "float32"
Expand Down Expand Up @@ -517,6 +522,7 @@ def test_many_sub_graphs():
)


@tvm.testing.requires_x86
def test_tuple():
target = "llvm"
dtype = "float32"
Expand Down Expand Up @@ -629,6 +635,7 @@ def test_tuple():
)


@tvm.testing.requires_x86
def test_triangle_block():
target = "llvm"
dtype = "float32"
Expand Down
59 changes: 47 additions & 12 deletions tests/python/frontend/tflite/test_forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from __future__ import print_function
from functools import partial
from distutils.version import LooseVersion

import platform
import os
import tempfile
import typing
Expand Down Expand Up @@ -1092,35 +1092,56 @@ def test_forward_quantized_convolution():
)

_test_tflite2_quantized_convolution(
(1, 16, 10, 10),
(3, 3),
2,
(2, 32, 28, 28),
(1, 1),
16,
data_format="NCWH",
int_quant_dtype=int_quant_dtype,
groups=2,
groups=8,
)

if platform.machine() == "aarch64":
pytest.skip(
reason=(
"Grouped convolution type inference error for `arm_cpu`. "
"See https://github.com/apache/tvm/issues/16532"
)
)

_test_tflite2_quantized_convolution(
(2, 32, 28, 28),
(1, 1),
16,
(1, 16, 10, 10),
(3, 3),
2,
data_format="NCWH",
int_quant_dtype=int_quant_dtype,
groups=8,
groups=2,
)


def test_forward_quantized_depthwise_convolution():
"""Test qnn.conv2d depthwise compiled with TVM against TFLite reference."""
for int_quant_dtype in [tf.int8, tf.int16]:
_test_tflite2_quantized_depthwise_convolution(
[1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, int_quant_dtype
)
_test_tflite2_quantized_depthwise_convolution(
[1, 17, 17, 12], [3, 3, 12, 1], [1, 1], [2, 2], "VALID", "NHWC", 1, int_quant_dtype
)
_test_tflite2_quantized_depthwise_convolution(
[1, 24, 24, 3], [7, 7, 3, 8], [1, 1], [2, 2], "SAME", "NHWC", 8, int_quant_dtype
)
_test_tflite2_quantized_depthwise_convolution(
[1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int8
)

if platform.machine() == "aarch64":
pytest.skip(
reason=(
"Tensor intrinsic data type mismatch error. "
"See https://github.com/apache/tvm/issues/16533"
)
)

_test_tflite2_quantized_depthwise_convolution(
[1, 8, 8, 128], [1, 1, 128, 1], [1, 1], [1, 1], "SAME", "NHWC", 1, tf.int16
)


def _test_tflite2_quantized_depthwise_convolution(
Expand Down Expand Up @@ -5090,6 +5111,10 @@ def test_forward_qnn_mobilenet_v3_net():
tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)


@pytest.mark.skipif(
platform.machine() == "aarch64",
reason="Fails with an output mismatch. See https://github.com/apache/tvm/issues/16534",
)
def test_forward_tflite2_qnn_resnet50():
"""Test the Quantized TFLite version 2.1.0 Resnet50 model."""
if package_version.parse(tf.VERSION) >= package_version.parse("2.1.0"):
Expand Down Expand Up @@ -5186,6 +5211,11 @@ def test_forward_tflite_float16():
tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)


@pytest.mark.skipif(
platform.machine() == "aarch64",
reason="Fails during leagalization due to int16 datatype. "
"See https://github.com/apache/tvm/issues/16535",
)
def test_forward_mobilenet_int16():
"""Test int16 quantized model"""
# MobilenetV2
Expand Down Expand Up @@ -5228,6 +5258,11 @@ def representative_dataset():
tvm.testing.assert_allclose(tvm_sorted_labels, tflite_sorted_labels)


@pytest.mark.skipif(
platform.machine() == "aarch64",
reason="Fails during leagalization due to int16 datatype. "
"See https://github.com/apache/tvm/issues/16535",
)
def test_forward_ds_cnn_int16():
"""Test DS_CNN int16 quantized model"""
tflite_model_file = download_testdata(
Expand Down
2 changes: 1 addition & 1 deletion tests/python/integration/test_legacy_tuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def @main(%a : Tensor[(1, 3, 32, 32), float32], %b : Tensor[(3, 3, 5, 5), float3
tasks = autotvm.task.relay_integration.extract_from_program(
ir_mod, {}, tvm.target.create("llvm")
)
assert len(tasks) == 1, f"Extracted != 1 task from program: {tasks!r}"
assert len(tasks) >= 1, f"Extracted no tasks from program: {tasks!r}"

task = tasks[0]

Expand Down
Loading
Loading