Skip to content

Commit

Permalink
Add Intel APX and AVX10 target flags and LLVM attribute setting. (#8052)
Browse files Browse the repository at this point in the history
* Add target flag and LLVM enables support for Intel AVX10.

* Go ahead and add APX support as well.

Correct spelling of APX target attributes.

* Implement AVX10 and APX cpu feature detection. (As yet untested.)

* Expand target feature flags for AVX10.

---------

Co-authored-by: Steven Johnson <srj@google.com>
  • Loading branch information
Zalman Stern and steven-johnson authored Feb 23, 2024
1 parent 57164df commit 4399ed8
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 7 deletions.
2 changes: 2 additions & 0 deletions python_bindings/src/halide/halide_/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ void define_enums(py::module &m) {
.value("VulkanV12", Target::VulkanV12)
.value("VulkanV13", Target::VulkanV13)
.value("Semihosting", Target::Feature::Semihosting)
.value("AVX10_1", Target::Feature::AVX10_1)
.value("X86APX", Target::Feature::X86APX)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
43 changes: 38 additions & 5 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ namespace {
// existing flags, so that instruction patterns can just check for the
// oldest feature flag that supports an instruction.
Target complete_x86_target(Target t) {
if (t.has_feature(Target::AVX10_1)) {
if (t.vector_bits >= 256) {
t.set_feature(Target::AVX2);
}
if (t.vector_bits >= 512) {
t.set_feature(Target::AVX512_SapphireRapids);
}
}
if (t.has_feature(Target::AVX512_SapphireRapids)) {
t.set_feature(Target::AVX512_Zen4);
}
Expand All @@ -54,6 +62,7 @@ Target complete_x86_target(Target t) {
if (t.has_feature(Target::AVX)) {
t.set_feature(Target::SSE41);
}

return t;
}

Expand Down Expand Up @@ -1035,9 +1044,31 @@ string CodeGen_X86::mattrs() const {
}
#if LLVM_VERSION >= 180
if (gather_might_be_slow(target)) {
attrs.push_back("+prefer-no-gather");
attrs.emplace_back("+prefer-no-gather");
}
#endif

// AVX10.1 is exposed to LLVM as a width-specific attribute, so the
// attribute name is chosen from target.vector_bits. Only the 256 and
// 512 bit variants are handled; any other value of vector_bits is
// reported as a user error.
if (target.has_feature(Target::AVX10_1)) {
switch (target.vector_bits) {
case 256:
attrs.emplace_back("+avx10.1-256");
break;
case 512:
attrs.emplace_back("+avx10.1-512");
break;
default:
user_error << "AVX10 only supports 256 or 512 bit variants at present.\n";
break;
}
}

// The single X86APX target feature turns on each of the individual
// APX attributes listed below.
if (target.has_feature(Target::X86APX)) {
attrs.emplace_back("+egpr");
attrs.emplace_back("+push2pop2");
attrs.emplace_back("+ppx");
attrs.emplace_back("+ndd");
}

return join_strings(attrs, ",");
}

Expand All @@ -1046,10 +1077,12 @@ bool CodeGen_X86::use_soft_float_abi() const {
}

int CodeGen_X86::native_vector_bits() const {
if (target.has_feature(Target::AVX512) ||
target.has_feature(Target::AVX512_Skylake) ||
target.has_feature(Target::AVX512_KNL) ||
target.has_feature(Target::AVX512_Cannonlake)) {
if (target.has_feature(Target::AVX10_1)) {
return target.vector_bits;
} else if (target.has_feature(Target::AVX512) ||
target.has_feature(Target::AVX512_Skylake) ||
target.has_feature(Target::AVX512_KNL) ||
target.has_feature(Target::AVX512_Cannonlake)) {
return 512;
} else if (target.has_feature(Target::AVX) ||
target.has_feature(Target::AVX2)) {
Expand Down
39 changes: 37 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ Target calculate_host_target() {
// Call cpuid with eax=7, ecx=0
int info2[4];
cpuid(info2, 7, 0);
int info3[4];
cpuid(info3, 7, 1);
const uint32_t avx2 = 1U << 5;
const uint32_t avx512f = 1U << 16;
const uint32_t avx512dq = 1U << 17;
Expand Down Expand Up @@ -283,16 +285,47 @@ Target calculate_host_target() {

const uint32_t avxvnni = 1U << 4; // avxvnni (note, not avx512vnni) result in eax
const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1)
int info3[4];
cpuid(info3, 7, 1);
// TODO: port to family/model -based detection.
if ((info3[0] & avxvnni) == avxvnni &&
(info3[0] & avx512bf16) == avx512bf16) {
initial_features.push_back(Target::AVX512_SapphireRapids);
}
}
}

// AVX10 converged vector instructions.
// AVX10 support is enumerated in EDX bit 19 of cpuid(eax=7, ecx=1),
// i.e. in info3 (the same register the APX check below reads), not in
// the subleaf-0 result held in info2.
const uint32_t avx10 = 1U << 19;
if (info3[3] & avx10) {
    // Leaf 0x24, subleaf 0 describes the AVX10 implementation: EBX holds
    // the version number in bits 0:7 and the supported vector widths in
    // bits 16:18.
    int info_avx10[4];
    cpuid(info_avx10, 0x24, 0x0);
    const uint32_t avx10_ebx = static_cast<uint32_t>(info_avx10[1]);

    // This checks that the AVX10 version is greater than zero.
    // It isn't really needed as for now only one version exists, but
    // the docs indicate bits 0:7 of EBX should be >= 1 so...
    if ((avx10_ebx & 0xff) >= 1) {
        initial_features.push_back(Target::AVX10_1);

        const uint32_t avx10_128 = 1U << 16;
        const uint32_t avx10_256 = 1U << 17;
        const uint32_t avx10_512 = 1U << 18;
        // Choose the maximum one that is available. If no width bit is
        // reported, vector_bits is left at its previous value.
        if (avx10_ebx & avx10_512) {
            vector_bits = 512;
        } else if (avx10_ebx & avx10_256) {
            vector_bits = 256;
        } else if (avx10_ebx & avx10_128) {  // Not clear it is worth turning on AVX10 for this case.
            vector_bits = 128;
        }
    }
}

// APX register extensions, etc.
const uint32_t apx = 1U << 21;
if (info3[3] & apx) {
initial_features.push_back(Target::X86APX);
}
}

#endif
#endif
#endif
Expand Down Expand Up @@ -556,6 +589,8 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"vk_v12", Target::VulkanV12},
{"vk_v13", Target::VulkanV13},
{"semihosting", Target::Semihosting},
{"avx10_1", Target::AVX10_1},
{"x86apx", Target::X86APX},
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
};

Expand Down
2 changes: 2 additions & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ struct Target {
VulkanV12 = halide_target_feature_vulkan_version12,
VulkanV13 = halide_target_feature_vulkan_version13,
Semihosting = halide_target_feature_semihosting,
AVX10_1 = halide_target_feature_avx10_1,
X86APX = halide_target_feature_x86_apx,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,8 @@ typedef enum halide_target_feature_t {
halide_target_feature_vulkan_version12, ///< Enable Vulkan v1.2 runtime target support.
halide_target_feature_vulkan_version13, ///< Enable Vulkan v1.3 runtime target support.
halide_target_feature_semihosting, ///< Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available.
halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down
2 changes: 2 additions & 0 deletions test/correctness/simd_op_check_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,5 +673,7 @@ int main(int argc, char **argv) {
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake"),
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake-avx512_zen4"),
Target("x86-64-linux-sse41-avx-f16c-fma-avx2-avx512-avx512_skylake-avx512_cannonlake-avx512_zen4-avx512_sapphirerapids"),
// Can be enabled when AVX10 and APX support are stable in LLVM.
// Target("x86-64-linux-avx10_1-vector_bits_256-x86apx"),
});
}

0 comments on commit 4399ed8

Please sign in to comment.