Description
Consider the following code:
#include <immintrin.h>
/*
 * Masked byte add implemented with a single inline-assembly `vpaddb`.
 *
 * Operands, as bound by the constraints below:
 *   src - read-write destination vector register ("+v"); returned after
 *         the instruction has executed.
 *   k   - write mask, placed in a mask register ("Yk").
 *   a,b - the two source vector registers ("v").
 *
 * The template uses the {AT&T|Intel} dialect-alternative syntax so that both
 * halves of the group are present and the braces are balanced; `%{` and `%}`
 * emit the literal braces that surround the mask operand.
 */
__attribute__((target("avx512bw")))
static __attribute__((always_inline)) __m512i MM512_MASK_ADD_EPI8(__m512i src,
                                                                  __mmask64 k,
                                                                  __m512i a,
                                                                  __m512i b) {
  __asm__("vpaddb\t{%3, %2, %0 %{%1%}|%0 %{%1%}, %2, %3}"
          : "+v"(src)
          : "Yk"(k), "v"(a), "v"(b));
  return src;
}
__attribute__((target("avx512bw")))
__m512i F(__m512i src, __mmask64 k, __m512i a, __m512i b) {
return MM512_MASK_ADD_EPI8(src, k, a, b);
}
__attribute__((target("avx512bw,avx512dq")))
__m512i G(__m512i src, __mmask64 k, __m512i a, __m512i b) {
return MM512_MASK_ADD_EPI8(src, k, a, b);
}
__attribute__((target("avx512bw,avx512vl")))
__m512i H(__m512i src, __mmask64 k, __m512i a, __m512i b) {
return MM512_MASK_ADD_EPI8(src, k, a, b);
}
When compiling with previous versions of clang (up to and including version 16), MM512_MASK_ADD_EPI8 is inlined into F, G, and H (as expected). Testing with LLVM 17 RC yields a different result: MM512_MASK_ADD_EPI8 is inlined only into F, and horrible code is generated for G and H. I believe this regression is caused by d6f994a.
Please revert that change or apply the appropriate fix to X86TargetTransformInfo.cpp for the LLVM 17 release.
Metadata
Metadata
Assignees
Type
Projects
Status
Done