Skip to content

Commit

Permalink
[Clang][ARM] Make CRC and DSP intrinsics always available. (llvm#107417)
Browse files Browse the repository at this point in the history
Both features have a corresponding target feature, so the compiler can check whether each use is valid.
  • Loading branch information
DanielKristofKiss authored Sep 16, 2024
1 parent e88b7ff commit cf2122c
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 24 deletions.
39 changes: 18 additions & 21 deletions clang/lib/Headers/arm_acle.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,28 +264,28 @@ __rbitl(unsigned long __t) {
}

/* 8.3 16-bit multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbb(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smulbb builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulbt(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smulbt builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultb(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smultb builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smultt(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smultt builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwb(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smulwb builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp")))
__smulwt(int32_t __a, int32_t __b) {
/* Thin wrapper: passes __a and __b straight to the smulwt builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smulwt(__a, __b);
}
Expand All @@ -304,46 +304,46 @@ __smulwt(int32_t __a, int32_t __b) {
#endif

/* 8.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qadd(int32_t __t, int32_t __v) {
/* Thin wrapper: passes __t and __v straight to the qadd builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qsub(int32_t __t, int32_t __v) {
/* Thin wrapper: passes __t and __v straight to the qsub builtin and
   returns its result unchanged. The target("dsp") attribute on the
   declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qdbl(int32_t __t) {
/* There is no separate builtin used here: the doubling is expressed by
   giving the qadd builtin the same value, __t, as both operands. */
return __builtin_arm_qadd(__t, __t);
}
#endif

/* 8.4.3 Accumulating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlabb
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlabt
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlatb
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlatt
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlawb
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
/* Thin wrapper: passes __a, __b and __c, in that order, to the smlawt
   builtin and returns its result unchanged. The target("dsp") attribute
   on the declaration ties this wrapper to the dsp target feature. */
return __builtin_arm_smlawt(__a, __b, __c);
}
Expand Down Expand Up @@ -621,8 +621,6 @@ __rintnf(float __a) {
#endif

/* 8.8 CRC32 intrinsics */
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32b(__a, __b);
Expand Down Expand Up @@ -662,7 +660,6 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32cd(__a, __b);
}
#endif

/* 8.6 Floating-point data-processing intrinsics */
/* Armv8.3-A Javascript conversion intrinsic */
Expand Down
76 changes: 73 additions & 3 deletions clang/test/CodeGen/arm_acle.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32
// RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -target-feature +crc -target-feature +dsp -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32
// RUN: %clang_cc1 -ffreestanding -Wno-error=implicit-function-declaration -triple aarch64-none-elf -target-feature +neon -target-feature +crc -target-feature +crypto -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64
// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-elf -target-feature +v8.3a -target-feature +crc -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
Expand Down Expand Up @@ -638,12 +639,15 @@ uint32_t test_usat(int32_t t) {
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#ifdef __ARM_FEATURE_DSP
#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_qadd(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __qadd(a, b) and returns its result. When __ARM_FEATURE_DSP is not
// defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_qadd(int32_t a, int32_t b) {
return __qadd(a, b);
}
Expand All @@ -653,6 +657,9 @@ int32_t test_qadd(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qsub(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __qsub(a, b) and returns its result. When __ARM_FEATURE_DSP is not
// defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_qsub(int32_t a, int32_t b) {
return __qsub(a, b);
}
Expand All @@ -664,6 +671,9 @@ extern int32_t f();
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[CALL]], i32 [[CALL]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Feeds the value returned by f() to __qdbl and returns the result.
// The parameter list is spelled (void) so the definition provides a real
// prototype instead of a deprecated empty-parentheses declarator.
// When __ARM_FEATURE_DSP is not defined, the test function itself is
// tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_qdbl(void) {
return __qdbl(f());
}
Expand All @@ -672,12 +682,15 @@ int32_t test_qdbl() {
/*
* 9.3 16-bit multiplications
*/
#if __ARM_FEATURE_DSP
#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_smulbb(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smulbb(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smulbb(int32_t a, int32_t b) {
return __smulbb(a, b);
}
Expand All @@ -687,6 +700,9 @@ int32_t test_smulbb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smulbt(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smulbt(int32_t a, int32_t b) {
return __smulbt(a, b);
}
Expand All @@ -696,6 +712,9 @@ int32_t test_smulbt(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smultb(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smultb(int32_t a, int32_t b) {
return __smultb(a, b);
}
Expand All @@ -705,6 +724,9 @@ int32_t test_smultb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smultt(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smultt(int32_t a, int32_t b) {
return __smultt(a, b);
}
Expand All @@ -714,6 +736,9 @@ int32_t test_smultt(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwb(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smulwb(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smulwb(int32_t a, int32_t b) {
return __smulwb(a, b);
}
Expand All @@ -723,18 +748,24 @@ int32_t test_smulwb(int32_t a, int32_t b) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwt(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smulwt(a, b) and returns its result. When __ARM_FEATURE_DSP is
// not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smulwt(int32_t a, int32_t b) {
return __smulwt(a, b);
}
#endif

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_smlabb(
// AArch32-NEXT: entry:
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlabb(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlabb(int32_t a, int32_t b, int32_t c) {
return __smlabb(a, b, c);
}
Expand All @@ -744,6 +775,9 @@ int32_t test_smlabb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlabt(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlabt(int32_t a, int32_t b, int32_t c) {
return __smlabt(a, b, c);
}
Expand All @@ -753,6 +787,9 @@ int32_t test_smlabt(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlatb(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlatb(int32_t a, int32_t b, int32_t c) {
return __smlatb(a, b, c);
}
Expand All @@ -762,6 +799,9 @@ int32_t test_smlatb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlatt(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlatt(int32_t a, int32_t b, int32_t c) {
return __smlatt(a, b, c);
}
Expand All @@ -771,6 +811,9 @@ int32_t test_smlatt(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlawb(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlawb(int32_t a, int32_t b, int32_t c) {
return __smlawb(a, b, c);
}
Expand All @@ -780,6 +823,9 @@ int32_t test_smlawb(int32_t a, int32_t b, int32_t c) {
// AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]])
// AArch32-NEXT: ret i32 [[TMP0]]
//
// Calls __smlawt(a, b, c) and returns its result. When __ARM_FEATURE_DSP
// is not defined, the test function itself is tagged target("dsp").
#ifndef __ARM_FEATURE_DSP
__attribute__((target("dsp")))
#endif
int32_t test_smlawt(int32_t a, int32_t b, int32_t c) {
return __smlawt(a, b, c);
}
Expand Down Expand Up @@ -1335,6 +1381,9 @@ int32_t test_smusdx(int16x2_t a, int16x2_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32b(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
// Calls __crc32b(a, b) and returns its result. When __ARM_FEATURE_CRC32 is
// not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32b(uint32_t a, uint8_t b) {
return __crc32b(a, b);
}
Expand All @@ -1351,6 +1400,9 @@ uint32_t test_crc32b(uint32_t a, uint8_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32h(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
// Calls __crc32h(a, b) and returns its result. When __ARM_FEATURE_CRC32 is
// not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32h(uint32_t a, uint16_t b) {
return __crc32h(a, b);
}
Expand All @@ -1365,6 +1417,9 @@ uint32_t test_crc32h(uint32_t a, uint16_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32w(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
// Calls __crc32w(a, b) and returns its result. When __ARM_FEATURE_CRC32 is
// not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32w(uint32_t a, uint32_t b) {
return __crc32w(a, b);
}
Expand All @@ -1383,6 +1438,9 @@ uint32_t test_crc32w(uint32_t a, uint32_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32x(i32 [[A:%.*]], i64 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
// Calls __crc32d(a, b) and returns its result. When __ARM_FEATURE_CRC32 is
// not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32d(uint32_t a, uint64_t b) {
return __crc32d(a, b);
}
Expand All @@ -1399,6 +1457,9 @@ uint32_t test_crc32d(uint32_t a, uint64_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32cb(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
// Calls __crc32cb(a, b) and returns its result. When __ARM_FEATURE_CRC32
// is not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32cb(uint32_t a, uint8_t b) {
return __crc32cb(a, b);
}
Expand All @@ -1415,6 +1476,9 @@ uint32_t test_crc32cb(uint32_t a, uint8_t b) {
// AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32ch(i32 [[A:%.*]], i32 [[TMP0]])
// AArch64-NEXT: ret i32 [[TMP1]]
//
// Calls __crc32ch(a, b) and returns its result. When __ARM_FEATURE_CRC32
// is not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32ch(uint32_t a, uint16_t b) {
return __crc32ch(a, b);
}
Expand All @@ -1429,6 +1493,9 @@ uint32_t test_crc32ch(uint32_t a, uint16_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cw(i32 [[A:%.*]], i32 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
// Calls __crc32cw(a, b) and returns its result. When __ARM_FEATURE_CRC32
// is not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32cw(uint32_t a, uint32_t b) {
return __crc32cw(a, b);
}
Expand All @@ -1447,6 +1514,9 @@ uint32_t test_crc32cw(uint32_t a, uint32_t b) {
// AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cx(i32 [[A:%.*]], i64 [[B:%.*]])
// AArch64-NEXT: ret i32 [[TMP0]]
//
// Calls __crc32cd(a, b) and returns its result. When __ARM_FEATURE_CRC32
// is not defined, the test function itself is tagged target("crc").
#ifndef __ARM_FEATURE_CRC32
__attribute__((target("crc")))
#endif
uint32_t test_crc32cd(uint32_t a, uint64_t b) {
return __crc32cd(a, b);
}
Expand Down

0 comments on commit cf2122c

Please sign in to comment.