Skip to content

[AArch64][SME2] Add __arm_streaming_compatible attribute to CLAMP bui… #76712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2052,8 +2052,8 @@ def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone
}

let TargetGuard = "sve2p1|sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [IsStreamingCompatible], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [IsStreamingCompatible], []>;

defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
}
Expand Down
14 changes: 10 additions & 4 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -DTEST_SME2 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sve.h>
Expand All @@ -22,6 +22,12 @@
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
#endif

#ifndef TEST_SME2
#define ATTR
#else
#define ATTR __arm_streaming_compatible
#endif

// CHECK-LABEL: @test_svclamp_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
Expand All @@ -32,7 +38,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) {
svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _f16, , )(op1, op2, op3);
}

Expand All @@ -46,7 +52,7 @@ svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fclamp.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) {
svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _f32, , )(op1, op2, op3);
}

Expand All @@ -60,7 +66,7 @@ svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fclamp.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) {
svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _f64, , )(op1, op2, op3);
}

17 changes: 12 additions & 5 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sve.h>

Expand All @@ -20,6 +22,12 @@
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
#endif

#ifndef TEST_SME2
#define ATTR
#else
#define ATTR __arm_streaming_compatible
#endif

// CHECK-LABEL: @test_svclamp_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
Expand All @@ -30,7 +38,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) {
svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _s8, , )(op1, op2, op3);
}

Expand All @@ -44,7 +52,7 @@ svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) {
svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _s16, , )(op1, op2, op3);
}

Expand All @@ -58,7 +66,7 @@ svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) {
svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _s32, , )(op1, op2, op3);
}

Expand All @@ -72,7 +80,6 @@ svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) {
svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _s64, , )(op1, op2, op3);
}

17 changes: 12 additions & 5 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -DTEST_SME2 \
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sve.h>

Expand All @@ -20,6 +22,12 @@
#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
#endif

#ifndef TEST_SME2
#define ATTR
#else
#define ATTR __arm_streaming_compatible
#endif

// CHECK-LABEL: @test_svclamp_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
Expand All @@ -30,7 +38,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) {
svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _u8, , )(op1, op2, op3);
}

Expand All @@ -44,7 +52,7 @@ svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) {
svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _u16, , )(op1, op2, op3);
}

Expand All @@ -58,7 +66,7 @@ svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) {
svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _u32, , )(op1, op2, op3);
}

Expand All @@ -72,7 +80,6 @@ svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) {
svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) ATTR {
return SVE_ACLE_FUNC(svclamp, _u64, , )(op1, op2, op3);
}