-
Notifications
You must be signed in to change notification settings - Fork 14k
[Clang][AMDGPU] Add builtins for intrinsic llvm.amdgcn.raw.ptr.buffer.store
#94576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-modules @llvm/pr-subscribers-clang-codegen Author: Shilei Tian (shiltian) Changes: Patch is 25.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94576.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 433c7795325f0..d6866304f8b1b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -148,6 +148,20 @@ BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_i8, "vcV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_i16, "vsV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_i32, "viV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_f16, "vhV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_f32, "vfV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v2i16, "vV2sV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v2i32, "vV2iV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v2f16, "vV2hV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v2f32, "vV2fV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v4i16, "vV4sV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v4i32, "vV4iV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v4f16, "vV4hV4iIiIiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_store_v4f32, "vV4fV4iIiIiIi", "n")
+
//===----------------------------------------------------------------------===//
// Ballot builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 37d0c478e0330..706141cbc85ce 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19063,6 +19063,29 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_i8:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_i16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_i32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_f32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_f16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v2i16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v2i32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v2f16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v2f32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v4i16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v4i32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v4f16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_v4f32: {
+ llvm::Value *VData = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Rsrc = EmitScalarExpr(E->getArg(1));
+ llvm::Value *Offset = EmitScalarExpr(E->getArg(2));
+ llvm::Value *SOffset = EmitScalarExpr(E->getArg(3));
+ llvm::Value *Aux = EmitScalarExpr(E->getArg(4));
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_raw_buffer_store, VData->getType());
+ return Builder.CreateCall(F, {VData, Rsrc, Offset, SOffset, Aux});
+ }
default:
return nullptr;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-store.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-store.cl
new file mode 100644
index 0000000000000..0553c53ab41ad
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-store.cl
@@ -0,0 +1,264 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s --check-prefixes=VERDE
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s --check-prefixes=GFX8
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s --check-prefixes=GFX11
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef short v2i16 __attribute__((ext_vector_type(2)));
+typedef int v2i32 __attribute__((ext_vector_type(2)));
+typedef half v2f16 __attribute__((ext_vector_type(2)));
+typedef float v2f32 __attribute__((ext_vector_type(2)));
+typedef short v4i16 __attribute__((ext_vector_type(4)));
+typedef int v4i32 __attribute__((ext_vector_type(4)));
+typedef half v4f16 __attribute__((ext_vector_type(4)));
+typedef float v4f32 __attribute__((ext_vector_type(4)));
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_i8(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i8(i8 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_i8(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i8(i8 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_i8(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i8(i8 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_i8(char vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_i8(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_i16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i16(i16 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_i16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i16(i16 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_i16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i16(i16 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_i16(short vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_i32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i32(i32 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_i32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i32(i32 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_i32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.i32(i32 [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_i32(int vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_f16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f16(half [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_f16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f16(half [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_f16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f16(half [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_f16(half vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_f32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f32(float [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_f32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f32(float [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_f32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.f32(float [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_f32(float vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v2i16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v2i16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v2i16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v2i16(v2i16 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v2i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v2i32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v2i32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v2i32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v2i32(v2i32 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v2i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v2f16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v2f16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v2f16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v2f16(v2f16 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v2f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v2f32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v2f32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v2f32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v2f32(v2f32 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v2f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v4i16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v4i16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v4i16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v4i16(v4i16 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v4i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v4i32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v4i32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v4i32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v4i32(v4i32 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v4i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v4f16(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v4f16(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v4f16(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v4f16(v4f16 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v4f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// VERDE-LABEL: @test_amdgcn_raw_buffer_store_v4f32(
+// VERDE-NEXT: entry:
+// VERDE-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// VERDE-NEXT: ret void
+//
+// GFX8-LABEL: @test_amdgcn_raw_buffer_store_v4f32(
+// GFX8-NEXT: entry:
+// GFX8-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX8-NEXT: ret void
+//
+// GFX11-LABEL: @test_amdgcn_raw_buffer_store_v4f32(
+// GFX11-NEXT: entry:
+// GFX11-NEXT: tail call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> [[VDATA:%.*]], <4 x i32> [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// GFX11-NEXT: ret void
+//
+void test_amdgcn_raw_buffer_store_v4f32(v4f32 vdata, v4i32 rsrc) {
+ __builtin_amdgcn_raw_buffer_store_v4f32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-store-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-store-error.cl
new file mode 100644
index 0000000000000..33e20d6dc771a
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-store-error.cl
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef short v2i16 __attribute__((ext_vector_type(2)));
+typedef int v2i32 __attribute__((ext_vector_type(2)));
+typedef half v2f16 __attribute__((ext_vector_type(2)));
+typedef float v2f32 __attribute__((ext_vector_type(2)));
+typedef short v4i16 __attribute__((ext_vector_type(4)));
+typedef int v4i32 __attribute__((ext_vector_type(4)));
+typedef half v4f16 __attribute__((ext_vector_type(4)));
+typedef float v4f32 __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_raw_buffer_store_i8(char vdata, v4i32 rsrc, int offset, int soffset, int aux) {
+ __builtin_amdgcn_raw_buffer_store_i8(vdata, rsrc, offset, /*soffset=*/0, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i8' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i8(vdata, rsrc, /*offset=*/0, soffset, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i8' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i8(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i8' must be a constant integer}}
+}
+
+void test_amdgcn_raw_buffer_store_i16(short vdata, v4i32 rsrc, int offset, int soffset, int aux) {
+ __builtin_amdgcn_raw_buffer_store_i16(vdata, rsrc, offset, /*soffset=*/0, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i16' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i16(vdata, rsrc, /*offset=*/0, soffset, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i16' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i16' must be a constant integer}}
+}
+
+void test_amdgcn_raw_buffer_store_i32(int vdata, v4i32 rsrc, int offset, int soffset, int aux) {
+ __builtin_amdgcn_raw_buffer_store_i32(vdata, rsrc, offset, /*soffset=*/0, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i32' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i32(vdata, rsrc, /*offset=*/0, soffset, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i32' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_i32(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_i32' must be a constant integer}}
+}
+
+void test_amdgcn_raw_buffer_store_f16(half vdata, v4i32 rsrc, int offset, int soffset, int aux) {
+ __builtin_amdgcn_raw_buffer_store_f16(vdata, rsrc, offset, /*soffset=*/0, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_f16' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_f16(vdata, rsrc, /*offset=*/0, soffset, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_f16' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_f16(vdata, rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_f16' must be a constant integer}}
+}
+
+void test_amdgcn_raw_buffer_store_f32(float vdata, v4i32 rsrc, int offset, int soffset, int aux) {
+ __builtin_amdgcn_raw_buffer_store_f32(vdata, rsrc, offset, /*soffset=*/0, /*aux=*/0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_store_f32' must be a constant integer}}
+ __builtin_amdgcn_raw_buffer_store_f32(vdata, rsrc, /*offset=*/0, soffset, /*aux=*/0); //expected-error{{argument to '__builtin_amdg...
[truncated]
|
Is there really a good use case for this? Can you use regular stores to addrspace(7) instead? @krzysz00 Also, do you really need a separate builtin for every legal type, or is there some way they can be type-overloaded? |
I see these regularly used via inline asm in various ML code. We need to expose these in some way to stop people from doing that
Yes, I imagined we would handle images similar to the elementwise intrinsics. However, I don't think that approach works for loads. If we have to have overloads for loads, we probably should mirror it for stores. I think it makes more sense to solve the issue for the load case before the stores. They're a bit more complicated because you have the sign vs. zero extended cases to consider, and the overload would be on the return type |
6f7374e
to
d520ea7
Compare
Re addrspace 7, there's one major piece of work missing: arbitrary-typed inputs. If we do want addrspace(7), we'll need to expose And even so, we'll want addrspace(8) to work because there are non-raw buffers (ex. swizzling). |
Yes. We probably should expose some kind of custom type instead of directly using a C address_space(7) attribute |
Actually, even ignoring address space 7, it feels like these builtins if you could |
We could either have a builtin for all the types that would work, or if we want to treat them more like a normal pointer, clang could verify you only use them with types that will work. Although I just realized, we have to have the builtins because of the separate swizzled and non-swizzled operands in the intrinsic |
that should become two |
But the raw buffer intrinsics have both the soffset and voffset parameters though? Not just the struct |
Handling arbitrary aggregates here isn't really reasonable or necessary. We can restrict this to a reasonable set of legal-ish types |
The address space 7 lowering just uses voffset. Re arbitrary aggregates: LLPC has code for handling this (since I'm pretty sure stuff like Vulkan wants it, and "aggregates" here might even be unusual cases like |
|
Right, that's the problem. We need to know the parameters of the SRD in order to make use of the scalar offset. Ideally we would have one pointer operand and be able to addressing mode match into soffset/voffset/imm |
Vectors aren't aggregates and are more reasonable |
The thing is, in all the usecases I've seen, |
(My guess for how I might use soffset is if I've got multiple identical buffers concatenated and I need to pick between them without messing with the extent field) |
(The ugly version of the arbitrary types code lives around https://github.com/GPUOpen-Drivers/llpc/blob/6c770c7d276d2c2504aed2a0278aab1610993ecf/lgc/patch/PatchBufferOp.cpp#L1559 and really should be an isel legalization instead) |
#94830 for buffer rsrc data type. Will update this patch afterwards. |
bc3165d
to
450572d
Compare
ping |
maybe add a test for non-constant offset? |
Hmm, I thought I added all tests... My bad. Will add it right away. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering if we should really have all the different typed variants, and if this should be the name. I guess
48d095d
to
e4a7703
Compare
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.ptr.buffer.store
ping |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/594 Here is the relevant piece of the build log for the reference:
|
…er.store` (llvm#94576) Depends on llvm#96313.
Depends on #96313.