Skip to content

Commit 8af94a0

Browse files
authored
[ESIMD] Use LLVM IR instead of GenX intrinsics for block loads/stores - USM and SLM (#10041)
This patch replaces some uses of GenX intrinsics with regular LLVM IR. This change made it possible to remove most of the restrictions that previously accompanied slm_block_load/store() and block_load/store() accepting a USM pointer. The GPU RT/driver can lower 'load' instructions to one or several flat-loads and/or gather instructions depending on the target device, alignment and length. The same applies to 'store' instructions. --------- Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com>
1 parent 80b7de1 commit 8af94a0

File tree

12 files changed

+712
-324
lines changed

12 files changed

+712
-324
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 96 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,10 @@ struct ESIMDIntrinDesc {
151151
enum class GenXArgConversion : int16_t {
152152
NONE, // no conversion
153153
TO_I1, // convert vector of N-bit integer to 1-bit
154-
TO_I8, // convert vector of N-bit integer to 18-bit
154+
TO_I8, // convert vector of N-bit integer to 8-bit
155155
TO_I16, // convert vector of N-bit integer to 16-bit
156156
TO_I32, // convert vector of N-bit integer to 32-bit
157+
TO_I64, // convert vector of N-bit integer to 64-bit
157158
};
158159

159160
// Denotes GenX intrinsic name suffix creation rule kind.
@@ -252,6 +253,12 @@ class ESIMDIntrinDescTable {
252253
{{N, ESIMDIntrinDesc::GenXArgConversion::TO_I32}}};
253254
}
254255

256+
static constexpr ESIMDIntrinDesc::ArgRule t64(int16_t N) {
257+
return ESIMDIntrinDesc::ArgRule{
258+
ESIMDIntrinDesc::SRC_TMPL_ARG,
259+
{{N, ESIMDIntrinDesc::GenXArgConversion::TO_I64}}};
260+
}
261+
255262
static constexpr ESIMDIntrinDesc::ArgRule a(int16_t N) {
256263
return ESIMDIntrinDesc::ArgRule{
257264
ESIMDIntrinDesc::SRC_CALL_ARG,
@@ -344,10 +351,6 @@ class ESIMDIntrinDescTable {
344351
nk(-1)}},
345352
{"vload", {"vload", {l(0)}}},
346353
{"vstore", {"vstore", {a(1), a(0)}}},
347-
348-
{"svm_block_ld_unaligned", {"svm.block.ld.unaligned", {l(0)}}},
349-
{"svm_block_ld", {"svm.block.ld", {l(0)}}},
350-
{"svm_block_st", {"svm.block.st", {l(1)}}},
351354
{"svm_gather", {"svm.gather", {ai1(1), t(3), a(0), u(-1)}}},
352355
{"svm_gather4_scaled",
353356
{"svm.gather4.scaled", {ai1(1), t(2), c16(0), c64(0), a(0), u(-1)}}},
@@ -797,6 +800,9 @@ static APInt parseTemplateArg(id::FunctionEncoding *FE, unsigned int N,
797800
case ESIMDIntrinDesc::GenXArgConversion::TO_I32:
798801
Ty = IntegerType::getInt32Ty(Ctx);
799802
break;
803+
case ESIMDIntrinDesc::GenXArgConversion::TO_I64:
804+
Ty = IntegerType::getInt64Ty(Ctx);
805+
break;
800806
}
801807

802808
switch (Args[N]->getKind()) {
@@ -805,8 +811,8 @@ static APInt parseTemplateArg(id::FunctionEncoding *FE, unsigned int N,
805811
const std::string_view &TyStr = ValL->getType();
806812
if (Conv == ESIMDIntrinDesc::GenXArgConversion::NONE && TyStr.size() != 0)
807813
// Overwrite Ty with IntegerLiteral's size
808-
Ty =
809-
parsePrimitiveTypeString(StringRef(&*TyStr.begin(), TyStr.size()), Ctx);
814+
Ty = parsePrimitiveTypeString(StringRef(&*TyStr.begin(), TyStr.size()),
815+
Ctx);
810816
Val = ValL->getValue();
811817
break;
812818
}
@@ -823,8 +829,31 @@ static APInt parseTemplateArg(id::FunctionEncoding *FE, unsigned int N,
823829
default:
824830
llvm_unreachable_internal("bad esimd intrinsic template parameter");
825831
}
826-
return APInt(Ty->getPrimitiveSizeInBits(), StringRef(&*Val.begin(), Val.size()),
827-
10);
832+
return APInt(Ty->getPrimitiveSizeInBits(),
833+
StringRef(&*Val.begin(), Val.size()), 10);
834+
}
835+
836+
// Returns the value of the 'ArgIndex' parameter of the template
837+
// function called at 'CI'.
838+
static APInt parseTemplateArg(CallInst &CI, int ArgIndex,
839+
ESIMDIntrinDesc::GenXArgConversion Conv =
840+
ESIMDIntrinDesc::GenXArgConversion::NONE) {
841+
Function *F = CI.getCalledFunction();
842+
llvm::esimd::assert_and_diag(F, "function to translate is invalid");
843+
844+
StringRef MnglName = F->getName();
845+
using Demangler = id::ManglingParser<SimpleAllocator>;
846+
Demangler Parser(MnglName.begin(), MnglName.end());
847+
id::Node *AST = Parser.parse();
848+
llvm::esimd::assert_and_diag(
849+
AST && Parser.ForwardTemplateRefs.empty(),
850+
"failed to demangle ESIMD intrinsic: ", MnglName);
851+
llvm::esimd::assert_and_diag(AST->getKind() == id::Node::KFunctionEncoding,
852+
"bad ESIMD intrinsic: ", MnglName);
853+
854+
auto *FE = static_cast<id::FunctionEncoding *>(AST);
855+
Type *Ty = nullptr;
856+
return parseTemplateArg(FE, ArgIndex, Ty, CI.getContext(), Conv);
828857
}
829858

830859
// Constructs a GenX intrinsic name suffix based on the original C++ name (stem)
@@ -921,6 +950,48 @@ static std::string getESIMDIntrinSuffix(id::FunctionEncoding *FE,
921950
return Suff;
922951
}
923952

953+
static void translateBlockLoad(CallInst &CI, bool IsSLM) {
954+
IRBuilder<> Builder(&CI);
955+
956+
constexpr int AlignmentTemplateArgIdx = 2;
957+
APInt Val = parseTemplateArg(CI, AlignmentTemplateArgIdx,
958+
ESIMDIntrinDesc::GenXArgConversion::TO_I64);
959+
MaybeAlign Align(Val.getZExtValue());
960+
961+
auto Op0 = CI.getArgOperand(0);
962+
auto DataType = CI.getType();
963+
if (IsSLM) {
964+
// Convert 'uint32_t' to 'addrspace(3)*' pointer.
965+
auto PtrType = PointerType::get(DataType, 3);
966+
Op0 = Builder.CreateIntToPtr(Op0, PtrType);
967+
}
968+
969+
auto LI = Builder.CreateAlignedLoad(DataType, Op0, Align, CI.getName());
970+
LI->setDebugLoc(CI.getDebugLoc());
971+
CI.replaceAllUsesWith(LI);
972+
}
973+
974+
static void translateBlockStore(CallInst &CI, bool IsSLM) {
975+
IRBuilder<> Builder(&CI);
976+
977+
constexpr int AlignmentTemplateArgIdx = 2;
978+
APInt Val = parseTemplateArg(CI, AlignmentTemplateArgIdx,
979+
ESIMDIntrinDesc::GenXArgConversion::TO_I64);
980+
MaybeAlign Align(Val.getZExtValue());
981+
982+
auto Op0 = CI.getArgOperand(0);
983+
auto Op1 = CI.getArgOperand(1);
984+
if (IsSLM) {
985+
// Convert 'uint32_t' to 'addrspace(3)*' pointer.
986+
auto DataType = Op1->getType();
987+
auto PtrType = PointerType::get(DataType, 3);
988+
Op0 = Builder.CreateIntToPtr(Op0, PtrType);
989+
}
990+
991+
auto SI = Builder.CreateAlignedStore(Op1, Op0, Align);
992+
SI->setDebugLoc(CI.getDebugLoc());
993+
}
994+
924995
// TODO Specify document behavior for slm_init and nbarrier_init when:
925996
// 1) they are called not from kernels
926997
// 2) there are multiple such calls reachable from a kernel
@@ -945,29 +1016,13 @@ static void translateNbarrierInit(CallInst &CI) {
9451016
}
9461017

9471018
static void translatePackMask(CallInst &CI) {
948-
using Demangler = id::ManglingParser<SimpleAllocator>;
949-
Function *F = CI.getCalledFunction();
950-
llvm::esimd::assert_and_diag(F, "function to translate is invalid");
951-
952-
StringRef MnglName = F->getName();
953-
Demangler Parser(MnglName.begin(), MnglName.end());
954-
id::Node *AST = Parser.parse();
955-
956-
llvm::esimd::assert_and_diag(
957-
AST && Parser.ForwardTemplateRefs.empty(),
958-
"failed to demangle ESIMD intrinsic: ", MnglName);
959-
llvm::esimd::assert_and_diag(AST->getKind() == id::Node::KFunctionEncoding,
960-
"bad ESIMD intrinsic: ", MnglName);
961-
962-
auto *FE = static_cast<id::FunctionEncoding *>(AST);
963-
llvm::LLVMContext &Context = CI.getContext();
964-
Type *TTy = nullptr;
965-
APInt Val = parseTemplateArg(FE, 0, TTy, Context);
1019+
APInt Val = parseTemplateArg(CI, 0);
9661020
unsigned N = Val.getZExtValue();
9671021
Value *Result = CI.getArgOperand(0);
9681022
assert(Result->getType()->isIntOrIntVectorTy());
9691023
Value *Zero = ConstantInt::get(Result->getType(), 0);
9701024
IRBuilder<> Builder(&CI);
1025+
llvm::LLVMContext &Context = CI.getContext();
9711026
// TODO CM_COMPAT
9721027
// In CM non LSB bits in mask elements are ignored, so e.g. '2' is treated as
9731028
// 'false' there. ESIMD adopts C++ semantics, where any non-zero is 'true'.
@@ -985,29 +1040,14 @@ static void translatePackMask(CallInst &CI) {
9851040
}
9861041

9871042
static void translateUnPackMask(CallInst &CI) {
988-
using Demangler = id::ManglingParser<SimpleAllocator>;
989-
Function *F = CI.getCalledFunction();
990-
llvm::esimd::assert_and_diag(F, "function to translate is invalid");
991-
StringRef MnglName = F->getName();
992-
Demangler Parser(MnglName.begin(), MnglName.end());
993-
id::Node *AST = Parser.parse();
994-
995-
llvm::esimd::assert_and_diag(
996-
AST && Parser.ForwardTemplateRefs.empty(),
997-
"failed to demangle ESIMD intrinsic: ", MnglName);
998-
llvm::esimd::assert_and_diag(AST->getKind() == id::Node::KFunctionEncoding,
999-
"bad ESIMD intrinsic: ", MnglName);
1000-
1001-
auto *FE = static_cast<id::FunctionEncoding *>(AST);
1002-
llvm::LLVMContext &Context = CI.getContext();
1003-
Type *TTy = nullptr;
1004-
APInt Val = parseTemplateArg(FE, 0, TTy, Context);
1043+
APInt Val = parseTemplateArg(CI, 0);
10051044
unsigned N = Val.getZExtValue();
10061045
// get N x i1
10071046
assert(CI.arg_size() == 1);
10081047
llvm::Value *Arg0 = CI.getArgOperand(0);
10091048
unsigned Width = Arg0->getType()->getPrimitiveSizeInBits();
10101049
IRBuilder<> Builder(&CI);
1050+
llvm::LLVMContext &Context = CI.getContext();
10111051
if (Width > N) {
10121052
llvm::Type *Ty = llvm::IntegerType::get(Context, N);
10131053
Arg0 = Builder.CreateTrunc(Arg0, Ty);
@@ -1857,6 +1897,18 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
18571897
// process ESIMD builtins that go through special handling instead of
18581898
// the translation procedure
18591899

1900+
if (Name.startswith("__esimd_svm_block_ld") ||
1901+
Name.startswith("__esimd_slm_block_ld")) {
1902+
translateBlockLoad(*CI, Name.startswith("__esimd_slm_block_ld"));
1903+
ToErase.push_back(CI);
1904+
continue;
1905+
}
1906+
if (Name.startswith("__esimd_svm_block_st") ||
1907+
Name.startswith("__esimd_slm_block_st")) {
1908+
translateBlockStore(*CI, Name.startswith("__esimd_slm_block_st"));
1909+
ToErase.push_back(CI);
1910+
continue;
1911+
}
18601912
if (Name.startswith("__esimd_nbarrier_init")) {
18611913
translateNbarrierInit(*CI);
18621914
ToErase.push_back(CI);

sycl/include/sycl/ext/intel/esimd/common.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,13 @@ using SurfaceIndex = unsigned int;
6565

6666
namespace detail {
6767

68-
template <typename T>
69-
struct is_saturation_tag {
68+
// Type used in internal functions to designate SLM access by
69+
// providing dummy accessor of this type. Used to make it possible to delegate
70+
// implementations of SLM memory accesses to general surface-based memory
71+
// accesses and thus reuse validity checks etc.
72+
struct LocalAccessorMarker {};
73+
74+
template <typename T> struct is_saturation_tag {
7075
static constexpr bool value =
7176
std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
7277
std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;

0 commit comments

Comments
 (0)