Translate the llvm.fshr intrinsic function
Add llvm.fshr intrinsic translation, similar to the existing llvm.fshl translation.
Add a test for it.
A detailed description of FSHR can be found at
https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic

Signed-off-by: amochalo <anastasiya.mochalova@intel.com>

Original commit:
KhronosGroup/SPIRV-LLVM-Translator@5071ae9
MochalovaAn authored and vmaksimo committed May 20, 2021
1 parent b6fcaff commit 424c80c
Showing 2 changed files with 186 additions and 72 deletions.
142 changes: 70 additions & 72 deletions llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -83,17 +83,22 @@ class SPIRVRegularizeLLVMBase {
/// @spirv.llvm_memset_* and replace it with @llvm.memset.
void lowerMemset(MemSetInst *MSI);

/// No SPIR-V counterpart for @llvm.fshl.i* intrinsic. It will be lowered
/// to a newly generated @spirv.llvm_fshl_i* function.
/// Conceptually, FSHL:
/// No SPIR-V counterpart for @llvm.fshl.*(@llvm.fshr.*) intrinsic. It will be
/// lowered to a newly generated @spirv.llvm_fshl_*(@spirv.llvm_fshr_*)
/// function.
///
/// Conceptually, FSHL (FSHR):
/// 1. concatenates the ints, the first one being the more significant;
/// 2. performs a left shift-rotate on the resulting doubled-sized int;
/// 3. returns the most significant bits of the shift-rotate result,
/// 2. performs a left (right) shift-rotate on the resulting doubled-sized
/// int;
/// 3. returns the most (least) significant bits of the shift-rotate result,
/// the number of bits being equal to the size of the original integers.
/// The actual implementation algorithm will be slightly different to speed
/// things up.
void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
void buildFunnelShiftLeftFunc(Function *FSHLFunc);
/// If FSHL (FSHR) operates on a vector type instead, the same operations are
/// performed for each set of corresponding vector elements.
///
/// The actual implementation algorithm will be slightly different for
/// simplification purposes.
void lowerFunnelShift(IntrinsicInst *FSHIntrinsic);
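For illustration, a few concrete calls and the results that the description above implies (values chosen to match the semantics in the LLVM LangRef linked from the commit message; these lines are not part of this patch):

%r1 = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)  ; rotate = 15 % 8 = 7, %r1 = 254 (0b11111110)
%r2 = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 12)  ; rotate = 12 % 8 = 4, %r2 = 240 (0b11110000)
%r3 = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)   ; rotate = 8 % 8 = 0,  %r3 = 255 (the less significant operand)
%r4 = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)  ; rotate = 15 % 8 = 7, %r4 = 128 (0b10000000)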

void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic);
void buildUMulWithOverflowFunc(Function *UMulFunc);
@@ -184,74 +189,66 @@ void SPIRVRegularizeLLVMBase::lowerMemset(MemSetInst *MSI) {
return;
}

void SPIRVRegularizeLLVMBase::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
if (!FSHLFunc->empty())
void SPIRVRegularizeLLVMBase::lowerFunnelShift(IntrinsicInst *FSHIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
// current one. Then simply replace the intrinsic uses with a call to the new
// function.
// Expected LLVM IR for the function: i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i*
// %c)
FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
Type *FSHRetTy = FSHFuncTy->getReturnType();
const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
Function *FSHFunc =
getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName);

if (!FSHFunc->empty()) {
FSHIntrinsic->setCalledFunction(FSHFunc);
return;

auto *IntTy = dyn_cast<IntegerType>(FSHLFunc->getReturnType());
assert(IntTy && "llvm.fshl: expected an integer return type");
assert(FSHLFunc->arg_size() == 3 && "llvm.fshl: expected 3 arguments");
for (Argument &Arg : FSHLFunc->args())
assert(Arg.getType()->getTypeID() == IntTy->getTypeID() &&
"llvm.fshl: mismatched return type and argument types");

// Our function will require 3 basic blocks; the purpose of each will be
// clarified below.
auto *CondBB = BasicBlock::Create(M->getContext(), "cond", FSHLFunc);
auto *RotateBB =
BasicBlock::Create(M->getContext(), "rotate", FSHLFunc); // Main logic
auto *PhiBB = BasicBlock::Create(M->getContext(), "phi", FSHLFunc);

IRBuilder<> Builder(CondBB);
// If the number of bits to rotate for is divisible by the bitsize,
// the shift becomes useless, and we should bypass the main logic in that
// case.
}
auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc);
IRBuilder<> Builder(RotateBB);
Type *Ty = FSHFunc->getReturnType();
// Build the actual funnel shift rotate logic.
// In the comments, "int" is used interchangeably with "vector of int
// elements".
FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty);
Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty;
unsigned BitWidth = IntTy->getIntegerBitWidth();
ConstantInt *BitWidthConstant = Builder.getInt({BitWidth, BitWidth});
Value *BitWidthForInsts =
VectorTy ? Builder.CreateVectorSplat(VectorTy->getNumElements(),
BitWidthConstant)
: BitWidthConstant;
auto *RotateModVal =
Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthConstant);
ConstantInt *ZeroConstant = Builder.getInt({BitWidth, 0});
auto *CheckRotateModIfZero = Builder.CreateICmpEQ(RotateModVal, ZeroConstant);
Builder.CreateCondBr(CheckRotateModIfZero, /*True*/ PhiBB,
/*False*/ RotateBB);
Builder.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts);
Value *FirstShift = nullptr, *SecShift = nullptr;
if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
// Shift the less significant number right, the "rotate" number of bits
// will be 0-filled on the left as a result of this regular shift.
FirstShift = Builder.CreateLShr(FSHFunc->getArg(1), RotateModVal);
else
// Shift the more significant number left, the "rotate" number of bits
// will be 0-filled on the right as a result of this regular shift.
FirstShift = Builder.CreateShl(FSHFunc->getArg(0), RotateModVal);

// We want the "rotate" number of the more significant int's LSBs (MSBs) to
// occupy the leftmost (rightmost) "0 space" left by the previous operation.
// Therefore, subtract the "rotate" number from the integer bitsize...
auto *SubRotateVal = Builder.CreateSub(BitWidthForInsts, RotateModVal);
if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
// ...and left-shift the more significant int by this number, zero-filling
// the LSBs.
SecShift = Builder.CreateShl(FSHFunc->getArg(0), SubRotateVal);
else
// ...and right-shift the less significant int by this number, zero-filling
// the MSBs.
SecShift = Builder.CreateLShr(FSHFunc->getArg(1), SubRotateVal);

// Build the actual funnel shift rotate logic.
Builder.SetInsertPoint(RotateBB);
// Shift the more significant number left, the "rotate" number of bits
// will be 0-filled on the right as a result of this regular shift.
auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal);
// We want the "rotate" number of the second int's MSBs to occupy the
// rightmost "0 space" left by the previous operation. Therefore,
// subtract the "rotate" number from the integer bitsize...
auto *SubRotateVal = Builder.CreateSub(BitWidthConstant, RotateModVal);
// ...and right-shift the second int by this number, zero-filling the MSBs.
auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal);
// A simple binary addition of the shifted ints yields the final result.
auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
Builder.CreateBr(PhiBB);

// PHI basic block. If no actual rotate was required, return the first, more
// significant int. E.g. for 32-bit integers, it's equivalent to concatenating
// the 2 ints and taking 32 MSBs.
Builder.SetInsertPoint(PhiBB);
PHINode *Phi = Builder.CreatePHI(IntTy, 0);
Phi->addIncoming(FunnelShiftRes, RotateBB);
Phi->addIncoming(FSHLFunc->getArg(0), CondBB);
Builder.CreateRet(Phi);
}
auto *FunnelShiftRes = Builder.CreateOr(FirstShift, SecShift);
Builder.CreateRet(FunnelShiftRes);

void SPIRVRegularizeLLVMBase::lowerFunnelShiftLeft(
IntrinsicInst *FSHLIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
// current one. Then simply replace the intrinsic uses with a call to the new
// function.
FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType();
Type *FSHLRetTy = FSHLFuncTy->getReturnType();
const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic);
Function *FSHLFunc =
getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName);
buildFunnelShiftLeftFunc(FSHLFunc);
FSHLIntrinsic->setCalledFunction(FSHLFunc);
FSHIntrinsic->setCalledFunction(FSHFunc);
}
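For an i32 fshr call, the generated helper is roughly the following single-block function (a sketch only; the value names are illustrative, but the instruction sequence matches the CHECK-SPIRV lines in the new test):

define i32 @spirv.llvm_fshr_i32(i32 %a, i32 %b, i32 %c) {
rotate:
  %rot.mod = urem i32 %c, 32     ; rotate amount modulo the bit width
  %lo = lshr i32 %b, %rot.mod    ; less significant int shifted right
  %inv = sub i32 32, %rot.mod    ; remaining number of bits
  %hi = shl i32 %a, %inv         ; more significant int fills the freed MSBs
  %res = or i32 %lo, %hi
  ret i32 %res
}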

void SPIRVRegularizeLLVMBase::buildUMulWithOverflowFunc(Function *UMulFunc) {
@@ -330,8 +327,9 @@ bool SPIRVRegularizeLLVMBase::regularize() {
auto *II = cast<IntrinsicInst>(Call);
if (auto *MSI = dyn_cast<MemSetInst>(II))
lowerMemset(MSI);
else if (II->getIntrinsicID() == Intrinsic::fshl)
lowerFunnelShiftLeft(II);
else if (II->getIntrinsicID() == Intrinsic::fshl ||
II->getIntrinsicID() == Intrinsic::fshr)
lowerFunnelShift(II);
else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow)
lowerUMulWithOverflow(II);
}
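At a call site the regularization only swaps the callee; for example (a hypothetical before/after pair; the i32 case is what the new fshr.ll test below checks):

; before regularization
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 8)
; after regularization
%0 = call i32 @spirv.llvm_fshr_i32(i32 %x, i32 %y, i32 8)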
116 changes: 116 additions & 0 deletions llvm-spirv/test/llvm-intrinsics/fshr.ll
@@ -0,0 +1,116 @@
; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: llvm-spirv %t.bc -o %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"

; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_32:[0-9]+]] "spirv.llvm_fshr_i32"
; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_16:[0-9]+]] "spirv.llvm_fshr_i16"
; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_VEC_INT_16:[0-9]+]] "spirv.llvm_fshr_v2i16"
; CHECK-SPIRV: TypeInt [[TYPE_INT_32:[0-9]+]] 32 0
; CHECK-SPIRV: TypeInt [[TYPE_INT_16:[0-9]+]] 16 0
; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_ROTATE_32:[0-9]+]] 8
; CHECK-SPIRV-DAG: Constant [[TYPE_INT_16]] [[CONST_ROTATE_16:[0-9]+]] 8
; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_TYPE_SIZE_32:[0-9]+]] 32
; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
; CHECK-SPIRV: TypeVector [[TYPE_VEC_INT_16:[0-9]+]] [[TYPE_INT_16]] 2
; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
; CHECK-SPIRV: ConstantComposite [[TYPE_VEC_INT_16]] [[CONST_ROTATE_VEC_INT_16:[0-9]+]] [[CONST_ROTATE_16]] [[CONST_ROTATE_16]]

; At the LLVM level, we check that the intrinsics are generated again by the reverse translation,
; replacing the SPIR-V-level implementations.
; CHECK-LLVM-NOT: declare {{.*}} @spirv.llvm_fshr_{{.*}}

; Function Attrs: nounwind readnone
; CHECK-SPIRV: Function [[TYPE_INT_32]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_32]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y:[0-9]+]]
define spir_func i32 @Test_i32(i32 %x, i32 %y) local_unnamed_addr #0 {
entry:
; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_X_Y:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[X]] [[Y]] [[CONST_ROTATE_32]]
; CHECK-LLVM: call i32 @llvm.fshr.i32
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 8)
; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_Y_X:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[Y]] [[X]] [[CONST_ROTATE_32]]
; CHECK-LLVM: call i32 @llvm.fshr.i32
%1 = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 8)
; CHECK-SPIRV: IAdd [[TYPE_INT_32]] [[ADD_32:[0-9]+]] [[CALL_32_X_Y]] [[CALL_32_Y_X]]
%sum = add i32 %0, %1
; CHECK-SPIRV: ReturnValue [[ADD_32]]
ret i32 %sum
}

; CHECK-SPIRV: Function [[TYPE_INT_32]] [[NAME_FSHR_FUNC_32]] {{[0-9]+}} [[TYPE_FSHR_FUNC_32]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[ROT:[0-9]+]]

; CHECK-SPIRV: UMod [[TYPE_INT_32]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE_32]]
; CHECK-SPIRV: ShiftRightLogical [[TYPE_INT_32]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_ARG]] [[ROTATE_MOD_SIZE]]
; CHECK-SPIRV: ISub [[TYPE_INT_32]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE_32]] [[ROTATE_MOD_SIZE]]
; CHECK-SPIRV: ShiftLeftLogical [[TYPE_INT_32]] [[X_SHIFT_LEFT:[0-9]+]] [[X_ARG]] [[NEG_ROTATE]]
; CHECK-SPIRV: BitwiseOr [[TYPE_INT_32]] [[FSHR_RESULT:[0-9]+]] [[Y_SHIFT_RIGHT]] [[X_SHIFT_LEFT]]
; CHECK-SPIRV: ReturnValue [[FSHR_RESULT]]

; Function Attrs: nounwind readnone
; CHECK-SPIRV: Function [[TYPE_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_16]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y:[0-9]+]]
define spir_func i16 @Test_i16(i16 %x, i16 %y) local_unnamed_addr #0 {
entry:
; CHECK-SPIRV: FunctionCall [[TYPE_INT_16]] [[CALL_16:[0-9]+]] [[NAME_FSHR_FUNC_16]] [[X]] [[Y]] [[CONST_ROTATE_16]]
; CHECK-LLVM: call i16 @llvm.fshr.i16
%0 = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 8)
; CHECK-SPIRV: ReturnValue [[CALL_16]]
ret i16 %0
}

; Just check that the function for i16 was generated as such - we've checked the logic for another type.
; CHECK-SPIRV: Function [[TYPE_INT_16]] [[NAME_FSHR_FUNC_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_16]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[ROT:[0-9]+]]

; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_VEC_INT_16]]
; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y:[0-9]+]]
define spir_func <2 x i16> @Test_v2i16(<2 x i16> %x, <2 x i16> %y) local_unnamed_addr #0 {
entry:
; CHECK-SPIRV: FunctionCall [[TYPE_VEC_INT_16]] [[CALL_VEC_INT_16:[0-9]+]] [[NAME_FSHR_FUNC_VEC_INT_16]] [[X]] [[Y]] [[CONST_ROTATE_VEC_INT_16]]
; CHECK-LLVM: call <2 x i16> @llvm.fshr.v2i16
%0 = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> <i16 8, i16 8>)
; CHECK-SPIRV: ReturnValue [[CALL_VEC_INT_16]]
ret <2 x i16> %0
}

; Just check that the function for v2i16 was generated as such - we've checked the logic for another type.
; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] [[NAME_FSHR_FUNC_VEC_INT_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_VEC_INT_16]]
; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y_ARG:[0-9]+]]
; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[ROT:[0-9]+]]

; Function Attrs: nounwind readnone speculatable willreturn
declare i32 @llvm.fshr.i32(i32, i32, i32) #1

; Function Attrs: nounwind readnone speculatable willreturn
declare i16 @llvm.fshr.i16(i16, i16, i16) #1

; Function Attrs: nounwind readnone speculatable willreturn
declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #1

attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }

!llvm.module.flags = !{!0}
!opencl.ocl.version = !{!1}
!opencl.spir.version = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 0}
!2 = !{i32 1, i32 2}
