diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
index 154c8236f306f..5edf5e4389bc3 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -83,17 +83,22 @@ class SPIRVRegularizeLLVMBase {
   /// @spirv.llvm_memset_* and replace it with @llvm.memset.
   void lowerMemset(MemSetInst *MSI);
 
-  /// No SPIR-V counterpart for @llvm.fshl.i* intrinsic. It will be lowered
-  /// to a newly generated @spirv.llvm_fshl_i* function.
-  /// Conceptually, FSHL:
+  /// No SPIR-V counterpart for the @llvm.fshl.* (@llvm.fshr.*) intrinsic. It
+  /// will be lowered to a newly generated @spirv.llvm_fshl_*
+  /// (@spirv.llvm_fshr_*) function.
+  ///
+  /// Conceptually, FSHL (FSHR):
   /// 1. concatenates the ints, the first one being the more significant;
-  /// 2. performs a left shift-rotate on the resulting doubled-sized int;
-  /// 3. returns the most significant bits of the shift-rotate result,
+  /// 2. performs a left (right) shift-rotate on the resulting double-sized
+  ///    int;
+  /// 3. returns the most (least) significant bits of the shift-rotate result,
   ///    the number of bits being equal to the size of the original integers.
-  /// The actual implementation algorithm will be slightly different to speed
-  /// things up.
-  void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
-  void buildFunnelShiftLeftFunc(Function *FSHLFunc);
+  /// If FSHL (FSHR) operates on a vector type instead, the same operations
+  /// are performed for each set of corresponding vector elements.
+  ///
+  /// The actual implementation algorithm is slightly different, for the sake
+  /// of simplicity.
+  void lowerFunnelShift(IntrinsicInst *FSHIntrinsic);
 
   void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic);
   void buildUMulWithOverflowFunc(Function *UMulFunc);
@@ -184,74 +189,66 @@ void SPIRVRegularizeLLVMBase::lowerMemset(MemSetInst *MSI) {
   return;
 }
 
-void SPIRVRegularizeLLVMBase::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
-  if (!FSHLFunc->empty())
+void SPIRVRegularizeLLVMBase::lowerFunnelShift(IntrinsicInst *FSHIntrinsic) {
+  // Get a separate function - otherwise, we'd have to rework the CFG of the
+  // current one. Then simply replace the intrinsic uses with a call to the new
+  // function.
+  // Expected LLVM IR for the function:
+  // i* @spirv.llvm_fsh?_i*(i* %a, i* %b, i* %c)
+  FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
+  Type *FSHRetTy = FSHFuncTy->getReturnType();
+  const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
+  Function *FSHFunc =
+      getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName);
+
+  if (!FSHFunc->empty()) {
+    FSHIntrinsic->setCalledFunction(FSHFunc);
     return;
-
-  auto *IntTy = dyn_cast<IntegerType>(FSHLFunc->getReturnType());
-  assert(IntTy && "llvm.fshl: expected an integer return type");
-  assert(FSHLFunc->arg_size() == 3 && "llvm.fshl: expected 3 arguments");
-  for (Argument &Arg : FSHLFunc->args())
-    assert(Arg.getType()->getTypeID() == IntTy->getTypeID() &&
-           "llvm.fshl: mismatched return type and argument types");
-
-  // Our function will require 3 basic blocks; the purpose of each will be
-  // clarified below.
-  auto *CondBB = BasicBlock::Create(M->getContext(), "cond", FSHLFunc);
-  auto *RotateBB =
-      BasicBlock::Create(M->getContext(), "rotate", FSHLFunc); // Main logic
-  auto *PhiBB = BasicBlock::Create(M->getContext(), "phi", FSHLFunc);
-
-  IRBuilder<> Builder(CondBB);
-  // If the number of bits to rotate for is divisible by the bitsize,
-  // the shift becomes useless, and we should bypass the main logic in that
-  // case.
+  }
+  auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc);
+  IRBuilder<> Builder(RotateBB);
+  Type *Ty = FSHFunc->getReturnType();
+  // Build the actual funnel shift rotate logic.
+  // In the comments, "int" is used interchangeably with "vector of int
+  // elements".
+  FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty);
+  Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty;
   unsigned BitWidth = IntTy->getIntegerBitWidth();
   ConstantInt *BitWidthConstant = Builder.getInt({BitWidth, BitWidth});
+  Value *BitWidthForInsts =
+      VectorTy ? Builder.CreateVectorSplat(VectorTy->getNumElements(),
+                                           BitWidthConstant)
+               : BitWidthConstant;
   auto *RotateModVal =
-      Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthConstant);
-  ConstantInt *ZeroConstant = Builder.getInt({BitWidth, 0});
-  auto *CheckRotateModIfZero = Builder.CreateICmpEQ(RotateModVal, ZeroConstant);
-  Builder.CreateCondBr(CheckRotateModIfZero, /*True*/ PhiBB,
-                       /*False*/ RotateBB);
+      Builder.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts);
+  Value *FirstShift = nullptr, *SecShift = nullptr;
+  if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
+    // Shift the less significant number right; the "rotate" number of bits
+    // will be 0-filled on the left as a result of this regular shift.
+    FirstShift = Builder.CreateLShr(FSHFunc->getArg(1), RotateModVal);
+  else
+    // Shift the more significant number left; the "rotate" number of bits
+    // will be 0-filled on the right as a result of this regular shift.
+    FirstShift = Builder.CreateShl(FSHFunc->getArg(0), RotateModVal);
+
+  // We want the "rotate" number of the less (more) significant int's MSBs
+  // (LSBs) to occupy the rightmost (leftmost) "0 space" left by the previous
+  // operation. So, subtract the "rotate" number from the integer bitsize...
+  auto *SubRotateVal = Builder.CreateSub(BitWidthForInsts, RotateModVal);
+  if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
+    // ...and left-shift the more significant int by this number, zero-filling
+    // the LSBs.
+    SecShift = Builder.CreateShl(FSHFunc->getArg(0), SubRotateVal);
+  else
+    // ...and right-shift the less significant int by this number, zero-filling
+    // the MSBs.
+    SecShift = Builder.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
 
-  // Build the actual funnel shift rotate logic.
-  Builder.SetInsertPoint(RotateBB);
-  // Shift the more significant number left, the "rotate" number of bits
-  // will be 0-filled on the right as a result of this regular shift.
-  auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal);
-  // We want the "rotate" number of the second int's MSBs to occupy the
-  // rightmost "0 space" left by the previous operation. Therefore,
-  // subtract the "rotate" number from the integer bitsize...
-  auto *SubRotateVal = Builder.CreateSub(BitWidthConstant, RotateModVal);
-  // ...and right-shift the second int by this number, zero-filling the MSBs.
-  auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal);
   // A simple binary addition of the shifted ints yields the final result.
-  auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
-  Builder.CreateBr(PhiBB);
-
-  // PHI basic block. If no actual rotate was required, return the first, more
-  // significant int. E.g. for 32-bit integers, it's equivalent to concatenating
-  // the 2 ints and taking 32 MSBs.
-  Builder.SetInsertPoint(PhiBB);
-  PHINode *Phi = Builder.CreatePHI(IntTy, 0);
-  Phi->addIncoming(FunnelShiftRes, RotateBB);
-  Phi->addIncoming(FSHLFunc->getArg(0), CondBB);
-  Builder.CreateRet(Phi);
-}
+  auto *FunnelShiftRes = Builder.CreateOr(FirstShift, SecShift);
+  Builder.CreateRet(FunnelShiftRes);
 
-void SPIRVRegularizeLLVMBase::lowerFunnelShiftLeft(
-    IntrinsicInst *FSHLIntrinsic) {
-  // Get a separate function - otherwise, we'd have to rework the CFG of the
-  // current one. Then simply replace the intrinsic uses with a call to the new
-  // function.
-  FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType();
-  Type *FSHLRetTy = FSHLFuncTy->getReturnType();
-  const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic);
-  Function *FSHLFunc =
-      getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName);
-  buildFunnelShiftLeftFunc(FSHLFunc);
-  FSHLIntrinsic->setCalledFunction(FSHLFunc);
+  FSHIntrinsic->setCalledFunction(FSHFunc);
 }
 
 void SPIRVRegularizeLLVMBase::buildUMulWithOverflowFunc(Function *UMulFunc) {
@@ -330,8 +327,9 @@ bool SPIRVRegularizeLLVMBase::regularize() {
         auto *II = cast<IntrinsicInst>(Call);
         if (auto *MSI = dyn_cast<MemSetInst>(II))
          lowerMemset(MSI);
-        else if (II->getIntrinsicID() == Intrinsic::fshl)
-          lowerFunnelShiftLeft(II);
+        else if (II->getIntrinsicID() == Intrinsic::fshl ||
+                 II->getIntrinsicID() == Intrinsic::fshr)
+          lowerFunnelShift(II);
         else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow)
          lowerUMulWithOverflow(II);
       }
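For intuition, the arithmetic that the new lowerFunnelShift emits for the scalar fshr case can be modeled in standalone C++ (a sketch only; the name fshr32 and the sample values are illustrative and not part of the patch):

```cpp
// Standalone model of the IR built for @spirv.llvm_fshr_i32, assuming 32-bit
// scalar operands. Illustrative sketch - not the pass's code.
#include <cassert>
#include <cstdint>

uint32_t fshr32(uint32_t A, uint32_t B, uint32_t Rot) {
  const uint32_t BitWidth = 32;
  // Mirrors RotateModVal: reduce the rotate amount modulo the type size.
  uint32_t RotMod = Rot % BitWidth;
  if (RotMod == 0)
    return B; // Guard: shifting a 32-bit value by 32 is undefined in C++.
  // FirstShift: the less significant int shifted right (LShr); the top
  // RotMod bits become zero.
  uint32_t First = B >> RotMod;
  // SecShift: the more significant int's low RotMod bits, moved up into the
  // zeroed MSBs (Shl by BitWidth - RotMod).
  uint32_t Second = A << (BitWidth - RotMod);
  // The Or of the two shifted values is the funnel-shift result.
  return First | Second;
}

int main() {
  // fshr(0x00000001, 0x80000000, 8): the 64-bit concatenation
  // 0x0000000180000000 rotated right by 8 has low 32 bits 0x01800000.
  assert(fshr32(0x00000001u, 0x80000000u, 8) == 0x01800000u);
  return 0;
}
```

The RotMod == 0 guard exists only because a full-width shift is undefined behavior in C++; the generated IR itself performs the two shifts unconditionally.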
diff --git a/llvm-spirv/test/llvm-intrinsics/fshr.ll b/llvm-spirv/test/llvm-intrinsics/fshr.ll
new file mode 100644
index 0000000000000..38e52c5dfb10c
--- /dev/null
+++ b/llvm-spirv/test/llvm-intrinsics/fshr.ll
@@ -0,0 +1,116 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-unknown"
+
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_32:[0-9]+]] "spirv.llvm_fshr_i32"
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_16:[0-9]+]] "spirv.llvm_fshr_i16"
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_VEC_INT_16:[0-9]+]] "spirv.llvm_fshr_v2i16"
+; CHECK-SPIRV: TypeInt [[TYPE_INT_32:[0-9]+]] 32 0
+; CHECK-SPIRV: TypeInt [[TYPE_INT_16:[0-9]+]] 16 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_ROTATE_32:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_16]] [[CONST_ROTATE_16:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_TYPE_SIZE_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeVector [[TYPE_VEC_INT_16:[0-9]+]] [[TYPE_INT_16]] 2
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: ConstantComposite [[TYPE_VEC_INT_16]] [[CONST_ROTATE_VEC_INT_16:[0-9]+]] [[CONST_ROTATE_16]] [[CONST_ROTATE_16]]
+
+; At the LLVM level, check that the intrinsic calls are regenerated on reverse
+; translation, replacing the SPIR-V-level implementations.
+; CHECK-LLVM-NOT: declare {{.*}} @spirv.llvm_fshr_{{.*}}
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_32]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y:[0-9]+]]
+define spir_func i32 @Test_i32(i32 %x, i32 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_X_Y:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[X]] [[Y]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshr.i32
+  %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 8)
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_Y_X:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[Y]] [[X]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshr.i32
+  %1 = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 8)
+  ; CHECK-SPIRV: IAdd [[TYPE_INT_32]] [[ADD_32:[0-9]+]] [[CALL_32_X_Y]] [[CALL_32_Y_X]]
+  %sum = add i32 %0, %1
+  ; CHECK-SPIRV: ReturnValue [[ADD_32]]
+  ret i32 %sum
+}
+
+; CHECK-SPIRV: Function [[TYPE_INT_32]] [[NAME_FSHR_FUNC_32]] {{[0-9]+}} [[TYPE_FSHR_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: UMod [[TYPE_INT_32]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE_32]]
+; CHECK-SPIRV: ShiftRightLogical [[TYPE_INT_32]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_ARG]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ISub [[TYPE_INT_32]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE_32]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ShiftLeftLogical [[TYPE_INT_32]] [[X_SHIFT_LEFT:[0-9]+]] [[X_ARG]] [[NEG_ROTATE]]
+; CHECK-SPIRV: BitwiseOr [[TYPE_INT_32]] [[FSHR_RESULT:[0-9]+]] [[Y_SHIFT_RIGHT]] [[X_SHIFT_LEFT]]
+; CHECK-SPIRV: ReturnValue [[FSHR_RESULT]]
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y:[0-9]+]]
+define spir_func i16 @Test_i16(i16 %x, i16 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_16]] [[CALL_16:[0-9]+]] [[NAME_FSHR_FUNC_16]] [[X]] [[Y]] [[CONST_ROTATE_16]]
+  ; CHECK-LLVM: call i16 @llvm.fshr.i16
+  %0 = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 8)
+  ; CHECK-SPIRV: ReturnValue [[CALL_16]]
+  ret i16 %0
+}
+
+; Just check that the i16 function was generated; its body logic was verified above for i32.
+; CHECK-SPIRV: Function [[TYPE_INT_16]] [[NAME_FSHR_FUNC_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y:[0-9]+]]
+define spir_func <2 x i16> @Test_v2i16(<2 x i16> %x, <2 x i16> %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_VEC_INT_16]] [[CALL_VEC_INT_16:[0-9]+]] [[NAME_FSHR_FUNC_VEC_INT_16]] [[X]] [[Y]] [[CONST_ROTATE_VEC_INT_16]]
+  ; CHECK-LLVM: call <2 x i16> @llvm.fshr.v2i16
+  %0 = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> <i16 8, i16 8>)
+  ; CHECK-SPIRV: ReturnValue [[CALL_VEC_INT_16]]
+  ret <2 x i16> %0
+}
+
+; Just check that the v2i16 function was generated; its body logic was verified above for i32.
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] [[NAME_FSHR_FUNC_VEC_INT_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[ROT:[0-9]+]]
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i32 @llvm.fshr.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i16 @llvm.fshr.i16(i16, i16, i16) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}
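For completeness, the FSHL counterpart that the same lowerFunnelShift routine produces when it sees @llvm.fshl.* can be modeled the same way (again an illustrative sketch, not the pass's code):

```cpp
#include <cstdint>

// Mirror-image model for @spirv.llvm_fshl_i32: the more significant int is
// shifted left first, and the less significant int's MSBs fill the vacated
// LSBs. Illustrative sketch - not the pass's code.
uint32_t fshl32(uint32_t A, uint32_t B, uint32_t Rot) {
  const uint32_t BitWidth = 32;
  uint32_t RotMod = Rot % BitWidth;
  if (RotMod == 0)
    return A; // fshl with a rotate of 0 (mod 32) returns the first operand.
  uint32_t First = A << RotMod;               // Shl: zeros enter on the right.
  uint32_t Second = B >> (BitWidth - RotMod); // LShr: B's MSBs fill the zeros.
  return First | Second; // e.g. fshl32(1u, 0x80000000u, 8) == 0x180u
}
```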