Translate the llvm.fshr intrinsic function

Add llvm.fshr intrinsic translation, similar to the existing llvm.fshl translation. Add a test for it. A detailed description of FSHR can be found at https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic Signed-off-by: amochalo <anastasiya.mochalova@intel.com>
KhronosGroup · Apr 16, 2021 · 5071ae9 · 5071ae9
1 parent 4418e70
commit 5071ae9
Show file tree

Hide file tree

Showing 2 changed files with 170 additions and 38 deletions.
diff --git a/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -83,21 +83,22 @@ class SPIRVRegularizeLLVMBase {
   /// @spirv.llvm_memset_* and replace it with @llvm.memset.
   void lowerMemset(MemSetInst *MSI);
 
-  /// No SPIR-V counterpart for @llvm.fshl.* intrinsic. It will be lowered
-  /// to a newly generated @spirv.llvm_fshl_* function.
+  /// No SPIR-V counterpart for @llvm.fshl.*(@llvm.fshr.*) intrinsic. It will be
+  /// lowered to a newly generated @spirv.llvm_fshl_*(@spirv.llvm_fshr_*)
+  /// function.
   ///
-  /// Conceptually, FSHL:
+  /// Conceptually, FSHL (FSHR):
   /// 1. concatenates the ints, the first one being the more significant;
-  /// 2. performs a left shift-rotate on the resulting doubled-sized int;
-  /// 3. returns the most significant bits of the shift-rotate result,
+  /// 2. performs a left (right) shift-rotate on the resulting doubled-sized
+  /// int;
+  /// 3. returns the most (least) significant bits of the shift-rotate result,
   ///    the number of bits being equal to the size of the original integers.
-  /// If FSHL operates on a vector type instead, the same operations are
+  /// If FSHL (FSHR) operates on a vector type instead, the same operations are
   /// performed for each set of corresponding vector elements.
   ///
   /// The actual implementation algorithm will be slightly different for
   /// simplification purposes.
-  void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
-  void buildFunnelShiftLeftFunc(Function *FSHLFunc);
+  void lowerFunnelShift(IntrinsicInst *FSHIntrinsic);
 
   void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic);
   void buildUMulWithOverflowFunc(Function *UMulFunc);
@@ -188,13 +189,25 @@ void SPIRVRegularizeLLVMBase::lowerMemset(MemSetInst *MSI) {
   return;
 }
 
-void SPIRVRegularizeLLVMBase::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
-  if (!FSHLFunc->empty())
+void SPIRVRegularizeLLVMBase::lowerFunnelShift(IntrinsicInst *FSHIntrinsic) {
+  // Get a separate function - otherwise, we'd have to rework the CFG of the
+  // current one. Then simply replace the intrinsic uses with a call to the new
+  // function.
+  // Expected LLVM IR for the function: i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i*
+  // %c)
+  FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
+  Type *FSHRetTy = FSHFuncTy->getReturnType();
+  const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
+  Function *FSHFunc =
+      getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName);
+
+  if (!FSHFunc->empty()) {
+    FSHIntrinsic->setCalledFunction(FSHFunc);
     return;
-
-  auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHLFunc);
+  }
+  auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc);
   IRBuilder<> Builder(RotateBB);
-  Type *Ty = FSHLFunc->getReturnType();
+  Type *Ty = FSHFunc->getReturnType();
   // Build the actual funnel shift rotate logic.
   // In the comments, "int" is used interchangeably with "vector of int
   // elements".
@@ -207,33 +220,35 @@ void SPIRVRegularizeLLVMBase::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
                                            BitWidthConstant)
                : BitWidthConstant;
   auto *RotateModVal =
-      Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthForInsts);
-  // Shift the more significant number left, the "rotate" number of bits
-  // will be 0-filled on the right as a result of this regular shift.
-  auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal);
-  // We want the "rotate" number of the second int's MSBs to occupy the
-  // rightmost "0 space" left by the previous operation. Therefore,
-  // subtract the "rotate" number from the integer bitsize...
+      Builder.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts);
+  Value *FirstShift = nullptr, *SecShift = nullptr;
+  if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
+    // Shift the less significant number right, the "rotate" number of bits
+    // will be 0-filled on the left as a result of this regular shift.
+    FirstShift = Builder.CreateLShr(FSHFunc->getArg(1), RotateModVal);
+  else
+    // Shift the more significant number left, the "rotate" number of bits
+    // will be 0-filled on the right as a result of this regular shift.
+    FirstShift = Builder.CreateShl(FSHFunc->getArg(0), RotateModVal);
+
+  // For FSHR (FSHL), the "rotate" number of the more (less) significant int's
+  // LSBs (MSBs) must fill the leftmost (rightmost) "0 space" left by the first
+  // shift. Therefore, subtract the "rotate" number from the integer bitsize...
   auto *SubRotateVal = Builder.CreateSub(BitWidthForInsts, RotateModVal);
-  // ...and right-shift the second int by this number, zero-filling the MSBs.
-  auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal);
+  if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr)
+    // ...and left-shift the more significant int by this number, zero-filling
+    // the LSBs.
+    SecShift = Builder.CreateShl(FSHFunc->getArg(0), SubRotateVal);
+  else
+    // ...and right-shift the less significant int by this number, zero-filling
+    // the MSBs.
+    SecShift = Builder.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
+
   // A simple binary addition of the shifted ints yields the final result.
-  auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
+  auto *FunnelShiftRes = Builder.CreateOr(FirstShift, SecShift);
   Builder.CreateRet(FunnelShiftRes);
-}
 
-void SPIRVRegularizeLLVMBase::lowerFunnelShiftLeft(
-    IntrinsicInst *FSHLIntrinsic) {
-  // Get a separate function - otherwise, we'd have to rework the CFG of the
-  // current one. Then simply replace the intrinsic uses with a call to the new
-  // function.
-  FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType();
-  Type *FSHLRetTy = FSHLFuncTy->getReturnType();
-  const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic);
-  Function *FSHLFunc =
-      getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName);
-  buildFunnelShiftLeftFunc(FSHLFunc);
-  FSHLIntrinsic->setCalledFunction(FSHLFunc);
+  FSHIntrinsic->setCalledFunction(FSHFunc);
 }
 
 void SPIRVRegularizeLLVMBase::buildUMulWithOverflowFunc(Function *UMulFunc) {
@@ -312,8 +327,9 @@ bool SPIRVRegularizeLLVMBase::regularize() {
             auto *II = cast<IntrinsicInst>(Call);
             if (auto *MSI = dyn_cast<MemSetInst>(II))
               lowerMemset(MSI);
-            else if (II->getIntrinsicID() == Intrinsic::fshl)
-              lowerFunnelShiftLeft(II);
+            else if (II->getIntrinsicID() == Intrinsic::fshl ||
+                     II->getIntrinsicID() == Intrinsic::fshr)
+              lowerFunnelShift(II);
             else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow)
               lowerUMulWithOverflow(II);
           }

diff --git a/test/llvm-intrinsics/fshr.ll b/test/llvm-intrinsics/fshr.ll
@@ -0,0 +1,116 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-unknown"
+
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_32:[0-9]+]] "spirv.llvm_fshr_i32"
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_16:[0-9]+]] "spirv.llvm_fshr_i16"
+; CHECK-SPIRV: Name [[NAME_FSHR_FUNC_VEC_INT_16:[0-9]+]] "spirv.llvm_fshr_v2i16"
+; CHECK-SPIRV: TypeInt [[TYPE_INT_32:[0-9]+]] 32 0
+; CHECK-SPIRV: TypeInt [[TYPE_INT_16:[0-9]+]] 16 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_ROTATE_32:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_16]] [[CONST_ROTATE_16:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_TYPE_SIZE_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeVector [[TYPE_VEC_INT_16:[0-9]+]] [[TYPE_INT_16]] 2
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHR_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: ConstantComposite [[TYPE_VEC_INT_16]] [[CONST_ROTATE_VEC_INT_16:[0-9]+]] [[CONST_ROTATE_16]] [[CONST_ROTATE_16]]
+
+; On LLVM level, we'll check that the intrinsics were generated again in reverse translation,
+; replacing the SPIR-V level implementations.
+; CHECK-LLVM-NOT: declare {{.*}} @spirv.llvm_fshr_{{.*}}
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_32]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y:[0-9]+]]
+define spir_func i32 @Test_i32(i32 %x, i32 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_X_Y:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[X]] [[Y]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshr.i32
+  %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 8)
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_Y_X:[0-9]+]] [[NAME_FSHR_FUNC_32]] [[Y]] [[X]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshr.i32
+  %1 = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 8)
+  ; CHECK-SPIRV: IAdd [[TYPE_INT_32]] [[ADD_32:[0-9]+]] [[CALL_32_X_Y]] [[CALL_32_Y_X]]
+  %sum = add i32 %0, %1
+  ; CHECK-SPIRV: ReturnValue [[ADD_32]]
+  ret i32 %sum
+}
+
+; CHECK-SPIRV: Function [[TYPE_INT_32]] [[NAME_FSHR_FUNC_32]] {{[0-9]+}} [[TYPE_FSHR_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: UMod [[TYPE_INT_32]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE_32]]
+; CHECK-SPIRV: ShiftRightLogical [[TYPE_INT_32]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_ARG]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ISub [[TYPE_INT_32]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE_32]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ShiftLeftLogical [[TYPE_INT_32]] [[X_SHIFT_LEFT:[0-9]+]] [[X_ARG]] [[NEG_ROTATE]]
+; CHECK-SPIRV: BitwiseOr [[TYPE_INT_32]] [[FSHR_RESULT:[0-9]+]] [[Y_SHIFT_RIGHT]] [[X_SHIFT_LEFT]]
+; CHECK-SPIRV: ReturnValue [[FSHR_RESULT]]
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y:[0-9]+]]
+define spir_func i16 @Test_i16(i16 %x, i16 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_16]] [[CALL_16:[0-9]+]] [[NAME_FSHR_FUNC_16]] [[X]] [[Y]] [[CONST_ROTATE_16]]
+  ; CHECK-LLVM: call i16 @llvm.fshr.i16
+  %0 = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 8)
+  ; CHECK-SPIRV: ReturnValue [[CALL_16]]
+  ret i16 %0
+}
+
+; Just check that the function for i16 was generated as such - we've checked the logic for another type.
+; CHECK-SPIRV: Function [[TYPE_INT_16]] [[NAME_FSHR_FUNC_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y:[0-9]+]]
+define spir_func <2 x i16> @Test_v2i16(<2 x i16> %x, <2 x i16> %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_VEC_INT_16]] [[CALL_VEC_INT_16:[0-9]+]] [[NAME_FSHR_FUNC_VEC_INT_16]] [[X]] [[Y]] [[CONST_ROTATE_VEC_INT_16]]
+  ; CHECK-LLVM: call <2 x i16> @llvm.fshr.v2i16
+  %0 = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> <i16 8, i16 8>)
+  ; CHECK-SPIRV: ReturnValue [[CALL_VEC_INT_16]]
+  ret <2 x i16> %0
+}
+
+; Just check that the function for v2i16 was generated as such - we've checked the logic for another type.
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] [[NAME_FSHR_FUNC_VEC_INT_16]] {{[0-9]+}} [[TYPE_FSHR_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y_ARG:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[ROT:[0-9]+]]
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i32 @llvm.fshr.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i16 @llvm.fshr.i16(i16, i16, i16) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}