llvm · dtcxzyw · May 13, 2024 · Apr 11, 2024 · Apr 11, 2024 · Apr 12, 2024
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
@@ -1838,7 +1838,7 @@ template <typename Op_t> struct ElementWiseBitCast_match {
   ElementWiseBitCast_match(const Op_t &OpMatch) : Op(OpMatch) {}
 
   template <typename OpTy> bool match(OpTy *V) {
-    BitCastInst *I = dyn_cast<BitCastInst>(V);
+    auto *I = dyn_cast<BitCastInst>(V);
     if (!I)
       return false;
     Type *SrcType = I->getSrcTy();

@@ -1111,6 +1111,42 @@ static void computeKnownBitsFromOperator(const Operator *I,
       break;
     }
 
+    const Value *V;
+    // Handle bitcast from floating point to integer.
+    if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
+        V->getType()->isFPOrFPVectorTy()) {
+      Type *FPType = V->getType()->getScalarType();
+      KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
+      FPClassTest FPClasses = Result.KnownFPClasses;
+
+      if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
+        Known.Zero.setAllBits();
+        Known.One.setAllBits();
+
+        if (FPClasses & fcInf)
+          Known = Known.intersectWith(KnownBits::makeConstant(
+              APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
+
+        if (FPClasses & fcZero)
+          Known = Known.intersectWith(KnownBits::makeConstant(
+              APInt::getZero(FPType->getScalarSizeInBits())));
+      }
+
+      if (Result.SignBit) {
+        if (*Result.SignBit)
+          Known.makeNegative();
+        else
+          Known.makeNonNegative();
+      } else {
+        Known.Zero.clearSignBit();
+        Known.One.clearSignBit();
+      }
+
+      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+      break;
+    }
+
     // Handle cast from vector integer type to scalar or vector integer.
     auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
     if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2216,7 +2216,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
@@ -2304,7 +2304,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
@@ -2353,7 +2353,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
@@ -2376,7 +2376,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
@@ -2399,7 +2399,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[TMP5]]
 ;
@@ -2448,7 +2448,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[TMP5]]
 ;
@@ -2560,7 +2560,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -680,7 +680,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
@@ -703,7 +703,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[TMP3]]
 ;
@@ -772,7 +772,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[X]] to i16
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
 ; CHECK-NEXT:    ret half [[TMP3]]
 ;
@@ -795,7 +795,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
 ; CHECK-NEXT:    ret <2 x half> [[TMP3]]
 ;
@@ -829,7 +829,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
@@ -1075,7 +1075,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[X]] to i32
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;

diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -360,7 +360,7 @@ declare half @_Z4pownDhi(half, i32)
 ; GCN-NATIVE: %0 = bitcast half %x to i16
 ; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
 ; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
-; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
 ; GCN-NATIVE: %3 = bitcast i16 %2 to half
 define half @test_pown_f16(half %x, i32 %y) {
 entry:
@@ -378,7 +378,7 @@ declare float @_Z4pownfi(float, i32)
 ; GCN: %[[r0:.*]] = bitcast float %tmp to i32
 ; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
 ; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
 ; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
 define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
 entry:
@@ -414,7 +414,7 @@ entry:
 ; GCN: %[[r0:.*]] = bitcast float %tmp to i32
 ; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
 ; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
 ; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
 define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
 entry:
@@ -438,7 +438,7 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
 ; GCN: %1 = bitcast half %x to i16
 ; GCN: %__pow_sign = and i16 %1, -32768
 ; GCN: %2 = bitcast half %__exp2 to i16
-; GCN: %3 = or i16 %__pow_sign, %2
+; GCN: %3 = or disjoint i16 %__pow_sign, %2
 ; GCN: %4 = bitcast i16 %3 to half
 define half @test_pow_fast_f16__y_13(half %x) {
   %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
@@ -453,7 +453,7 @@ define half @test_pow_fast_f16__y_13(half %x) {
 ; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
 ; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
 ; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
-; GCN: %3 = or <2 x i16> %__pow_sign, %2
+; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
 ; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
 define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
   %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)