-
Notifications
You must be signed in to change notification settings - Fork 13.5k
Reland "[ValueTracking] Compute knownbits from known fp classes" #92084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch relands #86409. I mistakenly thought that Patch is 21.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92084.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 171ddab977dea..0d6d86cb47e67 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1904,7 +1904,7 @@ template <typename Op_t> struct ElementWiseBitCast_match {
ElementWiseBitCast_match(const Op_t &OpMatch) : Op(OpMatch) {}
template <typename OpTy> bool match(OpTy *V) {
- BitCastInst *I = dyn_cast<BitCastInst>(V);
+ auto *I = dyn_cast<BitCastInst>(V);
if (!I)
return false;
Type *SrcType = I->getSrcTy();
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 375385aca7a39..99b621461fcdc 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1118,6 +1118,42 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
+ const Value *V;
+ // Handle bitcast from floating point to integer.
+ if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
+ V->getType()->isFPOrFPVectorTy()) {
+ Type *FPType = V->getType()->getScalarType();
+ KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
+ FPClassTest FPClasses = Result.KnownFPClasses;
+
+ if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+
+ if (FPClasses & fcInf)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
+
+ if (FPClasses & fcZero)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APInt::getZero(FPType->getScalarSizeInBits())));
+
+ Known.Zero.clearSignBit();
+ Known.One.clearSignBit();
+ }
+
+ if (Result.SignBit) {
+ if (*Result.SignBit)
+ Known.makeNegative();
+ else
+ Known.makeNonNegative();
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ break;
+ }
+
// Handle cast from vector integer type to scalar or vector integer.
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index c4bd4bc126f73..5db25a59d33fc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2216,7 +2216,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2304,7 +2304,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2353,7 +2353,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2376,7 +2376,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2399,7 +2399,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2448,7 +2448,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2560,7 +2560,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 8ddaf243db92c..e298226ee7ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -680,7 +680,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -703,7 +703,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP3]]
;
@@ -772,7 +772,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: ret half [[TMP3]]
;
@@ -795,7 +795,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
; CHECK-NEXT: ret <2 x half> [[TMP3]]
;
@@ -829,7 +829,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -1075,7 +1075,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 204c8140d3f17..54ca33401ccf4 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -360,7 +360,7 @@ declare half @_Z4pownDhi(half, i32)
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
-; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {
entry:
@@ -378,7 +378,7 @@ declare float @_Z4pownfi(float, i32)
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
@@ -414,7 +414,7 @@ entry:
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
@@ -438,7 +438,7 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
; GCN: %1 = bitcast half %x to i16
; GCN: %__pow_sign = and i16 %1, -32768
; GCN: %2 = bitcast half %__exp2 to i16
-; GCN: %3 = or i16 %__pow_sign, %2
+; GCN: %3 = or disjoint i16 %__pow_sign, %2
; GCN: %4 = bitcast i16 %3 to half
define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
@@ -453,7 +453,7 @@ define half @test_pow_fast_f16__y_13(half %x) {
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
-; GCN: %3 = or <2 x i16> %__pow_sign, %2
+; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 8b4249b2c25a9..7a38020500517 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1374,5 +1374,289 @@ define i8 @nonzero_reduce_xor_vscale_odd(<vscale x 3 x i8> %xx) {
ret i8 %r
}
+define i1 @test_sign_pos(float %x) {
+; CHECK-LABEL: @test_sign_pos(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %y = bitcast float %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half(half %x) {
+; CHECK-LABEL: @test_sign_pos_half(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call half @llvm.fabs.f16(half %x)
+ %y = bitcast half %fabs to i16
+ %sign = icmp sgt i16 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half_non_elementwise(<2 x half> %x) {
+; CHECK-LABEL: @test_sign_pos_half_non_elementwise(
+; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
+; CHECK-NEXT: [[Y:%.*]] = bitcast <2 x half> [[FABS]] to i32
+; CHECK-NEXT: [[SIGN:%.*]] = icmp sgt i32 [[Y]], -1
+; CHECK-NEXT: ret i1 [[SIGN]]
+;
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+ %y = bitcast <2 x half> %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_neg(float %x) {
+; CHECK-LABEL: @test_sign_neg(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fnabs = fneg float %fabs
+ %y = bitcast float %fnabs to i32
+ %sign = icmp slt i32 %y, 0
+ ret i1 %sign
+}
+
+define <2 x i1> @test_sign_pos_vec(<2 x float> %x) {
+; CHECK-LABEL: @test_sign_pos_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
+ %y = bitcast <2 x float> %fabs to <2 x i32>
+ %sign = icmp slt <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %sign
+}
+
+define i32 @test_inf_only(float nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only(
+; CHECK-NEXT: ret i32 2139095040
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i16 @test_inf_only_bfloat(bfloat nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_bfloat(
+; CHECK-NEXT: ret i16 32640
+;
+ %y = bitcast bfloat %x to i16
+ %and = and i16 %y, 32767
+ ret i16 %and
+}
+
+define i128 @test_inf_only_ppc_fp128(ppc_fp128 nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_ppc_fp128(
+; CHECK-NEXT: ret i128 9218868437227405312
+;
+ %y = bitcast ppc_fp128 %x to i128
+ %and = and i128 %y, 170141183460469231731687303715884105727
+ ret i128 %and
+}
+
+define i32 @test_zero_only(float nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only(
+; CHECK-NEXT: ret i32 0
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i80 @test_zero_only_non_ieee(x86_fp80 nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only_non_ieee(
+; CHECK-NEXT: ret i80 0
+;
+ %y = bitcast x86_fp80 %x to i80
+ %and = and i80 %y, 604462909807314587353087
+ ret i80 %and
+}
+
+define i32 @test_inf_nan_only(float nofpclass(sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_nan_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_sub_zero_only(float nofpclass(nan norm inf) %x) {
+; CHECK-LABEL: @test_sub_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_inf_zero_only(float nofpclass(nan norm sub) %x) {
+; CHECK-LABEL: @test_inf_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 8388608
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 16777215
+ ret i32 %and
+}
+
+; Make sure that the signbit is cleared.
+define i32 @test_ninf_only(double %x) {
+; CHECK-LABEL: @test_ninf_only(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[X:%.*]], 0xFFF0000000000000
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: ret i32 0
+; CHECK: if.else:
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = fcmp oeq double %x, 0xFFF0000000000000
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cast = bitcast double %x to i64
+ %trunc = trunc i64 %cast to i32
+ ret i32 %trunc
+
+if.else:
+ ret i32 0
+}
+
+define i1 @test_simplify_icmp(i32 %x) {
+; CHECK-LABEL: @test_simplify_icmp(
+; CHECK-NEXT: ret i1 false
+;
+ %cast1 = uitofp i32 %x to double
+ %cast2 = bitcast double %cast1 to i64
+ %mask = and i64 %cast2, -140737488355328
+ %cmp = icmp eq i64 %mask, -1970324836974592
+ ret i1 %cmp
+}
+
+define i32 @test_snan_quiet_bit1(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_snan_quiet_bit2(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit1(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit2(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i16 @test_simplify_mask(i32 %ui, float %x) {
+; CHECK-LABEL: @test_simplify_mask(
+; CHECK-NEXT: [[CONV:%.*]] = uitofp i32 [[UI:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[CONV]], [[X:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i16 31744
+; CHECK: if.else:
+; CHECK-NEXT: ret i16 0
+;
+ %conv = uitofp i32 %ui to float
+ %cmp = fcmp olt float %x, %conv
+ br i1 %cmp, label %if.else, label %if.end
+
+if.end:
+ %cast = bitcast float %conv to i32
+ %shr = lshr i32 %cast, 16
+ %trunc = trunc i32 %shr to i16
+ %and = and i16 %trunc, -32768
+ %or = or disjoint i16 %and, 31744
+ ret i16 %or
+
+if.else:
+ ret i16 0
+}
+
+; TODO: %cmp always evaluates to false
+
+define i1 @test_simplify_icmp2(double %x) {
+; CHECK-LABEL: @test_simplify_icmp2(
+; CHECK-NEXT: [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[CAST:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[CAST]], 3458764513820540928
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: if.else:
+; CHECK-NEXT: ret i1 false
+;
+ %ab...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Yingwei Zheng (dtcxzyw) ChangesThis patch relands #86409. I mistakenly thought that Patch is 21.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92084.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 171ddab977dea..0d6d86cb47e67 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1904,7 +1904,7 @@ template <typename Op_t> struct ElementWiseBitCast_match {
ElementWiseBitCast_match(const Op_t &OpMatch) : Op(OpMatch) {}
template <typename OpTy> bool match(OpTy *V) {
- BitCastInst *I = dyn_cast<BitCastInst>(V);
+ auto *I = dyn_cast<BitCastInst>(V);
if (!I)
return false;
Type *SrcType = I->getSrcTy();
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 375385aca7a39..99b621461fcdc 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1118,6 +1118,42 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
+ const Value *V;
+ // Handle bitcast from floating point to integer.
+ if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
+ V->getType()->isFPOrFPVectorTy()) {
+ Type *FPType = V->getType()->getScalarType();
+ KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
+ FPClassTest FPClasses = Result.KnownFPClasses;
+
+ if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+
+ if (FPClasses & fcInf)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
+
+ if (FPClasses & fcZero)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APInt::getZero(FPType->getScalarSizeInBits())));
+
+ Known.Zero.clearSignBit();
+ Known.One.clearSignBit();
+ }
+
+ if (Result.SignBit) {
+ if (*Result.SignBit)
+ Known.makeNegative();
+ else
+ Known.makeNonNegative();
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ break;
+ }
+
// Handle cast from vector integer type to scalar or vector integer.
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index c4bd4bc126f73..5db25a59d33fc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2216,7 +2216,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2304,7 +2304,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2353,7 +2353,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2376,7 +2376,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2399,7 +2399,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2448,7 +2448,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2560,7 +2560,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 8ddaf243db92c..e298226ee7ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -680,7 +680,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -703,7 +703,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP3]]
;
@@ -772,7 +772,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: ret half [[TMP3]]
;
@@ -795,7 +795,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
; CHECK-NEXT: ret <2 x half> [[TMP3]]
;
@@ -829,7 +829,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -1075,7 +1075,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 204c8140d3f17..54ca33401ccf4 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -360,7 +360,7 @@ declare half @_Z4pownDhi(half, i32)
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
-; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {
entry:
@@ -378,7 +378,7 @@ declare float @_Z4pownfi(float, i32)
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
@@ -414,7 +414,7 @@ entry:
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
@@ -438,7 +438,7 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
; GCN: %1 = bitcast half %x to i16
; GCN: %__pow_sign = and i16 %1, -32768
; GCN: %2 = bitcast half %__exp2 to i16
-; GCN: %3 = or i16 %__pow_sign, %2
+; GCN: %3 = or disjoint i16 %__pow_sign, %2
; GCN: %4 = bitcast i16 %3 to half
define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
@@ -453,7 +453,7 @@ define half @test_pow_fast_f16__y_13(half %x) {
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
-; GCN: %3 = or <2 x i16> %__pow_sign, %2
+; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 8b4249b2c25a9..7a38020500517 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1374,5 +1374,289 @@ define i8 @nonzero_reduce_xor_vscale_odd(<vscale x 3 x i8> %xx) {
ret i8 %r
}
+define i1 @test_sign_pos(float %x) {
+; CHECK-LABEL: @test_sign_pos(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %y = bitcast float %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half(half %x) {
+; CHECK-LABEL: @test_sign_pos_half(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call half @llvm.fabs.f16(half %x)
+ %y = bitcast half %fabs to i16
+ %sign = icmp sgt i16 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half_non_elementwise(<2 x half> %x) {
+; CHECK-LABEL: @test_sign_pos_half_non_elementwise(
+; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
+; CHECK-NEXT: [[Y:%.*]] = bitcast <2 x half> [[FABS]] to i32
+; CHECK-NEXT: [[SIGN:%.*]] = icmp sgt i32 [[Y]], -1
+; CHECK-NEXT: ret i1 [[SIGN]]
+;
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+ %y = bitcast <2 x half> %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_neg(float %x) {
+; CHECK-LABEL: @test_sign_neg(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fnabs = fneg float %fabs
+ %y = bitcast float %fnabs to i32
+ %sign = icmp slt i32 %y, 0
+ ret i1 %sign
+}
+
+define <2 x i1> @test_sign_pos_vec(<2 x float> %x) {
+; CHECK-LABEL: @test_sign_pos_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
+ %y = bitcast <2 x float> %fabs to <2 x i32>
+ %sign = icmp slt <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %sign
+}
+
+define i32 @test_inf_only(float nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only(
+; CHECK-NEXT: ret i32 2139095040
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i16 @test_inf_only_bfloat(bfloat nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_bfloat(
+; CHECK-NEXT: ret i16 32640
+;
+ %y = bitcast bfloat %x to i16
+ %and = and i16 %y, 32767
+ ret i16 %and
+}
+
+define i128 @test_inf_only_ppc_fp128(ppc_fp128 nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_ppc_fp128(
+; CHECK-NEXT: ret i128 9218868437227405312
+;
+ %y = bitcast ppc_fp128 %x to i128
+ %and = and i128 %y, 170141183460469231731687303715884105727
+ ret i128 %and
+}
+
+define i32 @test_zero_only(float nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only(
+; CHECK-NEXT: ret i32 0
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i80 @test_zero_only_non_ieee(x86_fp80 nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only_non_ieee(
+; CHECK-NEXT: ret i80 0
+;
+ %y = bitcast x86_fp80 %x to i80
+ %and = and i80 %y, 604462909807314587353087
+ ret i80 %and
+}
+
+define i32 @test_inf_nan_only(float nofpclass(sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_nan_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_sub_zero_only(float nofpclass(nan norm inf) %x) {
+; CHECK-LABEL: @test_sub_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_inf_zero_only(float nofpclass(nan norm sub) %x) {
+; CHECK-LABEL: @test_inf_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 8388608
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 16777215
+ ret i32 %and
+}
+
+; Make sure that the signbit is cleared.
+define i32 @test_ninf_only(double %x) {
+; CHECK-LABEL: @test_ninf_only(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[X:%.*]], 0xFFF0000000000000
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: ret i32 0
+; CHECK: if.else:
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = fcmp oeq double %x, 0xFFF0000000000000
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cast = bitcast double %x to i64
+ %trunc = trunc i64 %cast to i32
+ ret i32 %trunc
+
+if.else:
+ ret i32 0
+}
+
+define i1 @test_simplify_icmp(i32 %x) {
+; CHECK-LABEL: @test_simplify_icmp(
+; CHECK-NEXT: ret i1 false
+;
+ %cast1 = uitofp i32 %x to double
+ %cast2 = bitcast double %cast1 to i64
+ %mask = and i64 %cast2, -140737488355328
+ %cmp = icmp eq i64 %mask, -1970324836974592
+ ret i1 %cmp
+}
+
+define i32 @test_snan_quiet_bit1(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_snan_quiet_bit2(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit1(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit2(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i16 @test_simplify_mask(i32 %ui, float %x) {
+; CHECK-LABEL: @test_simplify_mask(
+; CHECK-NEXT: [[CONV:%.*]] = uitofp i32 [[UI:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[CONV]], [[X:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i16 31744
+; CHECK: if.else:
+; CHECK-NEXT: ret i16 0
+;
+ %conv = uitofp i32 %ui to float
+ %cmp = fcmp olt float %x, %conv
+ br i1 %cmp, label %if.else, label %if.end
+
+if.end:
+ %cast = bitcast float %conv to i32
+ %shr = lshr i32 %cast, 16
+ %trunc = trunc i32 %shr to i16
+ %and = and i16 %trunc, -32768
+ %or = or disjoint i16 %and, 31744
+ ret i16 %or
+
+if.else:
+ ret i16 0
+}
+
+; TODO: %cmp always evaluates to false
+
+define i1 @test_simplify_icmp2(double %x) {
+; CHECK-LABEL: @test_simplify_icmp2(
+; CHECK-NEXT: [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[CAST:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[CAST]], 3458764513820540928
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: if.else:
+; CHECK-NEXT: ret i1 false
+;
+ %ab...
[truncated]
|
@llvm/pr-subscribers-llvm-analysis Author: Yingwei Zheng (dtcxzyw) ChangesThis patch relands #86409. I mistakenly thought that Patch is 21.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92084.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 171ddab977dea..0d6d86cb47e67 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1904,7 +1904,7 @@ template <typename Op_t> struct ElementWiseBitCast_match {
ElementWiseBitCast_match(const Op_t &OpMatch) : Op(OpMatch) {}
template <typename OpTy> bool match(OpTy *V) {
- BitCastInst *I = dyn_cast<BitCastInst>(V);
+ auto *I = dyn_cast<BitCastInst>(V);
if (!I)
return false;
Type *SrcType = I->getSrcTy();
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 375385aca7a39..99b621461fcdc 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1118,6 +1118,42 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
+ const Value *V;
+ // Handle bitcast from floating point to integer.
+ if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
+ V->getType()->isFPOrFPVectorTy()) {
+ Type *FPType = V->getType()->getScalarType();
+ KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
+ FPClassTest FPClasses = Result.KnownFPClasses;
+
+ if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+
+ if (FPClasses & fcInf)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
+
+ if (FPClasses & fcZero)
+ Known = Known.intersectWith(KnownBits::makeConstant(
+ APInt::getZero(FPType->getScalarSizeInBits())));
+
+ Known.Zero.clearSignBit();
+ Known.One.clearSignBit();
+ }
+
+ if (Result.SignBit) {
+ if (*Result.SignBit)
+ Known.makeNegative();
+ else
+ Known.makeNonNegative();
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ break;
+ }
+
// Handle cast from vector integer type to scalar or vector integer.
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index c4bd4bc126f73..5db25a59d33fc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2216,7 +2216,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2304,7 +2304,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2353,7 +2353,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2376,7 +2376,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
@@ -2399,7 +2399,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2448,7 +2448,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
@@ -2560,7 +2560,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret float [[TMP5]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 8ddaf243db92c..e298226ee7ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -680,7 +680,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -703,7 +703,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[TMP3]]
;
@@ -772,7 +772,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: ret half [[TMP3]]
;
@@ -795,7 +795,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
; CHECK-NEXT: ret <2 x half> [[TMP3]]
;
@@ -829,7 +829,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
@@ -1075,7 +1075,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: ret float [[TMP3]]
;
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 204c8140d3f17..54ca33401ccf4 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -360,7 +360,7 @@ declare half @_Z4pownDhi(half, i32)
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
-; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
+; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {
entry:
@@ -378,7 +378,7 @@ declare float @_Z4pownfi(float, i32)
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
@@ -414,7 +414,7 @@ entry:
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
@@ -438,7 +438,7 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
; GCN: %1 = bitcast half %x to i16
; GCN: %__pow_sign = and i16 %1, -32768
; GCN: %2 = bitcast half %__exp2 to i16
-; GCN: %3 = or i16 %__pow_sign, %2
+; GCN: %3 = or disjoint i16 %__pow_sign, %2
; GCN: %4 = bitcast i16 %3 to half
define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
@@ -453,7 +453,7 @@ define half @test_pow_fast_f16__y_13(half %x) {
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
-; GCN: %3 = or <2 x i16> %__pow_sign, %2
+; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 8b4249b2c25a9..7a38020500517 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1374,5 +1374,289 @@ define i8 @nonzero_reduce_xor_vscale_odd(<vscale x 3 x i8> %xx) {
ret i8 %r
}
+define i1 @test_sign_pos(float %x) {
+; CHECK-LABEL: @test_sign_pos(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %y = bitcast float %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half(half %x) {
+; CHECK-LABEL: @test_sign_pos_half(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call half @llvm.fabs.f16(half %x)
+ %y = bitcast half %fabs to i16
+ %sign = icmp sgt i16 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_pos_half_non_elementwise(<2 x half> %x) {
+; CHECK-LABEL: @test_sign_pos_half_non_elementwise(
+; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
+; CHECK-NEXT: [[Y:%.*]] = bitcast <2 x half> [[FABS]] to i32
+; CHECK-NEXT: [[SIGN:%.*]] = icmp sgt i32 [[Y]], -1
+; CHECK-NEXT: ret i1 [[SIGN]]
+;
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
+ %y = bitcast <2 x half> %fabs to i32
+ %sign = icmp sgt i32 %y, -1
+ ret i1 %sign
+}
+
+define i1 @test_sign_neg(float %x) {
+; CHECK-LABEL: @test_sign_neg(
+; CHECK-NEXT: ret i1 true
+;
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fnabs = fneg float %fabs
+ %y = bitcast float %fnabs to i32
+ %sign = icmp slt i32 %y, 0
+ ret i1 %sign
+}
+
+define <2 x i1> @test_sign_pos_vec(<2 x float> %x) {
+; CHECK-LABEL: @test_sign_pos_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
+ %y = bitcast <2 x float> %fabs to <2 x i32>
+ %sign = icmp slt <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %sign
+}
+
+define i32 @test_inf_only(float nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only(
+; CHECK-NEXT: ret i32 2139095040
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i16 @test_inf_only_bfloat(bfloat nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_bfloat(
+; CHECK-NEXT: ret i16 32640
+;
+ %y = bitcast bfloat %x to i16
+ %and = and i16 %y, 32767
+ ret i16 %and
+}
+
+define i128 @test_inf_only_ppc_fp128(ppc_fp128 nofpclass(nan sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_only_ppc_fp128(
+; CHECK-NEXT: ret i128 9218868437227405312
+;
+ %y = bitcast ppc_fp128 %x to i128
+ %and = and i128 %y, 170141183460469231731687303715884105727
+ ret i128 %and
+}
+
+define i32 @test_zero_only(float nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only(
+; CHECK-NEXT: ret i32 0
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2147483647
+ ret i32 %and
+}
+
+define i80 @test_zero_only_non_ieee(x86_fp80 nofpclass(nan sub norm inf) %x) {
+; CHECK-LABEL: @test_zero_only_non_ieee(
+; CHECK-NEXT: ret i80 0
+;
+ %y = bitcast x86_fp80 %x to i80
+ %and = and i80 %y, 604462909807314587353087
+ ret i80 %and
+}
+
+define i32 @test_inf_nan_only(float nofpclass(sub norm zero) %x) {
+; CHECK-LABEL: @test_inf_nan_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_sub_zero_only(float nofpclass(nan norm inf) %x) {
+; CHECK-LABEL: @test_sub_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 2130706432
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 2130706432
+ ret i32 %and
+}
+
+define i32 @test_inf_zero_only(float nofpclass(nan norm sub) %x) {
+; CHECK-LABEL: @test_inf_zero_only(
+; CHECK-NEXT: [[Y:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], 8388608
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %y = bitcast float %x to i32
+ %and = and i32 %y, 16777215
+ ret i32 %and
+}
+
+; Make sure that the signbit is cleared.
+define i32 @test_ninf_only(double %x) {
+; CHECK-LABEL: @test_ninf_only(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[X:%.*]], 0xFFF0000000000000
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: ret i32 0
+; CHECK: if.else:
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = fcmp oeq double %x, 0xFFF0000000000000
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cast = bitcast double %x to i64
+ %trunc = trunc i64 %cast to i32
+ ret i32 %trunc
+
+if.else:
+ ret i32 0
+}
+
+define i1 @test_simplify_icmp(i32 %x) {
+; CHECK-LABEL: @test_simplify_icmp(
+; CHECK-NEXT: ret i1 false
+;
+ %cast1 = uitofp i32 %x to double
+ %cast2 = bitcast double %cast1 to i64
+ %mask = and i64 %cast2, -140737488355328
+ %cmp = icmp eq i64 %mask, -1970324836974592
+ ret i1 %cmp
+}
+
+define i32 @test_snan_quiet_bit1(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_snan_quiet_bit2(float nofpclass(sub norm inf qnan) %x) {
+; CHECK-LABEL: @test_snan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit1(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit1(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 4194304
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 4194304
+ ret i32 %masked
+}
+
+define i32 @test_qnan_quiet_bit2(float nofpclass(sub norm inf snan) %x) {
+; CHECK-LABEL: @test_qnan_quiet_bit2(
+; CHECK-NEXT: [[BITS:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[BITS]], 2097152
+; CHECK-NEXT: ret i32 [[MASKED]]
+;
+ %bits = bitcast float %x to i32
+ %masked = and i32 %bits, 2097152
+ ret i32 %masked
+}
+
+define i16 @test_simplify_mask(i32 %ui, float %x) {
+; CHECK-LABEL: @test_simplify_mask(
+; CHECK-NEXT: [[CONV:%.*]] = uitofp i32 [[UI:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[CONV]], [[X:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i16 31744
+; CHECK: if.else:
+; CHECK-NEXT: ret i16 0
+;
+ %conv = uitofp i32 %ui to float
+ %cmp = fcmp olt float %x, %conv
+ br i1 %cmp, label %if.else, label %if.end
+
+if.end:
+ %cast = bitcast float %conv to i32
+ %shr = lshr i32 %cast, 16
+ %trunc = trunc i32 %shr to i16
+ %and = and i16 %trunc, -32768
+ %or = or disjoint i16 %and, 31744
+ ret i16 %or
+
+if.else:
+ ret i16 0
+}
+
+; TODO: %cmp always evaluates to false
+
+define i1 @test_simplify_icmp2(double %x) {
+; CHECK-LABEL: @test_simplify_icmp2(
+; CHECK-NEXT: [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[COND:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[CAST:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[CAST]], 3458764513820540928
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: if.else:
+; CHECK-NEXT: ret i1 false
+;
+ %ab...
[truncated]
|
Hi @dtcxzyw
It fails with
|
I will take a look. If necessary, feel free to revert this commit :) |
It is weird that BTW, I don't think we need to revert this patch. |
Reduced reproducer:
|
@nikic Do you have a plan that treating the use as a poison value when the knownbit of it has conflicts? |
So |
The plan is to remove all of these assertions and fold conflict to poison. Someone just needs to implement it :) |
Thanks for the quick fix @dtcxzyw ! |
This patch relands #86409.
I mistakenly thought that
Known.makeNegative()
clears the sign bit ofKnown.Zero
. This patch fixes the assertion failure by explicitly clearing the sign bit.