Skip to content

Commit 3aae916

Browse files
authored
Reland "[ValueTracking] Compute knownbits from known fp classes" (#92084)
This patch relands #86409. I mistakenly thought that `Known.makeNegative()` clears the sign bit of `Known.Zero`. This patch fixes the resulting assertion failure by explicitly clearing the sign bit of both `Known.Zero` and `Known.One` before the known sign is applied.
1 parent f658d84 commit 3aae916

File tree

6 files changed

+338
-19
lines changed

6 files changed

+338
-19
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1904,7 +1904,7 @@ template <typename Op_t> struct ElementWiseBitCast_match {
19041904
ElementWiseBitCast_match(const Op_t &OpMatch) : Op(OpMatch) {}
19051905

19061906
template <typename OpTy> bool match(OpTy *V) {
1907-
BitCastInst *I = dyn_cast<BitCastInst>(V);
1907+
auto *I = dyn_cast<BitCastInst>(V);
19081908
if (!I)
19091909
return false;
19101910
Type *SrcType = I->getSrcTy();

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1118,6 +1118,41 @@ static void computeKnownBitsFromOperator(const Operator *I,
11181118
break;
11191119
}
11201120

1121+
const Value *V;
1122+
// Handle bitcast from floating point to integer.
1123+
if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1124+
V->getType()->isFPOrFPVectorTy()) {
1125+
Type *FPType = V->getType()->getScalarType();
1126+
KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
1127+
FPClassTest FPClasses = Result.KnownFPClasses;
1128+
1129+
if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1130+
Known.Zero.setAllBits();
1131+
Known.One.setAllBits();
1132+
1133+
if (FPClasses & fcInf)
1134+
Known = Known.intersectWith(KnownBits::makeConstant(
1135+
APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1136+
1137+
if (FPClasses & fcZero)
1138+
Known = Known.intersectWith(KnownBits::makeConstant(
1139+
APInt::getZero(FPType->getScalarSizeInBits())));
1140+
1141+
Known.Zero.clearSignBit();
1142+
Known.One.clearSignBit();
1143+
}
1144+
1145+
if (Result.SignBit) {
1146+
if (*Result.SignBit)
1147+
Known.makeNegative();
1148+
else
1149+
Known.makeNonNegative();
1150+
}
1151+
1152+
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1153+
break;
1154+
}
1155+
11211156
// Handle cast from vector integer type to scalar or vector integer.
11221157
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
11231158
if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2216,7 +2216,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
22162216
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
22172217
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
22182218
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2219-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
2219+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
22202220
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
22212221
; CHECK-NEXT: ret float [[TMP5]]
22222222
;
@@ -2304,7 +2304,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
23042304
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23052305
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
23062306
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2307-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
2307+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
23082308
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
23092309
; CHECK-NEXT: ret float [[TMP5]]
23102310
;
@@ -2353,7 +2353,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
23532353
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23542354
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
23552355
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2356-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
2356+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
23572357
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
23582358
; CHECK-NEXT: ret float [[TMP5]]
23592359
;
@@ -2376,7 +2376,7 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
23762376
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23772377
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
23782378
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2379-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
2379+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
23802380
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
23812381
; CHECK-NEXT: ret float [[TMP5]]
23822382
;
@@ -2399,7 +2399,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
23992399
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
24002400
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
24012401
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2402-
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
2402+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
24032403
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
24042404
; CHECK-NEXT: ret <2 x float> [[TMP5]]
24052405
;
@@ -2448,7 +2448,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
24482448
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
24492449
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
24502450
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2451-
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP3]]
2451+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
24522452
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
24532453
; CHECK-NEXT: ret <2 x float> [[TMP5]]
24542454
;
@@ -2560,7 +2560,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
25602560
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
25612561
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
25622562
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2563-
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[__POW_SIGN]], [[TMP3]]
2563+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
25642564
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
25652565
; CHECK-NEXT: ret float [[TMP5]]
25662566
;

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -680,7 +680,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
680680
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
681681
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
682682
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
683-
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
683+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
684684
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
685685
; CHECK-NEXT: ret float [[TMP3]]
686686
;
@@ -703,7 +703,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
703703
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
704704
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
705705
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
706-
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[__POW_SIGN]], [[TMP1]]
706+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
707707
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
708708
; CHECK-NEXT: ret <2 x float> [[TMP3]]
709709
;
@@ -772,7 +772,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
772772
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
773773
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
774774
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
775-
; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[__POW_SIGN]], [[TMP1]]
775+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
776776
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
777777
; CHECK-NEXT: ret half [[TMP3]]
778778
;
@@ -795,7 +795,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
795795
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
796796
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
797797
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
798-
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[__POW_SIGN]], [[TMP1]]
798+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
799799
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
800800
; CHECK-NEXT: ret <2 x half> [[TMP3]]
801801
;
@@ -829,7 +829,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
829829
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
830830
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
831831
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
832-
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
832+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
833833
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
834834
; CHECK-NEXT: ret float [[TMP3]]
835835
;
@@ -1075,7 +1075,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
10751075
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
10761076
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
10771077
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
1078-
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[__POW_SIGN]], [[TMP1]]
1078+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
10791079
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
10801080
; CHECK-NEXT: ret float [[TMP3]]
10811081
;

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,7 @@ declare half @_Z4pownDhi(half, i32)
360360
; GCN-NATIVE: %0 = bitcast half %x to i16
361361
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
362362
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
363-
; GCN-NATIVE: %2 = or i16 %__pow_sign, %1
363+
; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
364364
; GCN-NATIVE: %3 = bitcast i16 %2 to half
365365
define half @test_pown_f16(half %x, i32 %y) {
366366
entry:
@@ -378,7 +378,7 @@ declare float @_Z4pownfi(float, i32)
378378
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
379379
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
380380
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
381-
; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
381+
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
382382
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
383383
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
384384
entry:
@@ -414,7 +414,7 @@ entry:
414414
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
415415
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
416416
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
417-
; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
417+
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
418418
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
419419
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
420420
entry:
@@ -438,7 +438,7 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
438438
; GCN: %1 = bitcast half %x to i16
439439
; GCN: %__pow_sign = and i16 %1, -32768
440440
; GCN: %2 = bitcast half %__exp2 to i16
441-
; GCN: %3 = or i16 %__pow_sign, %2
441+
; GCN: %3 = or disjoint i16 %__pow_sign, %2
442442
; GCN: %4 = bitcast i16 %3 to half
443443
define half @test_pow_fast_f16__y_13(half %x) {
444444
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
@@ -453,7 +453,7 @@ define half @test_pow_fast_f16__y_13(half %x) {
453453
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
454454
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
455455
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
456-
; GCN: %3 = or <2 x i16> %__pow_sign, %2
456+
; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
457457
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
458458
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
459459
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)

0 commit comments

Comments
 (0)