Skip to content

Commit

Permalink
Merged master:5811d723998a into amd-gfx:f8c9b6a1d1e1
Browse files Browse the repository at this point in the history
Local branch amd-gfx f8c9b6a Merged master:ba950ad0a510 into amd-gfx:42611bd8554c
Remote branch master 5811d72 [AArch64][GlobalISel] Promote scalar G_SHL constant shift amounts to s64.
  • Loading branch information
Sw authored and Sw committed Sep 27, 2020
2 parents f8c9b6a + 5811d72 commit b378452
Show file tree
Hide file tree
Showing 13 changed files with 1,198 additions and 170 deletions.
24 changes: 11 additions & 13 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4794,20 +4794,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
case ISD::VECREDUCE_FMUL:
NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
break;
case ISD::VECREDUCE_FMAX:
// This has maxnum semantics, so NaN represents missing data. We must clear
// 'nnan' if it was set because the NaN would be a poison value.
NeutralElem = DAG.getConstantFP(
std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
Flags.setNoNaNs(false);
break;
case ISD::VECREDUCE_FMIN:
// This has minnum semantics, so NaN represents missing data. We must clear
// 'nnan' if it was set because the NaN would be a poison value.
NeutralElem = DAG.getConstantFP(
std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
Flags.setNoNaNs(false);
break;
case ISD::VECREDUCE_FMAX: {
// Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
// For fmaxnum the same value is used with its sign flipped.
const fltSemantics &Semantics = DAG.EVTToAPFloatSemantics(ElemVT);
APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
!Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
APFloat::getLargest(Semantics);
if (N->getOpcode() == ISD::VECREDUCE_FMAX)
NeutralAF.changeSign();

NeutralElem = DAG.getConstantFP(NeutralAF, dl, ElemVT);
}
}

// Pad the vector with the neutral element.
Expand Down
17 changes: 11 additions & 6 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.moreElementsToNextPow2(0);

getActionDefinitionsBuilder(G_SHL)
.customIf([=](const LegalityQuery &Query) {
const auto &SrcTy = Query.Types[0];
const auto &AmtTy = Query.Types[1];
return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
AmtTy.getSizeInBits() == 32;
})
.legalFor({
{s32, s32},
{s32, s64},
{s64, s64},
{v16s8, v16s8},
{v4s16, v4s16},
Expand Down Expand Up @@ -756,16 +763,14 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
// If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
// imported patterns can select it later. Either way, it will be legal.
Register AmtReg = MI.getOperand(2).getReg();
auto *CstMI = MRI.getVRegDef(AmtReg);
assert(CstMI && "expected to find a vreg def");
if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
int64_t Amount = VRegAndVal->Value;
if (Amount > 31)
return true; // This will have to remain a register variant.
assert(MRI.getType(AmtReg).getSizeInBits() == 32);
auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
MI.getOperand(2).setReg(ExtCst.getReg(0));
return true;
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Scalar/DivRemPairs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,16 @@ static DivRemWorklistTy getWorklist(Function &F) {
// rare than division.
for (auto &RemPair : RemMap) {
// Find the matching division instruction from the division map.
Instruction *DivInst = DivMap[RemPair.first];
if (!DivInst)
auto It = DivMap.find(RemPair.first);
if (It == DivMap.end())
continue;

// We have a matching pair of div/rem instructions.
NumPairs++;
Instruction *RemInst = RemPair.second;

// Place it in the worklist.
Worklist.emplace_back(DivInst, RemInst);
Worklist.emplace_back(It->second, RemInst);
}

return Worklist;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ name: test_merge_s4
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s4
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C2]], [[C1]]
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C3]](s64)
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ body: |
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.ptr + 2, align 4)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C3]](s64)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64)
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4)
; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ body: |
; CHECK-LABEL: name: shl_cimm_32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64)
; CHECK: $w0 = COPY [[SHL]](s32)
; CHECK: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ body: |
; CHECK-LABEL: name: test_unmerge_s4
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s64)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s8)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32)
; CHECK: $x0 = COPY [[ANYEXT]](s64)
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/arm64-clrsb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@ entry:
; CHECK-LABEL: clrsb32
; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]]

; FIXME: We should produce the same result here to save some code size. After
; that, we can remove the GISEL special casing.
; GISEL-LABEL: clrsb32
; GISEL: clz
; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]]
}

; Function Attrs: nounwind ssp
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344
; CHECK-NEXT: mov w8, #-8388608
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
Expand All @@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_ninf:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344
; CHECK-NEXT: mov w8, #-8388609
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344
; CHECK-NEXT: mov w8, #2139095040
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
Expand All @@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_ninf:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2143289344
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
Expand Down
Loading

0 comments on commit b378452

Please sign in to comment.