Merged master:5811d723998a into amd-gfx:f8c9b6a1d1e1

Local branch amd-gfx f8c9b6a Merged master:ba950ad0a510 into amd-gfx:42611bd8554c Remote branch master 5811d72 [AArch64][GlobalISel] Promote scalar G_SHL constant shift amounts to s64.
jaebaek · Sep 27, 2020 · b378452 · b378452
2 parents f8c9b6a + 5811d72
commit b378452
Show file tree

Hide file tree

Showing 13 changed files with 1,198 additions and 170 deletions.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4794,20 +4794,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
   case ISD::VECREDUCE_FMUL:
     NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
     break;
-  case ISD::VECREDUCE_FMAX:
-    // This has maxnum semantics, so NaN represents missing data. We must clear
-    // 'nnan' if it was set because the NaN would be a poison value.
-    NeutralElem = DAG.getConstantFP(
-        std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
-    Flags.setNoNaNs(false);
-    break;
   case ISD::VECREDUCE_FMIN:
-    // This has minnum semantics, so NaN represents missing data. We must clear
-    // 'nnan' if it was set because the NaN would be a poison value.
-    NeutralElem = DAG.getConstantFP(
-        std::numeric_limits<double>::quiet_NaN(), dl, ElemVT);
-    Flags.setNoNaNs(false);
-    break;
+  case ISD::VECREDUCE_FMAX: {
+    // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+    const fltSemantics &Semantics = DAG.EVTToAPFloatSemantics(ElemVT);
+    APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
+                        !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
+                        APFloat::getLargest(Semantics);
+    if (N->getOpcode() == ISD::VECREDUCE_FMAX)
+      NeutralAF.changeSign();
+
+    NeutralElem = DAG.getConstantFP(NeutralAF, dl, ElemVT);
+  }
   }
 
   // Pad the vector with the neutral element.

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -98,8 +98,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .moreElementsToNextPow2(0);
 
   getActionDefinitionsBuilder(G_SHL)
+      .customIf([=](const LegalityQuery &Query) {
+        const auto &SrcTy = Query.Types[0];
+        const auto &AmtTy = Query.Types[1];
+        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+               AmtTy.getSizeInBits() == 32;
+      })
       .legalFor({
           {s32, s32},
+          {s32, s64},
           {s64, s64},
           {v16s8, v16s8},
           {v4s16, v4s16},
@@ -756,16 +763,14 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
   // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
   // imported patterns can select it later. Either way, it will be legal.
   Register AmtReg = MI.getOperand(2).getReg();
-  auto *CstMI = MRI.getVRegDef(AmtReg);
-  assert(CstMI && "expected to find a vreg def");
-  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
+  auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
+  if (!VRegAndVal)
     return true;
   // Check the shift amount is in range for an immediate form.
-  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
+  int64_t Amount = VRegAndVal->Value;
   if (Amount > 31)
     return true; // This will have to remain a register variant.
-  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
-  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
+  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
   MI.getOperand(2).setReg(ExtCst.getReg(0));
   return true;
 }

diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -151,16 +151,16 @@ static DivRemWorklistTy getWorklist(Function &F) {
   // rare than division.
   for (auto &RemPair : RemMap) {
     // Find the matching division instruction from the division map.
-    Instruction *DivInst = DivMap[RemPair.first];
-    if (!DivInst)
+    auto It = DivMap.find(RemPair.first);
+    if (It == DivMap.end())
       continue;
 
     // We have a matching pair of div/rem instructions.
     NumPairs++;
     Instruction *RemInst = RemPair.second;
 
     // Place it in the worklist.
-    Worklist.emplace_back(DivInst, RemInst);
+    Worklist.emplace_back(It->second, RemInst);
   }
 
   return Worklist;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -6,11 +6,12 @@ name:            test_merge_s4
 body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s4
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C2]], [[C1]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C3]](s64)
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
@@ -28,12 +28,11 @@ body:             |
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
     ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.ptr + 2, align 4)
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64)
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C3]](s64)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64)
     ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
     ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4)
     ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
@@ -235,8 +235,8 @@ body:             |
 
     ; CHECK-LABEL: name: shl_cimm_32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64)
     ; CHECK: $w0 = COPY [[SHL]](s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %0:_(s32) = COPY $w0

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir
@@ -24,9 +24,10 @@ body: |
     ; CHECK-LABEL: name: test_unmerge_s4
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8)
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s64)
     ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s8)
     ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32)
     ; CHECK: $x0 = COPY [[ANYEXT]](s64)

diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -21,10 +21,8 @@ entry:
 ; CHECK-LABEL: clrsb32
 ; CHECK:   cls [[TEMP:w[0-9]+]], [[TEMP]]
 
-; FIXME: We should produce the same result here to save some code size. After
-; that, we can remove the GISEL special casing.
 ; GISEL-LABEL: clrsb32
-; GISEL: clz
+; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]]
 }
 
 ; Function Attrs: nounwind ssp

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2143289344
+; CHECK-NEXT:    mov w8, #-8388608
 ; CHECK-NEXT:    fmov s1, w8
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
@@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32_ninf:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2143289344
+; CHECK-NEXT:    mov w8, #-8388609
 ; CHECK-NEXT:    fmov s1, w8
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2143289344
+; CHECK-NEXT:    mov w8, #2139095040
 ; CHECK-NEXT:    fmov s1, w8
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fminnmv s0, v0.4s
@@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32_ninf:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2143289344
+; CHECK-NEXT:    mov w8, #2139095039
 ; CHECK-NEXT:    fmov s1, w8
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fminnmv s0, v0.4s