Skip to content

Commit

Permalink
[AArch64][GISel] Add handling for G_VECREDUCE_FMAXIMUM and G_VECREDUCE_FMINIMUM
Browse files Browse the repository at this point in the history

This is a lot of copy-pasting for the existing handling of
G_VECREDUCE_FMAX/G_VECREDUCE_FMIN to add handling for
G_VECREDUCE_FMAXIMUM/G_VECREDUCE_FMINIMUM in the same way.

Differential Revision: https://reviews.llvm.org/D156615
  • Loading branch information
davemgreen committed Aug 14, 2023
1 parent 660fded commit a3f2751
Show file tree
Hide file tree
Showing 18 changed files with 666 additions and 299 deletions.
4 changes: 2 additions & 2 deletions llvm/docs/GlobalISel/GenericOpcode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -655,10 +655,10 @@ G_VECREDUCE_FADD, G_VECREDUCE_FMUL

These reductions are relaxed variants which may reduce the elements in any order.

G_VECREDUCE_FMAX, G_VECREDUCE_FMIN
G_VECREDUCE_FMAX, G_VECREDUCE_FMIN, G_VECREDUCE_FMAXIMUM, G_VECREDUCE_FMINIMUM
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
FMIN/FMAX/FMINIMUM/FMAXIMUM nodes can have flags, for NaN/NoNaN variants.


Integer/bitwise reductions
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ class GVecReduce : public GenericMachineInstr {
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAXIMUM:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
Expand Down Expand Up @@ -441,6 +443,12 @@ class GVecReduce : public GenericMachineInstr {
case TargetOpcode::G_VECREDUCE_FMIN:
ScalarOpc = TargetOpcode::G_FMINNUM;
break;
case TargetOpcode::G_VECREDUCE_FMAXIMUM:
ScalarOpc = TargetOpcode::G_FMAXIMUM;
break;
case TargetOpcode::G_VECREDUCE_FMINIMUM:
ScalarOpc = TargetOpcode::G_FMINIMUM;
break;
case TargetOpcode::G_VECREDUCE_ADD:
ScalarOpc = TargetOpcode::G_ADD;
break;
Expand Down
13 changes: 13 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1973,6 +1973,19 @@ class MachineIRBuilder {
MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) {
  // One def (Dst) and one use (Src); the opcode is named first so the
  // buildInstr call reads the same for every reduction builder.
  const unsigned Opcode = TargetOpcode::G_VECREDUCE_FMIN;
  return buildInstr(Opcode, {Dst}, {Src});
}

/// Build and insert \p Dst = G_VECREDUCE_FMAXIMUM \p Src
///
/// Forwards to buildInstr with \p Dst as the only destination operand and
/// \p Src as the only source operand.
///
/// \return the MachineInstrBuilder produced by buildInstr.
MachineInstrBuilder buildVecReduceFMaximum(const DstOp &Dst,
                                           const SrcOp &Src) {
  const unsigned Opcode = TargetOpcode::G_VECREDUCE_FMAXIMUM;
  return buildInstr(Opcode, {Dst}, {Src});
}

/// Build and insert \p Dst = G_VECREDUCE_FMINIMUM \p Src
///
/// Forwards to buildInstr with \p Dst as the only destination operand and
/// \p Src as the only source operand.
///
/// \return the MachineInstrBuilder produced by buildInstr.
MachineInstrBuilder buildVecReduceFMinimum(const DstOp &Dst,
                                           const SrcOp &Src) {
  const unsigned Opcode = TargetOpcode::G_VECREDUCE_FMINIMUM;
  return buildInstr(Opcode, {Dst}, {Src});
}

/// Build and insert \p Res = G_VECREDUCE_ADD \p Src
MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src});
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class APFloat;
case TargetOpcode::G_VECREDUCE_FMUL: \
case TargetOpcode::G_VECREDUCE_FMAX: \
case TargetOpcode::G_VECREDUCE_FMIN: \
case TargetOpcode::G_VECREDUCE_FMAXIMUM: \
case TargetOpcode::G_VECREDUCE_FMINIMUM: \
case TargetOpcode::G_VECREDUCE_ADD: \
case TargetOpcode::G_VECREDUCE_MUL: \
case TargetOpcode::G_VECREDUCE_AND: \
Expand All @@ -72,6 +74,8 @@ class APFloat;
case TargetOpcode::G_VECREDUCE_FMUL: \
case TargetOpcode::G_VECREDUCE_FMAX: \
case TargetOpcode::G_VECREDUCE_FMIN: \
case TargetOpcode::G_VECREDUCE_FMAXIMUM: \
case TargetOpcode::G_VECREDUCE_FMINIMUM: \
case TargetOpcode::G_VECREDUCE_ADD: \
case TargetOpcode::G_VECREDUCE_MUL: \
case TargetOpcode::G_VECREDUCE_AND: \
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,8 @@ HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAXIMUM)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMINIMUM)
HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD)
HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL)
HANDLE_TARGET_OPCODE(G_VECREDUCE_AND)
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1398,6 +1398,8 @@ def G_VECREDUCE_FMUL : VectorReduction;

def G_VECREDUCE_FMAX : VectorReduction;
def G_VECREDUCE_FMIN : VectorReduction;
def G_VECREDUCE_FMAXIMUM : VectorReduction;
def G_VECREDUCE_FMINIMUM : VectorReduction;

def G_VECREDUCE_ADD : VectorReduction;
def G_VECREDUCE_MUL : VectorReduction;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ def : GINodeEquiv<G_LLROUND, llround>;
def : GINodeEquiv<G_VECREDUCE_FADD, vecreduce_fadd>;
def : GINodeEquiv<G_VECREDUCE_FMAX, vecreduce_fmax>;
def : GINodeEquiv<G_VECREDUCE_FMIN, vecreduce_fmin>;
def : GINodeEquiv<G_VECREDUCE_FMAXIMUM, vecreduce_fmaximum>;
def : GINodeEquiv<G_VECREDUCE_FMINIMUM, vecreduce_fminimum>;

def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_FMIN;
case Intrinsic::vector_reduce_fmax:
return TargetOpcode::G_VECREDUCE_FMAX;
case Intrinsic::vector_reduce_fminimum:
return TargetOpcode::G_VECREDUCE_FMINIMUM;
case Intrinsic::vector_reduce_fmaximum:
return TargetOpcode::G_VECREDUCE_FMAXIMUM;
case Intrinsic::vector_reduce_add:
return TargetOpcode::G_VECREDUCE_ADD;
case Intrinsic::vector_reduce_mul:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2649,6 +2649,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
}
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_FMAXIMUM:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/MachineVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1720,6 +1720,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAXIMUM:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s32, 4)
.lower();

getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX})
getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
.legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
.legalIf([=](const LegalityQuery &Query) {
const auto &Ty = Query.Types[1];
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAXIMUM:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ define double @fmul_fast(double %start, <4 x double> %vec) {

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)

define float @fmax(<4 x float> %vec) {
; CHECK-LABEL: name: fmax
Expand Down Expand Up @@ -106,6 +108,45 @@ define float @fmin_nnan(<4 x float> %vec) {
ret float %res
}

; llvm.vector.reduce.fmaximum on <4 x float>: the expected output below is a
; single G_VECREDUCE_FMAXIMUM over the <4 x s32> bitcast of $q0, with the s32
; result copied to $s0.
define float @fmaximum(<4 x float> %vec) {
; CHECK-LABEL: name: fmaximum
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
; CHECK: $s0 = COPY [[VECREDUCE_FMAX]](s32)
; CHECK: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec)
ret float %res
}

; llvm.vector.reduce.fminimum on <4 x float>: the expected output below is a
; single G_VECREDUCE_FMINIMUM over the <4 x s32> bitcast of $q0, with the s32
; result copied to $s0.
define float @fminimum(<4 x float> %vec) {
; CHECK-LABEL: name: fminimum
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
; CHECK: RET_ReallyLR implicit $s0
%res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
ret float %res
}

; Same reduction as a plain fminimum call, but the call carries the nnan
; fast-math flag; the expected output below has that flag on the generated
; G_VECREDUCE_FMINIMUM.
define float @fminimum_nnan(<4 x float> %vec) {
; CHECK-LABEL: name: fminimum_nnan
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32)
; CHECK: RET_ReallyLR implicit $s0
%res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
ret float %res
}

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

define i32 @add(<4 x i32> %vec) {
Expand Down
93 changes: 93 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s

---
name: fmin_v2s32
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0

; CHECK-LABEL: name: fmin_v2s32
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[COPY]](<2 x s32>)
; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMIN]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%0:_(<2 x s32>) = COPY $d0
%1:_(s32) = G_VECREDUCE_FMIN %0(<2 x s32>)
$s0 = COPY %1(s32)
RET_ReallyLR implicit $s0

...
---
name: fmax_v8s16
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0

; CHECK-LABEL: name: fmax_v8s16
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXNUM [[FPEXT]], [[FPEXT1]]
; CHECK-NEXT: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[FMAXNUM]](<4 x s32>)
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAX]](s32)
; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16)
; CHECK-NEXT: RET_ReallyLR implicit $h0
%0:_(<8 x s16>) = COPY $q0
%1:_(s16) = G_VECREDUCE_FMAX %0(<8 x s16>)
$h0 = COPY %1(s16)
RET_ReallyLR implicit $h0

...
---
name: fminimum_v2s32
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0

; CHECK-LABEL: name: fminimum_v2s32
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[COPY]](<2 x s32>)
; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMINIMUM]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%0:_(<2 x s32>) = COPY $d0
%1:_(s32) = G_VECREDUCE_FMINIMUM %0(<2 x s32>)
$s0 = COPY %1(s32)
RET_ReallyLR implicit $s0

...
---
name: fmaximum_v8s16
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0

; CHECK-LABEL: name: fmaximum_v8s16
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[FPEXT]], [[FPEXT1]]
; CHECK-NEXT: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[FMAXIMUM]](<4 x s32>)
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAXIMUM]](s32)
; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16)
; CHECK-NEXT: RET_ReallyLR implicit $h0
%0:_(<8 x s16>) = COPY $q0
%1:_(s16) = G_VECREDUCE_FMAXIMUM %0(<8 x s16>)
$h0 = COPY %1(s16)
RET_ReallyLR implicit $h0

...
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,14 @@
# DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_FMAXIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_FMINIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,24 @@ body: |
RET_ReallyLR implicit $w0

...
---
name: fmaximum_v4s32
legalized: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0

; CHECK-LABEL: name: fmaximum_v4s32
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0
; CHECK: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:fpr(s32) = G_VECREDUCE_FMAXIMUM [[COPY]](<4 x s32>)
; CHECK: $w0 = COPY [[VECREDUCE_FMAXIMUM]](s32)
; CHECK: RET_ReallyLR implicit $w0
%0:_(<4 x s32>) = COPY $q0
%1:_(s32) = G_VECREDUCE_FMAXIMUM %0(<4 x s32>)
$w0 = COPY %1(s32)
RET_ReallyLR implicit $w0

...

Loading

0 comments on commit a3f2751

Please sign in to comment.