Skip to content

Commit

Permalink
Merged main:6f618a7b8249e7baa3b2d18f8bbec3c5b6f6d24e into amd-gfx:e5edfda5900b
Browse files Browse the repository at this point in the history

Local branch amd-gfx e5edfda Merged main:65b7cbbd8735b90933369364153b982d498f649a into amd-gfx:2f9a9b483f9f
Remote branch main 6f618a7 Update my email
  • Loading branch information
SC llvm team authored and SC llvm team committed Aug 24, 2024
2 parents e5edfda + 6f618a7 commit 08f1863
Show file tree
Hide file tree
Showing 18 changed files with 636 additions and 259 deletions.
2 changes: 1 addition & 1 deletion .mailmap
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<i@maskray.me> <maskray@google.com>
<JCTremoulet@gmail.com> <jotrem@microsoft.com>
<min@myhsu.dev> <minyihh@uci.edu>
<qiucofan@cn.ibm.com> <qiucf@cn.ibm.com>
<qcf@ecnelises.com> <qiucofan@cn.ibm.com> <qiucf@cn.ibm.com>
<rnk@google.com> <reid@kleckner.net>
<thakis@chromium.org> <nicolasweber@gmx.de>
Jianjian GUAN <jacquesguan@me.com>
Expand Down
6 changes: 3 additions & 3 deletions libcxx/test/support/atomic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@
# define TEST_ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE
# define TEST_ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE
# define TEST_ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE
#elif TEST_COMPILER_MSVC
#elif defined(TEST_COMPILER_MSVC)
// This is lifted from STL/stl/inc/atomic on github for the purposes of
// keeping the tests compiling for MSVC's STL. It's not a perfect solution
// but at least the tests will keep running.
//
// Note MSVC's STL never produces a type that is sometimes lock free, but not always lock free.
template <class T, size_t Size = sizeof(T)>
constexpr bool msvc_is_lock_free_macro_value() {
return (Size <= 8 && (Size & Size - 1) == 0) ? 2 : 0;
constexpr int msvc_is_lock_free_macro_value() {
return (Size <= 8 && (Size & (Size - 1)) == 0) ? 2 : 0;
}
# define TEST_ATOMIC_CHAR_LOCK_FREE ::msvc_is_lock_free_macro_value<char>()
# define TEST_ATOMIC_SHORT_LOCK_FREE ::msvc_is_lock_free_macro_value<short>()
Expand Down
4 changes: 0 additions & 4 deletions llvm/CREDITS.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -432,10 +432,6 @@ W: http://vladimir_prus.blogspot.com
E: ghost@cs.msu.su
D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass

N: QIU Chaofan
E: qiucofan@cn.ibm.com
D: PowerPC Backend Developer

N: Kalle Raiskila
E: kalle.rasikila@nokia.com
D: Some bugfixes to CellSPU
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Config/llvm-config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

/* Indicate that this is LLVM compiled from the amd-gfx branch. */
#define LLVM_HAVE_BRANCH_AMD_GFX
#define LLVM_MAIN_REVISION 509536
#define LLVM_MAIN_REVISION 509543

/* Define if LLVM_ENABLE_DUMP is enabled */
#cmakedefine LLVM_ENABLE_DUMP
Expand Down
12 changes: 7 additions & 5 deletions llvm/include/llvm/IR/VPIntrinsics.def
Original file line number Diff line number Diff line change
Expand Up @@ -722,27 +722,29 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fminimum, VP_REDUCE_FMINIMUM,
#error \
"The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
#endif
#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \
#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, SDOPC, SEQ_SDOPC, INTRIN) \
BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \
BEGIN_REGISTER_VP_SDNODE(VPSD, 1, VPID, 2, 3) \
VP_PROPERTY_REDUCTION(0, 1) \
VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
END_REGISTER_VP_SDNODE(VPSD) \
BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, 1, VPID, 2, 3) \
HELPER_MAP_VPID_TO_VPSD(VPID, SEQ_VPSD) \
VP_PROPERTY_FUNCTIONAL_SDOPC(SEQ_SDOPC) \
VP_PROPERTY_REDUCTION(0, 1) \
END_REGISTER_VP_SDNODE(SEQ_VPSD) \
VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
END_REGISTER_VP_INTRINSIC(VPID)

// llvm.vp.reduce.fadd(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
VP_REDUCE_SEQ_FADD,
vector_reduce_fadd)
VP_REDUCE_SEQ_FADD, VECREDUCE_FADD,
VECREDUCE_SEQ_FADD, vector_reduce_fadd)

// llvm.vp.reduce.fmul(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
VP_REDUCE_SEQ_FMUL,
vector_reduce_fmul)
VP_REDUCE_SEQ_FMUL, VECREDUCE_FMUL,
VECREDUCE_SEQ_FMUL, vector_reduce_fmul)

#undef HELPER_REGISTER_REDUCTION_SEQ_VP

Expand Down
20 changes: 16 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7311,8 +7311,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
// Generate a vp.reduce_op if it is custom/legal for the target. This avoids
// needing to pad the source vector, because the inactive lanes can simply be
// disabled and not contribute to the result.
// TODO: VECREDUCE_FADD, VECREDUCE_FMUL aren't currently mapped correctly,
// and thus don't take this path.
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
SDValue Start = NeutralElem;
Expand Down Expand Up @@ -7351,6 +7349,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
SDValue VecOp = N->getOperand(1);
SDValue Op = GetWidenedVector(VecOp);

EVT VT = N->getValueType(0);
EVT OrigVT = VecOp.getValueType();
EVT WideVT = Op.getValueType();
EVT ElemVT = OrigVT.getVectorElementType();
Expand All @@ -7364,6 +7363,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
unsigned OrigElts = OrigVT.getVectorMinNumElements();
unsigned WideElts = WideVT.getVectorMinNumElements();

// Generate a vp.reduce_op if it is custom/legal for the target. This avoids
// needing to pad the source vector, because the inactive lanes can simply be
// disabled and not contribute to the result.
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
OrigVT.getVectorElementCount());
return DAG.getNode(*VPOpcode, dl, VT, {AccOp, Op, Mask, EVL}, Flags);
}

if (WideVT.isScalableVector()) {
unsigned GCD = std::gcd(OrigElts, WideElts);
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
Expand All @@ -7372,14 +7384,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
DAG.getVectorIdxConstant(Idx, dl));
return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
}

for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));

return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
}

SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace {
// by the MergeFunctions pass.

class StructuralHashImpl {
uint64_t Hash;
uint64_t Hash = 4;

void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

Expand All @@ -43,7 +43,7 @@ class StructuralHashImpl {
}

public:
StructuralHashImpl() : Hash(4) {}
StructuralHashImpl() = default;

void updateOperand(Value *Operand) {
hashType(Operand->getType());
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2517,9 +2517,7 @@ static void updateRegisterMapForDbgValueListAfterMove(
if (RegIt == RegisterMap.end())
return;
auto &InstrVec = RegIt->getSecond();
for (unsigned I = 0; I < InstrVec.size(); I++)
if (InstrVec[I] == InstrToReplace)
InstrVec[I] = DbgValueListInstr;
llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
});
}

Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2002,10 +2002,7 @@ SmallVector<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM,
if ((unsigned)llvm::popcount(P) < Count) {
// Reset all occurrences of P, if there are more occurrences of P
// than there are bits in P.
for (unsigned &Q : Worklist) {
if (Q == P)
Q = 0;
}
llvm::replace(Worklist, P, 0U);
}
}

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
assert(InOp.isReg() && "ProxyReg input should be a register.");
assert(OutOp.isReg() && "ProxyReg output should be a register.");
RemoveList.push_back(&MI);
RAUWBatch.try_emplace(OutOp.getReg(), InOp.getReg());
Register replacement = InOp.getReg();
// Check if the replacement itself has been replaced.
if (auto it = RAUWBatch.find(replacement); it != RAUWBatch.end())
replacement = it->second;
RAUWBatch.try_emplace(OutOp.getReg(), replacement);
break;
}
}
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35781,9 +35781,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
// Zero out any registers that are already used.
for (const auto &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
for (unsigned &Reg : AvailableRegs)
if (Reg == MO.getReg())
Reg = 0;
llvm::replace(AvailableRegs, static_cast<unsigned>(MO.getReg()), 0U);
}

// Choose the first remaining non-zero available register.
Expand Down
25 changes: 0 additions & 25 deletions llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll

This file was deleted.

98 changes: 98 additions & 0 deletions llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# RUN: llc %s --run-pass=nvptx-proxyreg-erasure -march=nvptx64 -o - | FileCheck %s

--- |
; ModuleID = 'third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll'
source_filename = "third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

declare <4 x i32> @callee_vec_i32()

define <4 x i32> @check_vec_i32() {
%ret = call <4 x i32> @callee_vec_i32()
ret <4 x i32> %ret
}

...
---
name: check_vec_i32
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: int32regs, preferred-register: '' }
- { id: 1, class: int32regs, preferred-register: '' }
- { id: 2, class: int32regs, preferred-register: '' }
- { id: 3, class: int32regs, preferred-register: '' }
- { id: 4, class: int32regs, preferred-register: '' }
- { id: 5, class: int32regs, preferred-register: '' }
- { id: 6, class: int32regs, preferred-register: '' }
- { id: 7, class: int32regs, preferred-register: '' }
- { id: 8, class: int32regs, preferred-register: '' }
- { id: 9, class: int32regs, preferred-register: '' }
- { id: 10, class: int32regs, preferred-register: '' }
- { id: 11, class: int32regs, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: true
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
isCalleeSavedInfoValid: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
%0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
; CHECK-NOT: ProxyReg
%4:int32regs = ProxyRegI32 killed %0
%5:int32regs = ProxyRegI32 killed %1
%6:int32regs = ProxyRegI32 killed %2
%7:int32regs = ProxyRegI32 killed %3
; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3
StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
%8:int32regs = LoadParamMemI32 0
; CHECK-NOT: ProxyReg
%9:int32regs = ProxyRegI32 killed %8
%10:int32regs = ProxyRegI32 killed %9
%11:int32regs = ProxyRegI32 killed %10
; CHECK: StoreRetvalI32 killed %8
StoreRetvalI32 killed %11, 0
Return
...
Original file line number Diff line number Diff line change
Expand Up @@ -791,12 +791,7 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
Expand Down Expand Up @@ -880,7 +875,7 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa4, v8
; CHECK-NEXT: fadd.s fa0, fa4, fa5
Expand Down
12 changes: 0 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ define half @vreduce_fadd_v7f16(ptr %x, half %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 7
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
Expand Down Expand Up @@ -470,10 +466,6 @@ define float @vreduce_fadd_v7f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
Expand All @@ -488,10 +480,6 @@ define float @vreduce_ord_fadd_v7f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
Expand Down
Loading

0 comments on commit 08f1863

Please sign in to comment.