Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CGP][AArch64] Rebase the common base offset for better ISel #74046

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}

/// Ask the target lowering which common base offset it prefers for the given
/// MinOffset/MaxOffset pair, and hand that answer back unchanged.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
  const auto *Lowering = getTLI();
  return Lowering->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
}

unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
Expand Down
8 changes: 7 additions & 1 deletion llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
Expand Down Expand Up @@ -2721,6 +2721,12 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;

/// Return the preferred common base offset for a group of large-offset GEPs
/// described by \p MinOffset and \p MaxOffset. This default implementation
/// always returns 0; targets may override it to return a non-zero base.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const {
return 0;
}

/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
Expand Down
79 changes: 50 additions & 29 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
Value *NewBaseGEP = nullptr;

// Helper that materializes the shared base address "OldBase + BaseOffset" as
// an i8 GEP named "splitgep". The result is stored into the captured
// NewBaseGEP and recorded in NewGEPBases; the lambda itself returns nothing.
// GEP supplies the context, index type and address space for the new base.
auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
GetElementPtrInst *GEP) {
LLVMContext &Ctx = GEP->getContext();
Type *PtrIdxTy = DL->getIndexType(GEP->getType());
// Pointer type in the same address space as GEP's result.
Type *I8PtrTy =
PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);

// Pick where the new base is inserted, depending on what OldBase is.
BasicBlock::iterator NewBaseInsertPt;
BasicBlock *NewBaseInsertBB;
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
// If the base of the struct is an instruction, the new base will be
// inserted close to it.
NewBaseInsertBB = BaseI->getParent();
if (isa<PHINode>(BaseI))
// A PHI base: use the block's first insertion point.
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
// An invoke base: split the edge to the invoke's normal destination and
// insert at the first insertion point of the block SplitEdge returns.
NewBaseInsertBB =
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
// Any other instruction: insert immediately after it.
NewBaseInsertPt = std::next(BaseI->getIterator());
} else {
// If the current base is an argument or global value, the new base
// will be inserted to the entry block.
// NOTE(review): this reads the captured BaseGEP rather than the GEP
// parameter to find the function — presumably both live in the same
// function; confirm.
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
// Cast the old base to the i8 pointer type first when the types differ.
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
NewBaseGEP =
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
// Remember the base that was just created.
NewGEPBases.insert(NewBaseGEP);
// NOTE(review): this trailing return is redundant in a void lambda.
return;
};

// Check whether all the offsets can be encoded with the preferred common base.
if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
BaseOffset = PreferBase;
// Create a new base if the offset of the BaseGEP can be encoded with one
// instruction.
createNewBase(BaseOffset, OldBase, BaseGEP);
}

auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
GetElementPtrInst *GEP = LargeOffsetGEP->first;
Expand Down Expand Up @@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
// pointer for the new base first.
BasicBlock::iterator NewBaseInsertPt;
BasicBlock *NewBaseInsertBB;
if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
// If the base of the struct is an instruction, the new base will be
// inserted close to it.
NewBaseInsertBB = BaseI->getParent();
if (isa<PHINode>(BaseI))
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
} else {
// If the current base is an argument or global value, the new base
// will be inserted to the entry block.
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
NewBaseGEP =
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
NewGEPBases.insert(NewBaseGEP);
createNewBase(BaseOffset, OldBase, GEP);
}

IRBuilder<> Builder(GEP);
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16070,6 +16070,20 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
AM.Scale);
}

// Suggest a common base for the two offsets. When both offsets agree in every
// bit above the low 12 and that shared high part is a legal add immediate, the
// high part is returned so it can be supplied by the immediate of an add.
// Otherwise 0 is returned.
int64_t
AArch64TargetLowering::getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                                      int64_t MaxOffset) const {
  // The two offsets must share everything above the low 12 bits.
  if ((MinOffset >> 12) != (MaxOffset >> 12))
    return 0;

  // Clear the low 12 bits so the base is an integer multiple of 4096.
  const int64_t AlignedBase = MinOffset & ~0xfffULL;
  return isLegalAddImmediate(AlignedBase) ? AlignedBase : 0;
}

bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,9 @@ class AArch64TargetLowering : public TargetLowering {
unsigned AS,
Instruction *I = nullptr) const override;

/// Return the preferred common base offset for large GEP offsets described by
/// \p MinOffset and \p MaxOffset, or 0 when there is no preferred base.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const override;

/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
Expand Down
86 changes: 86 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-addrmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,89 @@ define void @t17(i64 %a) {
%3 = load volatile i64, ptr %2, align 8
ret void
}

; Single i8 load at byte offset 1039992 from %a, zero-extended to i32.
; Expected output builds the offset in w8 with mov/movk and uses a
; register-offset ldrb.
define i32 @LdOffset_i8(ptr %a) {
; CHECK-LABEL: LdOffset_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
%conv = zext i8 %val to i32
ret i32 %conv
}

; Single i16 load at element index 1039992 (byte offset 2079984) from %a,
; sign-extended to i32. Expected output builds the byte offset in w8 with
; mov/movk and uses a register-offset ldrsh.
define i32 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
%conv = sext i16 %val to i32
ret i32 %conv
}

; Single i32 load at element index 1039992 (byte offset 4159968) from %a.
; Expected output builds the byte offset in w8 with mov/movk and uses a
; register-offset ldr.
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
ret i32 %val
}

; Two i64 loads from %a at element indices 1039992 and 1040249 (byte offsets
; 8319936 and 8321992). Expected output forms one shared base with a single
; add of #2031, lsl #12 (8318976) and addresses the loads with immediate
; offsets 960 and 3016 from that base.
define i64 @LdOffset_i64_multi_offset(ptr %a) {
; CHECK-LABEL: LdOffset_i64_multi_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #2031, lsl #12 // =8318976
; CHECK-NEXT: ldr x9, [x8, #960]
; CHECK-NEXT: ldr x8, [x8, #3016]
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %a, i64 1040249
%val1 = load i64, ptr %arrayidx1, align 8
%add = add nsw i64 %val1, %val0
ret i64 %add
}

; Two i64 loads that share the intermediate pointer %b, which is %a advanced
; by 1038336 i16 elements (2076672 bytes). Expected output computes that base
; with a single add of #507, lsl #12 and addresses the loads with immediate
; offsets 26464 and 26496 (element indices 3308 and 3312 scaled by 8).
define i64 @LdOffset_i64_multi_offset_with_commmon_base(ptr %a) {
; CHECK-LABEL: LdOffset_i64_multi_offset_with_commmon_base:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #507, lsl #12 // =2076672
; CHECK-NEXT: ldr x9, [x8, #26464]
; CHECK-NEXT: ldr x8, [x8, #26496]
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%b = getelementptr inbounds i16, ptr %a, i64 1038336
%arrayidx = getelementptr inbounds i64, ptr %b, i64 3308
%val0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 3312
%val1 = load i64, ptr %arrayidx1, align 8
%add = add nsw i64 %val1, %val0
ret i64 %add
}

; Negative test: the offset is odd
; The i16 load reads from %a + 1039993 bytes, which is not a multiple of the
; 2-byte access size. Expected output builds the full offset in w8 with
; mov/movk and uses a register-offset ldrsh.
define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a) {
; CHECK-LABEL: LdOffset_i16_odd_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56953 // =0xde79
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039993
%val = load i16, ptr %arrayidx, align 2
%conv = sext i16 %val to i32
ret i32 %conv
}

57 changes: 25 additions & 32 deletions llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@
define void @test1(ptr %s, i32 %n) {
; CHECK-LABEL: test1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w8, [x9, #4]
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
; CHECK-NEXT: ret
Expand Down Expand Up @@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz x0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
; CHECK-NEXT: mov w8, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
Expand Down Expand Up @@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
; CHECK-NEXT: csel x8, x1, x0, ne
; CHECK-NEXT: cbz x8, .LBB2_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, x10
; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
Expand Down Expand Up @@ -141,41 +138,38 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .cfi_personality 156, DW.ref.__FrameHandler
; CHECK-NEXT: .cfi_lsda 28, .Lexception0
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: mov w21, wzr
; CHECK-NEXT: mov w20, #40000 // =0x9c40
; CHECK-NEXT: mov w20, wzr
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: bl foo
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: add x8, x0, x20
; CHECK-NEXT: cmp w21, w19
; CHECK-NEXT: str wzr, [x8]
; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w20, w19
; CHECK-NEXT: str wzr, [x8, #3136]
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: str w21, [x8, #4]
; CHECK-NEXT: add w21, w21, #1
; CHECK-NEXT: str w21, [x8]
; CHECK-NEXT: str w20, [x8, #3140]
; CHECK-NEXT: add w20, w20, #1
; CHECK-NEXT: str w20, [x8, #3136]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w20
; CHECK-NEXT: .cfi_restore w21
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_5: // %cleanup
Expand Down Expand Up @@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, #19, lsl #12 // =77824
; CHECK-NEXT: add x8, x8, #2176
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str w9, [x8, #4]
; CHECK-NEXT: str w9, [x8, #2180]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: str w9, [x8, #2176]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
Expand Down
Loading