Skip to content

[InstCombine] remove dead loads, such as memcpy from undef #143958

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/test/Misc/loop-opt-setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ int foo(void) {
// CHECK-NOT: br i1

void Helper(void) {
const int *nodes[5];
const int *nodes[5] = {0};
int num_active = 5;

while (num_active)
Expand Down
108 changes: 86 additions & 22 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3244,12 +3244,13 @@ static bool isRemovableWrite(CallBase &CB, Value *UsedV,
return Dest && Dest->Ptr == UsedV;
}

static bool isAllocSiteRemovable(Instruction *AI,
static std::optional<ModRefInfo> isAllocSiteRemovable(Instruction *AI,
SmallVectorImpl<WeakTrackingVH> &Users,
const TargetLibraryInfo &TLI) {
const TargetLibraryInfo &TLI, bool KnowInit) {
SmallVector<Instruction*, 4> Worklist;
const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
Worklist.push_back(AI);
ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;

do {
Instruction *PI = Worklist.pop_back_val();
Expand All @@ -3258,7 +3259,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
switch (I->getOpcode()) {
default:
// Give up the moment we see something we can't handle.
return false;
return std::nullopt;

case Instruction::AddrSpaceCast:
case Instruction::BitCast:
Expand All @@ -3273,10 +3274,10 @@ static bool isAllocSiteRemovable(Instruction *AI,
// We also fold comparisons in some conditions provided the alloc has
// not escaped (see isNeverEqualToUnescapedAlloc).
if (!ICI->isEquality())
return false;
return std::nullopt;
unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
return false;
return std::nullopt;

// Do not fold compares to aligned_alloc calls, as they may have to
// return null in case the required alignment cannot be satisfied,
Expand All @@ -3296,7 +3297,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
!AlignmentAndSizeKnownValid(CB))
return false;
return std::nullopt;
Users.emplace_back(I);
continue;
}
Expand All @@ -3306,16 +3307,23 @@ static bool isAllocSiteRemovable(Instruction *AI,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default:
return false;
return std::nullopt;

case Intrinsic::memmove:
case Intrinsic::memcpy:
case Intrinsic::memset: {
MemIntrinsic *MI = cast<MemIntrinsic>(II);
if (MI->isVolatile() || MI->getRawDest() != PI)
return false;
[[fallthrough]];
if (MI->isVolatile())
return std::nullopt;
// Note: this could also be ModRef, but we can still interpret that
// as just Mod in that case.
ModRefInfo NewAccess =
MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
return std::nullopt;
Access |= NewAccess;
}
[[fallthrough]];
case Intrinsic::assume:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
Expand All @@ -3332,11 +3340,6 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
}

if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
Users.emplace_back(I);
continue;
}

if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
getAllocationFamily(I, &TLI) == Family) {
Users.emplace_back(I);
Expand All @@ -3350,20 +3353,43 @@ static bool isAllocSiteRemovable(Instruction *AI,
continue;
}

return false;
if (!isRefSet(Access) &&
isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
Access |= ModRefInfo::Mod;
Users.emplace_back(I);
continue;
}

return std::nullopt;

case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I);
if (SI->isVolatile() || SI->getPointerOperand() != PI)
return false;
return std::nullopt;
if (isRefSet(Access))
return std::nullopt;
Access |= ModRefInfo::Mod;
Users.emplace_back(I);
continue;
}

case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(I);
if (LI->isVolatile() || LI->getPointerOperand() != PI)
return std::nullopt;
if (isModSet(Access))
return std::nullopt;
Access |= ModRefInfo::Ref;
Users.emplace_back(I);
continue;
}
}
llvm_unreachable("missing a return?");
}
} while (!Worklist.empty());
return true;

assert(Access != ModRefInfo::ModRef);
return Access;
}

Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
Expand Down Expand Up @@ -3391,10 +3417,30 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}

if (isAllocSiteRemovable(&MI, Users, TLI)) {
// Determine what getInitialValueOfAllocation would return without actually
// allocating the result.
bool KnowInitUndef = false;
bool KnowInitZero = false;
Constant *Init = getInitialValueOfAllocation(
&MI, &TLI, Type::getInt8Ty(MI.getContext()));
if (Init) {
if (isa<UndefValue>(Init))
KnowInitUndef = true;
else if (Init->isNullValue())
KnowInitZero = true;
}
// The various sanitizers don't actually return undef memory, but rather
// memory initialized with special forms of runtime poison.
auto &F = *MI.getFunction();
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::SanitizeAddress))
KnowInitUndef = false;

auto Removable = isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef);
if (Removable) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lowering all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
// Lowering all @llvm.objectsize and MemTransferInst (MTI) calls first because
// they may use a bitcast/GEP of the alloca we are removing.
if (!Users[i])
continue;

Expand All @@ -3411,6 +3457,17 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
}
if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
if (KnowInitZero && isRefSet(*Removable)) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(MTI);
auto *M = Builder.CreateMemSet(
MTI->getRawDest(),
ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
MTI->getLength(), MTI->getDestAlign());
M->copyMetadata(*MTI);
}
}
}
}
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
Expand All @@ -3433,7 +3490,14 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
} else {
// Casts, GEP, or anything else: we're about to delete this instruction,
// so it can not have any valid uses.
replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
Constant *Replace;
if (isa<LoadInst>(I)) {
assert(KnowInitZero || KnowInitUndef);
Replace = KnowInitUndef ? UndefValue::get(I->getType())
: Constant::getNullValue(I->getType());
} else
Replace = PoisonValue::get(I->getType());
replaceInstUsesWith(*I, Replace);
}
eraseInstFromFunction(*I);
}
Expand Down
74 changes: 64 additions & 10 deletions llvm/test/Transforms/InstCombine/and-or-icmps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -364,23 +364,77 @@ define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {

define void @simplify_before_foldAndOfICmps(ptr %p) {
; CHECK-LABEL: @simplify_before_foldAndOfICmps(
; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2
; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8]], align 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably replace this load with an argument to retain test behavior?

; CHECK-NEXT: store i1 true, ptr [[P:%.*]], align 1
; CHECK-NEXT: store ptr null, ptr [[P]], align 8
; CHECK-NEXT: ret void
;
%A8 = alloca i16
%L7 = load i16, ptr %A8
%G21 = getelementptr i16, ptr %A8, i8 -1
%B11 = udiv i16 %L7, -1
%G4 = getelementptr i16, ptr %A8, i16 %B11
%L2 = load i16, ptr %G4
%L = load i16, ptr %G4
%B23 = mul i16 %B11, %B11
%L4 = load i16, ptr %A8
%B21 = sdiv i16 %L7, %L4
%B7 = sub i16 0, %B21
%B18 = mul i16 %B23, %B7
%C10 = icmp ugt i16 %L, %B11
%B20 = and i16 %L7, %L2
%B1 = mul i1 %C10, true
%C5 = icmp sle i16 %B21, %L
%C11 = icmp ule i16 %B21, %L
%C7 = icmp slt i16 %B20, 0
%B29 = srem i16 %L4, %B18
%B15 = add i1 %C7, %C10
%B19 = add i1 %C11, %B15
%C6 = icmp sge i1 %C11, %B19
%B33 = or i16 %B29, %L4
%C13 = icmp uge i1 %C5, %B1
%C3 = icmp ult i1 %C13, %C6
store i16 undef, ptr %G21
%C18 = icmp ule i1 %C10, %C7
%G26 = getelementptr i1, ptr null, i1 %C3
store i16 %B33, ptr %p
store i1 %C18, ptr %p
store ptr %G26, ptr %p
ret void
}

define void @simplify_before_foldAndOfICmps2(ptr %p, ptr %A8) "instcombine-no-verify-fixpoint" {
; CHECK-LABEL: @simplify_before_foldAndOfICmps2(
; CHECK-NEXT: [[L7:%.*]] = load i16, ptr [[A8:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[L7]], -1
; CHECK-NEXT: [[B11:%.*]] = zext i1 [[TMP1]] to i16
; CHECK-NEXT: [[C10:%.*]] = icmp ugt i16 [[L7]], [[B11]]
; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0
; CHECK-NEXT: [[C3:%.*]] = and i1 [[C7]], [[C10]]
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[C10]], true
; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C3]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64
; CHECK-NEXT: [[G4:%.*]] = getelementptr i16, ptr [[A8]], i64 [[TMP2]]
; CHECK-NEXT: [[L2:%.*]] = load i16, ptr [[G4]], align 2
; CHECK-NEXT: [[L4:%.*]] = load i16, ptr [[A8]], align 2
; CHECK-NEXT: [[B21:%.*]] = sdiv i16 [[L7]], [[L4]]
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i16 [[B21]], i16 0
; CHECK-NEXT: [[B18:%.*]] = sub i16 0, [[TMP5]]
; CHECK-NEXT: [[C11:%.*]] = icmp ugt i16 [[L2]], [[B11]]
; CHECK-NEXT: [[B20:%.*]] = and i16 [[L7]], [[L2]]
; CHECK-NEXT: [[C5:%.*]] = icmp sgt i16 [[B21]], [[L2]]
; CHECK-NEXT: [[C12:%.*]] = icmp ule i16 [[B21]], [[L2]]
; CHECK-NEXT: [[C10:%.*]] = icmp slt i16 [[B20]], 0
; CHECK-NEXT: [[B29:%.*]] = srem i16 [[L4]], [[B18]]
; CHECK-NEXT: [[B15:%.*]] = xor i1 [[C10]], [[C11]]
; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[C12]], [[B15]]
; CHECK-NEXT: [[C6:%.*]] = xor i1 [[TMP6]], true
; CHECK-NEXT: [[B33:%.*]] = or i16 [[B29]], [[L4]]
; CHECK-NEXT: [[C3:%.*]] = and i1 [[C5]], [[C6]]
; CHECK-NEXT: [[C4:%.*]] = and i1 [[C3]], [[C11]]
; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[C11]], true
; CHECK-NEXT: [[C18:%.*]] = or i1 [[C10]], [[TMP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[C4]] to i64
; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, ptr null, i64 [[TMP3]]
; CHECK-NEXT: store i16 [[L7]], ptr [[P:%.*]], align 2
; CHECK-NEXT: store i16 [[B33]], ptr [[P:%.*]], align 2
; CHECK-NEXT: store i1 [[C18]], ptr [[P]], align 1
; CHECK-NEXT: store ptr [[G26]], ptr [[P]], align 8
; CHECK-NEXT: ret void
;
%A8 = alloca i16
%L7 = load i16, ptr %A8
%G21 = getelementptr i16, ptr %A8, i8 -1
%B11 = udiv i16 %L7, -1
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/Transforms/InstCombine/apint-shift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -562,11 +562,10 @@ define i40 @test26(i40 %A) {

; OSS-Fuzz #9880
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9880
define i177 @ossfuzz_9880(i177 %X) {
define i177 @ossfuzz_9880(i177 %X, ptr %A) {
; CHECK-LABEL: @ossfuzz_9880(
; CHECK-NEXT: ret i177 0
;
%A = alloca i177
%L1 = load i177, ptr %A
%B = or i177 0, -1
%B5 = udiv i177 %L1, %B
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/Transforms/InstCombine/call-cast-target.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,19 +110,17 @@ entry:

declare i1 @fn5(ptr byval({ i32, i32 }) align 4 %r)

define i1 @test5() {
; CHECK-LABEL: define i1 @test5() {
; CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32 }, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 4
define i1 @test5(ptr %ptr) {
; CHECK-LABEL: define i1 @test5(ptr %ptr) {
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @fn5(i32 [[TMP2]], i32 [[TMP4]])
; CHECK-NEXT: ret i1 [[TMP5]]
;
%1 = alloca { i32, i32 }, align 4
%2 = getelementptr inbounds { i32, i32 }, ptr %1, i32 0, i32 0
%2 = getelementptr inbounds { i32, i32 }, ptr %ptr, i32 0, i32 0
%3 = load i32, ptr %2, align 4
%4 = getelementptr inbounds { i32, i32 }, ptr %1, i32 0, i32 1
%4 = getelementptr inbounds { i32, i32 }, ptr %ptr, i32 0, i32 1
%5 = load i32, ptr %4, align 4
%6 = call i1 @fn5(i32 %3, i32 %5)
ret i1 %6
Expand Down
Loading
Loading