-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Forward memcpy source to load instruction #140249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Reducing the overhead caused by `make_range(++Load->getReverseIterator(), ScanBB->rend())`.
@llvm/pr-subscribers-llvm-transforms Author: dianqk (dianqk) ChangesFixes #137810. Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=059b0c2efbf30d986d812c4d2cf6d6c7876569fe&to=bb5edb394bb8983c5d3eacbaa3c3491504dd549b&stat=instructions%3Au. Full diff: https://github.com/llvm/llvm-project/pull/140249.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 639070c07897b..94d761379a9c5 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -153,9 +153,10 @@ Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
/// This overload provides a more efficient implementation of
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
-/// This overload cannot be used to scan across multiple blocks.
+/// This overload cannot be used to scan across multiple blocks. If a memcpy is
+/// returned, it indicates that we can load from its source.
Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
- bool *IsLoadCSE,
+ bool *IsLoadCSE, int64_t &Offset,
unsigned MaxInstsToScan = DefMaxInstsToScan);
/// Scan backwards to see if we have the value of the given pointer available
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 425f3682122cd..f766331dab2f1 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -713,8 +713,31 @@ Value *llvm::findAvailablePtrLoadStore(
return nullptr;
}
+static Value *availableMemCpySrc(LoadInst *LI, MemCpyInst *MemCpy,
+ int64_t &Offset) {
+ if (!LI->isSimple() || MemCpy->isVolatile())
+ return nullptr;
+ const DataLayout &DL = LI->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI->getType()).getKnownMinValue();
+ if (Size == 0)
+ return nullptr;
+ Value *OldSrc = LI->getPointerOperand();
+
+ if (OldSrc != MemCpy->getDest()) {
+ std::optional<int64_t> PointerOffset =
+ OldSrc->getPointerOffsetFrom(MemCpy->getDest(), DL);
+ if (!PointerOffset || *PointerOffset < 0)
+ return nullptr;
+ Offset = *PointerOffset;
+ }
+ auto *CopyLen = dyn_cast<ConstantInt>(MemCpy->getLength());
+ if (!CopyLen || CopyLen->getZExtValue() < Size + Offset)
+ return nullptr;
+ return MemCpy;
+}
+
Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
- bool *IsLoadCSE,
+ bool *IsLoadCSE, int64_t &Offset,
unsigned MaxInstsToScan) {
const DataLayout &DL = Load->getDataLayout();
Value *StrippedPtr = Load->getPointerOperand()->stripPointerCasts();
@@ -739,6 +762,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
Available = getAvailableLoadStore(&Inst, StrippedPtr, AccessTy,
AtLeastAtomic, DL, IsLoadCSE);
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(&Inst))
+ Available = availableMemCpySrc(Load, MemCpy, Offset);
+
if (Available)
break;
@@ -753,6 +779,12 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
for (Instruction *Inst : MustNotAliasInsts)
if (isModSet(AA.getModRefInfo(Inst, Loc)))
return nullptr;
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(Available)) {
+ MemoryLocation Loc = MemoryLocation::getForSource(MemCpy);
+ for (Instruction *Inst : MustNotAliasInsts)
+ if (isModSet(AA.getModRefInfo(Inst, Loc)))
+ return nullptr;
+ }
}
return Available;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c29cba6f675c5..cf0ebc9fd043f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1053,13 +1053,45 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
bool IsLoadCSE = false;
BatchAAResults BatchAA(*AA);
- if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
+ int64_t Offset = 0;
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE, Offset)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
- return replaceInstUsesWith(
- LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
- LI.getName() + ".cast"));
+ // Perform simplification of loads. If we have a memcpy A that copies X to
+ // Y, and a load instruction B that loads from Y, then we can rewrite B to
+ // be a load instruction that loads from X. This allows later passes to
+ // remove the memcpy A or to identify the source of the load instruction.
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(AvailableVal)) {
+ Value *NewSrc = MemCpy->getSource();
+ Value *OldSrc = LI.getPointerOperand();
+ MaybeAlign NewAlign = MemCpy->getSourceAlign();
+ if (Offset != 0) {
+ if (NewAlign.has_value())
+ NewAlign = commonAlignment(*NewAlign, Offset);
+ // Avoid increasing instructions
+ if (isa<Instruction>(OldSrc) && OldSrc->hasOneUse())
+ NewSrc =
+ Builder.CreateInBoundsPtrAdd(NewSrc, Builder.getInt64(Offset));
+ else
+ NewSrc = nullptr;
+ }
+ // Avoid infinite loops
+ if (NewSrc && !BatchAA.isMustAlias(OldSrc, NewSrc))
+ AvailableVal = Builder.CreateAlignedLoad(LI.getType(), NewSrc, NewAlign,
+ LI.getName());
+ else {
+ AvailableVal = nullptr;
+ if (NewSrc && NewSrc->use_empty())
+ cast<Instruction>(NewSrc)->eraseFromParent();
+ }
+ } else
+ AvailableVal = Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
+ LI.getName() + ".cast");
+
+ if (AvailableVal)
+ return replaceInstUsesWith(LI, AvailableVal);
}
// None of the following transforms are legal for volatile/ordered atomic
diff --git a/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll b/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll
new file mode 100644
index 0000000000000..7a56bb50b0903
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i24 @forward_load(ptr align 4 %src) {
+; CHECK-LABEL: define i24 @forward_load(
+; CHECK-SAME: ptr align 4 [[SRC:%.*]]) {
+; CHECK-NEXT: [[VAL1:%.*]] = load i24, ptr [[SRC]], align 4
+; CHECK-NEXT: ret i24 [[VAL1]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i8 @forward_load_gep(ptr %src) {
+; CHECK-LABEL: define i8 @forward_load_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ ret i8 %val
+}
+
+define i17 @forward_load_padding(ptr %src) {
+; CHECK-LABEL: define i17 @forward_load_padding(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[VAL:%.*]] = load i17, ptr [[SRC]], align 1
+; CHECK-NEXT: ret i17 [[VAL]]
+;
+ %dest = alloca [5 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load i17, ptr %dest
+ ret i17 %val
+}
+
+define <2 x i8> @forward_load_vector(ptr %src) {
+; CHECK-LABEL: define <2 x i8> @forward_load_vector(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[SRC]], align 1
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %dest = alloca <2 x i8>
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+ %val = load <2 x i8>, ptr %dest
+ ret <2 x i8> %val
+}
+
+; Negative tests
+
+define i24 @forward_load_volatile(ptr %src) {
+; CHECK-LABEL: define i24 @forward_load_volatile(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load volatile i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load volatile i24, ptr %dest
+ ret i24 %val
+}
+
+define i24 @failed_forward_load_write_src(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_write_src(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: store i1 true, ptr [[SRC]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ store i1 true, ptr %src
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i24 @failed_forward_load_write_dest(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_write_dest(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: store i1 true, ptr [[DEST]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ store i1 true, ptr %dest
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i16 @failed_forward_load_size(ptr %src) {
+; CHECK-LABEL: define i16 @failed_forward_load_size(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SRC]], align 1
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[DEST]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[DEST]], align 2
+; CHECK-NEXT: ret i16 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
+ %val = load i16, ptr %dest
+ ret i16 %val
+}
+
+define i8 @failed_forward_load_gep(ptr %src) {
+; CHECK-LABEL: define i8 @failed_forward_load_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 1
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[DEST]], align 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[DEST]], i64 2
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ ret i8 %val
+}
+
+define i8 @failed_forward_load_gep_multi_use(ptr %src) {
+; CHECK-LABEL: define i8 @failed_forward_load_gep_multi_use(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[DEST]], i64 2
+; CHECK-NEXT: [[VAL1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: call void @use_ptr(ptr nonnull [[GEP]])
+; CHECK-NEXT: ret i8 [[VAL1]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ call void @use_ptr(ptr %gep)
+ ret i8 %val
+}
+
+define i24 @failed_forward_load_must_alias(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_must_alias(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: [[DEST_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST_GEP]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC_GEP]], i64 3, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST_GEP]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %src_gep = getelementptr inbounds i8, ptr %src, i64 2
+ %dest_gep = getelementptr inbounds i8, ptr %src, i64 2
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest_gep, ptr %src_gep, i64 3, i1 false)
+ %val = load i24, ptr %dest_gep
+ ret i24 %val
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @use_ptr(ptr)
diff --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
index f084fe38bb226..431870155ae83 100644
--- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
@@ -208,8 +208,7 @@ define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) {
; CHECK: join:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
-; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4
-; CHECK-NEXT: ret i32 [[V]]
+; CHECK-NEXT: ret i32 0
;
entry:
%a = alloca [32 x i8]
@@ -384,8 +383,7 @@ define i8 @select_after_memcpy_keep_alloca(i1 %cond, ptr %p) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1
; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND:%.*]], ptr [[ALLOCA]], ptr [[P:%.*]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PTR]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
-; CHECK-NEXT: ret i8 [[LOAD]]
+; CHECK-NEXT: ret i8 0
;
entry:
%alloca = alloca [32 x i8]
diff --git a/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
new file mode 100644
index 0000000000000..d5dc213e6d6b6
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+define i1 @main(ptr %i2) {
+; CHECK-LABEL: define noundef i1 @main(
+; CHECK-SAME: ptr writeonly captures(none) initializes((0, 3)) [[I2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[COMMON_RET:.*:]]
+; CHECK-NEXT: store i8 0, ptr [[I2]], align 1
+; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 1
+; CHECK-NEXT: store i8 1, ptr [[I3]], align 1
+; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 2
+; CHECK-NEXT: store i8 2, ptr [[I4]], align 1
+; CHECK-NEXT: ret i1 true
+;
+ %i1 = alloca [3 x i8], align 1
+ store i8 0, ptr %i2, align 1
+ %i3 = getelementptr inbounds nuw i8, ptr %i2, i64 1
+ store i8 1, ptr %i3, align 1
+ %i4 = getelementptr inbounds nuw i8, ptr %i2, i64 2
+ store i8 2, ptr %i4, align 1
+ call void @llvm.lifetime.start.p0(i64 3, ptr nonnull %i1)
+ call void @llvm.memcpy.p0.p0.i64(ptr %i1, ptr %i2, i64 3, i1 false)
+ %i5 = load i8, ptr %i1, align 1
+ %i6 = icmp eq i8 %i5, 0
+ %i7 = getelementptr inbounds nuw i8, ptr %i1, i64 1
+ %i8 = load i8, ptr %i7, align 1
+ %i9 = icmp eq i8 %i8, 1
+ %i10 = select i1 %i6, i1 %i9, i1 false
+ %i11 = getelementptr inbounds nuw i8, ptr %i1, i64 2
+ %i12 = load i8, ptr %i11, align 1
+ %i13 = icmp eq i8 %i12, 2
+ %i14 = select i1 %i10, i1 %i13, i1 false
+ br i1 %i14, label %true, label %false
+
+true:
+ call void @llvm.lifetime.end.p0(i64 3, ptr nonnull %i1)
+ ret i1 true
+
+false:
+ call void @assert_failed(ptr %i1)
+ ret i1 false
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @assert_failed(ptr)
|
@llvm/pr-subscribers-llvm-analysis Author: dianqk (dianqk) ChangesFixes #137810. Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=059b0c2efbf30d986d812c4d2cf6d6c7876569fe&to=bb5edb394bb8983c5d3eacbaa3c3491504dd549b&stat=instructions%3Au. Full diff: https://github.com/llvm/llvm-project/pull/140249.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 639070c07897b..94d761379a9c5 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -153,9 +153,10 @@ Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
/// This overload provides a more efficient implementation of
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
-/// This overload cannot be used to scan across multiple blocks.
+/// This overload cannot be used to scan across multiple blocks. If a memcpy is
+/// returned, it indicates that we can load from its source.
Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
- bool *IsLoadCSE,
+ bool *IsLoadCSE, int64_t &Offset,
unsigned MaxInstsToScan = DefMaxInstsToScan);
/// Scan backwards to see if we have the value of the given pointer available
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 425f3682122cd..f766331dab2f1 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -713,8 +713,31 @@ Value *llvm::findAvailablePtrLoadStore(
return nullptr;
}
+static Value *availableMemCpySrc(LoadInst *LI, MemCpyInst *MemCpy,
+ int64_t &Offset) {
+ if (!LI->isSimple() || MemCpy->isVolatile())
+ return nullptr;
+ const DataLayout &DL = LI->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI->getType()).getKnownMinValue();
+ if (Size == 0)
+ return nullptr;
+ Value *OldSrc = LI->getPointerOperand();
+
+ if (OldSrc != MemCpy->getDest()) {
+ std::optional<int64_t> PointerOffset =
+ OldSrc->getPointerOffsetFrom(MemCpy->getDest(), DL);
+ if (!PointerOffset || *PointerOffset < 0)
+ return nullptr;
+ Offset = *PointerOffset;
+ }
+ auto *CopyLen = dyn_cast<ConstantInt>(MemCpy->getLength());
+ if (!CopyLen || CopyLen->getZExtValue() < Size + Offset)
+ return nullptr;
+ return MemCpy;
+}
+
Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
- bool *IsLoadCSE,
+ bool *IsLoadCSE, int64_t &Offset,
unsigned MaxInstsToScan) {
const DataLayout &DL = Load->getDataLayout();
Value *StrippedPtr = Load->getPointerOperand()->stripPointerCasts();
@@ -739,6 +762,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
Available = getAvailableLoadStore(&Inst, StrippedPtr, AccessTy,
AtLeastAtomic, DL, IsLoadCSE);
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(&Inst))
+ Available = availableMemCpySrc(Load, MemCpy, Offset);
+
if (Available)
break;
@@ -753,6 +779,12 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
for (Instruction *Inst : MustNotAliasInsts)
if (isModSet(AA.getModRefInfo(Inst, Loc)))
return nullptr;
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(Available)) {
+ MemoryLocation Loc = MemoryLocation::getForSource(MemCpy);
+ for (Instruction *Inst : MustNotAliasInsts)
+ if (isModSet(AA.getModRefInfo(Inst, Loc)))
+ return nullptr;
+ }
}
return Available;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c29cba6f675c5..cf0ebc9fd043f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1053,13 +1053,45 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
bool IsLoadCSE = false;
BatchAAResults BatchAA(*AA);
- if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
+ int64_t Offset = 0;
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE, Offset)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
- return replaceInstUsesWith(
- LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
- LI.getName() + ".cast"));
+ // Perform simplification of loads. If we have a memcpy A that copies X to
+ // Y, and a load instruction B that loads from Y, then we can rewrite B to
+ // be a load instruction that loads from X. This allows later passes to
+ // remove the memcpy A or to identify the source of the load instruction.
+ if (auto *MemCpy = dyn_cast<MemCpyInst>(AvailableVal)) {
+ Value *NewSrc = MemCpy->getSource();
+ Value *OldSrc = LI.getPointerOperand();
+ MaybeAlign NewAlign = MemCpy->getSourceAlign();
+ if (Offset != 0) {
+ if (NewAlign.has_value())
+ NewAlign = commonAlignment(*NewAlign, Offset);
+ // Avoid increasing instructions
+ if (isa<Instruction>(OldSrc) && OldSrc->hasOneUse())
+ NewSrc =
+ Builder.CreateInBoundsPtrAdd(NewSrc, Builder.getInt64(Offset));
+ else
+ NewSrc = nullptr;
+ }
+ // Avoid infinite loops
+ if (NewSrc && !BatchAA.isMustAlias(OldSrc, NewSrc))
+ AvailableVal = Builder.CreateAlignedLoad(LI.getType(), NewSrc, NewAlign,
+ LI.getName());
+ else {
+ AvailableVal = nullptr;
+ if (NewSrc && NewSrc->use_empty())
+ cast<Instruction>(NewSrc)->eraseFromParent();
+ }
+ } else
+ AvailableVal = Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
+ LI.getName() + ".cast");
+
+ if (AvailableVal)
+ return replaceInstUsesWith(LI, AvailableVal);
}
// None of the following transforms are legal for volatile/ordered atomic
diff --git a/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll b/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll
new file mode 100644
index 0000000000000..7a56bb50b0903
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-forward-load.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i24 @forward_load(ptr align 4 %src) {
+; CHECK-LABEL: define i24 @forward_load(
+; CHECK-SAME: ptr align 4 [[SRC:%.*]]) {
+; CHECK-NEXT: [[VAL1:%.*]] = load i24, ptr [[SRC]], align 4
+; CHECK-NEXT: ret i24 [[VAL1]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i8 @forward_load_gep(ptr %src) {
+; CHECK-LABEL: define i8 @forward_load_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ ret i8 %val
+}
+
+define i17 @forward_load_padding(ptr %src) {
+; CHECK-LABEL: define i17 @forward_load_padding(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[VAL:%.*]] = load i17, ptr [[SRC]], align 1
+; CHECK-NEXT: ret i17 [[VAL]]
+;
+ %dest = alloca [5 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load i17, ptr %dest
+ ret i17 %val
+}
+
+define <2 x i8> @forward_load_vector(ptr %src) {
+; CHECK-LABEL: define <2 x i8> @forward_load_vector(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[SRC]], align 1
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %dest = alloca <2 x i8>
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+ %val = load <2 x i8>, ptr %dest
+ ret <2 x i8> %val
+}
+
+; Negative tests
+
+define i24 @forward_load_volatile(ptr %src) {
+; CHECK-LABEL: define i24 @forward_load_volatile(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load volatile i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %val = load volatile i24, ptr %dest
+ ret i24 %val
+}
+
+define i24 @failed_forward_load_write_src(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_write_src(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: store i1 true, ptr [[SRC]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ store i1 true, ptr %src
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i24 @failed_forward_load_write_dest(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_write_dest(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: store i1 true, ptr [[DEST]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ store i1 true, ptr %dest
+ %val = load i24, ptr %dest
+ ret i24 %val
+}
+
+define i16 @failed_forward_load_size(ptr %src) {
+; CHECK-LABEL: define i16 @failed_forward_load_size(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SRC]], align 1
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[DEST]], align 1
+; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[DEST]], align 2
+; CHECK-NEXT: ret i16 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
+ %val = load i16, ptr %dest
+ ret i16 %val
+}
+
+define i8 @failed_forward_load_gep(ptr %src) {
+; CHECK-LABEL: define i8 @failed_forward_load_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 1
+; CHECK-NEXT: store i16 [[TMP1]], ptr [[DEST]], align 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[DEST]], i64 2
+; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[VAL]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ ret i8 %val
+}
+
+define i8 @failed_forward_load_gep_multi_use(ptr %src) {
+; CHECK-LABEL: define i8 @failed_forward_load_gep_multi_use(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[DEST:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC]], i64 3, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[DEST]], i64 2
+; CHECK-NEXT: [[VAL1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: call void @use_ptr(ptr nonnull [[GEP]])
+; CHECK-NEXT: ret i8 [[VAL1]]
+;
+ %dest = alloca [3 x i8]
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+ %gep = getelementptr inbounds i8, ptr %dest, i64 2
+ %val = load i8, ptr %gep
+ call void @use_ptr(ptr %gep)
+ ret i8 %val
+}
+
+define i24 @failed_forward_load_must_alias(ptr %src) {
+; CHECK-LABEL: define i24 @failed_forward_load_must_alias(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: [[DEST_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DEST_GEP]], ptr noundef nonnull align 1 dereferenceable(3) [[SRC_GEP]], i64 3, i1 false)
+; CHECK-NEXT: [[VAL:%.*]] = load i24, ptr [[DEST_GEP]], align 4
+; CHECK-NEXT: ret i24 [[VAL]]
+;
+ %src_gep = getelementptr inbounds i8, ptr %src, i64 2
+ %dest_gep = getelementptr inbounds i8, ptr %src, i64 2
+ call void @llvm.memcpy.p0.p0.i64(ptr %dest_gep, ptr %src_gep, i64 3, i1 false)
+ %val = load i24, ptr %dest_gep
+ ret i24 %val
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @use_ptr(ptr)
diff --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
index f084fe38bb226..431870155ae83 100644
--- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
@@ -208,8 +208,7 @@ define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) {
; CHECK: join:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
-; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4
-; CHECK-NEXT: ret i32 [[V]]
+; CHECK-NEXT: ret i32 0
;
entry:
%a = alloca [32 x i8]
@@ -384,8 +383,7 @@ define i8 @select_after_memcpy_keep_alloca(i1 %cond, ptr %p) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 1
; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND:%.*]], ptr [[ALLOCA]], ptr [[P:%.*]]
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PTR]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
-; CHECK-NEXT: ret i8 [[LOAD]]
+; CHECK-NEXT: ret i8 0
;
entry:
%alloca = alloca [32 x i8]
diff --git a/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
new file mode 100644
index 0000000000000..d5dc213e6d6b6
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/pr137810-forward-load.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+define i1 @main(ptr %i2) {
+; CHECK-LABEL: define noundef i1 @main(
+; CHECK-SAME: ptr writeonly captures(none) initializes((0, 3)) [[I2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[COMMON_RET:.*:]]
+; CHECK-NEXT: store i8 0, ptr [[I2]], align 1
+; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 1
+; CHECK-NEXT: store i8 1, ptr [[I3]], align 1
+; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds nuw i8, ptr [[I2]], i64 2
+; CHECK-NEXT: store i8 2, ptr [[I4]], align 1
+; CHECK-NEXT: ret i1 true
+;
+ %i1 = alloca [3 x i8], align 1
+ store i8 0, ptr %i2, align 1
+ %i3 = getelementptr inbounds nuw i8, ptr %i2, i64 1
+ store i8 1, ptr %i3, align 1
+ %i4 = getelementptr inbounds nuw i8, ptr %i2, i64 2
+ store i8 2, ptr %i4, align 1
+ call void @llvm.lifetime.start.p0(i64 3, ptr nonnull %i1)
+ call void @llvm.memcpy.p0.p0.i64(ptr %i1, ptr %i2, i64 3, i1 false)
+ %i5 = load i8, ptr %i1, align 1
+ %i6 = icmp eq i8 %i5, 0
+ %i7 = getelementptr inbounds nuw i8, ptr %i1, i64 1
+ %i8 = load i8, ptr %i7, align 1
+ %i9 = icmp eq i8 %i8, 1
+ %i10 = select i1 %i6, i1 %i9, i1 false
+ %i11 = getelementptr inbounds nuw i8, ptr %i1, i64 2
+ %i12 = load i8, ptr %i11, align 1
+ %i13 = icmp eq i8 %i12, 2
+ %i14 = select i1 %i10, i1 %i13, i1 false
+ br i1 %i14, label %true, label %false
+
+true:
+ call void @llvm.lifetime.end.p0(i64 3, ptr nonnull %i1)
+ ret i1 true
+
+false:
+ call void @assert_failed(ptr %i1)
+ ret i1 false
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @assert_failed(ptr)
|
} | ||
// Avoid infinite loops | ||
if (NewSrc && !BatchAA.isMustAlias(OldSrc, NewSrc)) | ||
AvailableVal = Builder.CreateAlignedLoad(LI.getType(), NewSrc, NewAlign, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some metadata may be safe to propagate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider the following case:
memcpy(dst, src, len);
%x = load dst
br %cond, %then, %else
then:
%y = load dst
This patch forwards the memcpy source pointer to %x
, but not for %y
. If %x
cannot be simplified eventually, the resulting IR is:
memcpy(dst, src, len);
%x = load src
br %cond, %then, %else
then:
%y = load dst
This regression is common in real-world programs: dtcxzyw/llvm-opt-benchmark#2354 (comment)
Can we start in a more conservative way, by ensuring that the load can be CSEed (query FindAvailableLoadedValue
recursively with forwarded memcpy source), or the memcpy has only one user in MemorySSA?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Crash reproducer:
; bin/opt -passes=instcombine reduced.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.S0 = type { i32, i32, i16 }
@g_1092 = external global [1 x [4 x { i32, i32, i16, [2 x i8] }]]
define i8 @func_124(i32 %storemerge740) {
entry:
br label %for.cond554
for.cond554: ; preds = %for.cond554, %entry
%idxprom578 = zext i32 %storemerge740 to i64
%arrayidx584 = getelementptr [1 x [4 x %struct.S0]], ptr @g_1092, i64 0, i64 %idxprom578, i64 1
%arrayidx589 = getelementptr [1 x [4 x %struct.S0]], ptr @g_1092, i64 0, i64 %idxprom578, i64 1
call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx584, ptr %arrayidx589, i64 12, i1 false)
%f1597 = getelementptr [1 x [4 x %struct.S0]], ptr @g_1092, i64 0, i64 %idxprom578, i64 1, i32 1
%0 = load i32, ptr %f1597, align 4
%tobool598.not = icmp eq i32 %0, 0
br i1 %tobool598.not, label %for.cond601, label %for.cond554
for.cond601: ; preds = %for.cond554
ret i8 0
}
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: bin/opt -passes=instcombine reduced.ll -S
1. Running pass "function(instcombine<max-iterations=1;verify-fixpoint>)" on module "reduced.ll"
2. Running pass "instcombine<max-iterations=1;verify-fixpoint>" on function "func_124"
#0 0x0000780f1ca26032 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.21.0git+0x226032)
#1 0x0000780f1ca22f0f llvm::sys::RunSignalHandlers() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.21.0git+0x222f0f)
#2 0x0000780f1ca23054 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
#3 0x0000780f1c445330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
#4 0x0000780f13685421 llvm::Instruction::eraseFromParent() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x285421)
#5 0x0000780f14e65666 llvm::InstCombinerImpl::eraseInstFromFunction(llvm::Instruction&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x5c666)
#6 0x0000780f14e6fc31 llvm::InstCombinerImpl::run() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x66c31)
#7 0x0000780f14e70f23 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#8 0x0000780f14e71f32 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.21.0git+0x68f32)
#9 0x0000780f16dac615 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libPolly.so.21.0git+0x1ac615)
#10 0x0000780f1372a624 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x32a624)
#11 0x0000780f1b6db3c5 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMX86CodeGen.so.21.0git+0xdb3c5)
#12 0x0000780f13728f00 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x328f00)
#13 0x0000780f1b6dbd85 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMX86CodeGen.so.21.0git+0xdbd85)
#14 0x0000780f137294f5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.21.0git+0x3294f5)
#15 0x0000780f1cc652e9 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.21.0git+0x2c2e9)
#16 0x0000780f1cc70306 optMain (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.21.0git+0x37306)
#17 0x0000780f1c42a1ca __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#18 0x0000780f1c42a28b call_init ./csu/../csu/libc-start.c:128:20
#19 0x0000780f1c42a28b __libc_start_main ./csu/../csu/libc-start.c:347:5
#20 0x0000567ad14d1095 _start (bin/opt+0x1095)
Segmentation fault (core dumped)
Oops, I should use |
Fixes #137810.
I've already tried implementing this with MemCpyOpt in #138490, which resulted in unacceptable compile-time. Therefore, I implement this with InstCombine.
Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=059b0c2efbf30d986d812c4d2cf6d6c7876569fe&to=bb5edb394bb8983c5d3eacbaa3c3491504dd549b&stat=instructions%3Au.