Skip to content

[memcpyopt] handle memcpy from memset in more cases #140954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 67 additions & 36 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return true;
}

/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
/// Determine whether the pointer V had only undefined content from Def up to
/// the given Size, either because it was freshly alloca'd or started its
/// lifetime.
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
MemoryDef *Def, Value *Size) {
if (MSSA->isLiveOnEntryDef(Def))
Expand Down Expand Up @@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
return false;
}

/// Check whether the bytes read by \p MemCpy had undefined content before
/// \p MemSrc executed.
///
/// The MemorySSA walker is asked for the access that clobbers the memcpy's
/// source location, starting from the access that defines \p MemSrc. The
/// result is true only when that clobber is a MemoryDef and hasUndefContents()
/// accepts it for the memcpy's source pointer and length.
///
/// Strictly, only the bytes of the copy that \p MemSrc did not write matter
/// here, but that region is not easily expressed as a single MemoryLocation,
/// so the query conservatively spans the whole 0..CopySize source range.
static bool coversInputFully(MemorySSA *MSSA, MemCpyInst *MemCpy,
                             MemIntrinsic *MemSrc, BatchAAResults &BAA) {
  // Begin the walk above MemSrc so that MemSrc itself is not reported as the
  // clobbering access.
  MemoryAccess *WalkStart = MSSA->getMemoryAccess(MemSrc)->getDefiningAccess();
  MemoryLocation SrcLoc = MemoryLocation::getForSource(MemCpy);
  MemoryAccess *Clobber =
      MSSA->getWalker()->getClobberingMemoryAccess(WalkStart, SrcLoc, BAA);

  // Anything other than a MemoryDef gives no single definition to inspect.
  auto *ClobberDef = dyn_cast<MemoryDef>(Clobber);
  return ClobberDef && hasUndefContents(MSSA, BAA, MemCpy->getSource(),
                                        ClobberDef, MemCpy->getLength());
}

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
Expand All @@ -1418,51 +1437,63 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet,
BatchAAResults &BAA) {
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
// memcpying from the same address. Otherwise it is hard to reason about.
if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;

Value *MemSetSize = MemSet->getLength();
Value *CopySize = MemCpy->getLength();

if (MemSetSize != CopySize) {
// Make sure the memcpy doesn't read any more than what the memset wrote.
// Don't worry about sizes larger than i64.

// A known memset size is required.
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
if (!CMemSetSize)
int64_t MOffset = 0;
const DataLayout &DL = MemCpy->getModule()->getDataLayout();
// We can only transform memcpys where the dest of one is the source of the
// other, or they have a known offset.
if (MemCpy->getSource() != MemSet->getDest()) {
std::optional<int64_t> Offset =
MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
if (!Offset)
return false;
MOffset = *Offset;
}

// A known memcpy size is also required.
MaybeAlign MDestAlign = MemCpy->getDestAlign();
int64_t MOffsetAligned = MDestAlign.valueOrOne().value() > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne().value() - 1)) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
if (MOffset != 0 || MemSetSize != CopySize) {
// Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
if (!CCopySize)
return false;
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
// If the memcpy is larger than the memset, but the memory was undef prior
// to the memset, we can just ignore the tail. Technically we're only
// interested in the bytes from MemSetSize..CopySize here, but as we can't
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
bool CanReduceSize = false;
MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
CanReduceSize = true;

if (!CanReduceSize)
// Don't worry about sizes larger than i64.
if (!CMemSetSize || !CCopySize || MOffset < 0 ||
CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
if (!coversInputFully(MSSA, MemCpy, MemSet, BAA))
return false;
CopySize = MemSetSize;

if (CMemSetSize && CCopySize) {
// If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
if (CCopySize->getZExtValue() + std::abs(MOffset) > CMemSetSize->getZExtValue()) {
if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()))
CopySize = MemSetSize;
else
CopySize = ConstantInt::get(CopySize->getType(), std::max((int64_t)0, (int64_t)(CMemSetSize->getZExtValue() - std::abs(MOffsetAligned))));
}
else if (MOffsetAligned < 0) {
// Even if CMemSetSize isn't known, if the MOffsetAligned is negative, make sure to clip the new memset
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
}
}
else if (CCopySize && MOffsetAligned < 0) {
// Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
}
else {
MOffsetAligned = 0;
}
}
}

IRBuilder<> Builder(MemCpy);
Value *MDest = MemCpy->getRawDest();
if (MOffsetAligned < 0)
MDest = Builder.CreateInBoundsPtrAdd(MDest, Builder.getInt64(-MOffsetAligned));
Instruction *NewM =
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
CopySize, MemCpy->getDestAlign());
Builder.CreateMemSet(MDest, MemSet->getOperand(1),
CopySize, MDestAlign);
auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
Expand Down Expand Up @@ -1683,7 +1714,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
}

LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
NumStackMove++;
return true;
}
Expand Down
6 changes: 5 additions & 1 deletion llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
; CHECK-NEXT: [[AGG_TMP_SROA_15:%.*]] = alloca [20 x i8], align 4
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_15]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[AGG_TMP_SROA_14]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP0]], i8 0, i64 1, i1 false)
; CHECK-NEXT: [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false)
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,54 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
ret void
}

; The memset destination (%b) starts 4 bytes after the memcpy source (%a), so
; the first 4 bytes read by the 12-byte memcpy are not written by the memset in
; this function. The expected output replaces the memcpy with an 8-byte memset
; at %result + 4.
define void @test_offset_memset(ptr %result) {
; CHECK-LABEL: @test_offset_memset(
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[RESULT:%.*]], i64 4
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP1]], i8 0, i64 8, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca [ 4 x i32 ], align 8
%b = getelementptr i32, ptr %a, i32 1
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
ret void
}

; The memcpy source (%b) starts 4 bytes after the memset destination (%a), so
; the 12-byte memcpy overlaps only the last 8 bytes of the 12-byte memset and
; its final 4 bytes are not written in this function. The expected output
; replaces the memcpy with an 8-byte memset at %result.
define void @test_offset_memsetcpy(ptr %result) {
; CHECK-LABEL: @test_offset_memsetcpy(
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca [ 4 x i32 ], align 8
%b = getelementptr i32, ptr %a, i32 1
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
ret void
}

; Negative test: the 16-byte memcpy source is filled by two memsets with
; different values (0 for the first 12 bytes, 1 for the last 4). The expected
; output keeps the memcpy unchanged.
define void @test_two_memset(ptr %result) {
; CHECK-LABEL: @test_two_memset(
; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 8
; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 3
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca [ 4 x i32 ], align 8
%b = getelementptr i32, ptr %a, i32 3
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
ret void
}

declare ptr @malloc(i64)
declare void @free(ptr)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define void @test_different_source_gep(ptr %dst1, ptr %dst2, i8 %c) {
; CHECK-LABEL: @test_different_source_gep(
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false)
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, ptr [[DST1]], i64 64
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST2:%.*]], ptr [[P]], i64 64, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST2:%.*]], i8 [[C]], i64 64, i1 false)
; CHECK-NEXT: ret void
;
call void @llvm.memset.p0.i64(ptr %dst1, i8 %c, i64 128, i1 false)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ define i32 @foo(i1 %z) {
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: br label [[FOR_INC7_1]]
; CHECK: for.inc7.1:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[SCEVGEP]], i64 4, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: ret i32 [[TMP2]]
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ define void @test(ptr %src, i8 %c, i64 %size) {
ret void
}

; Differing sizes, so left as it is.
; Differing sizes, but the memcpy would read past dst1 (UB) if size2 > size1
define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[DST1]], i64 [[SIZE2]], i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST2]], i8 [[C]], i64 [[SIZE2]], i1 false)
; CHECK-NEXT: ret void
;
%dst1 = alloca i8, i64 %size1
Expand Down
Loading