Skip to content

[EarlyCSE] Add support for writeonly call CSE #145474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/test/CodeGenCXX/auto-var-init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,7 @@ TEST_UNINIT(base, base);
// ZERO-O1-NOT: !annotation

TEST_BRACES(base, base);
// ZERO-LABEL: @test_base_braces()
// CHECK-LABEL: @test_base_braces()
// CHECK: %braces = alloca %struct.base, align [[ALIGN:[0-9]*]]
// CHECK-NEXT: call void @llvm.memset{{.*}}(ptr align [[ALIGN]] %{{.*}}, i8 0, i64 8, i1 false)
Expand Down
18 changes: 14 additions & 4 deletions llvm/lib/Transforms/Scalar/EarlyCSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ struct CallValue {

static bool canHandle(Instruction *Inst) {
CallInst *CI = dyn_cast<CallInst>(Inst);
if (!CI || !CI->onlyReadsMemory() ||
if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) ||
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
// problematic for coroutines, since coroutines may resume in a
Expand Down Expand Up @@ -1626,14 +1626,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;

// If this is a read-only call, process it.
if (CallValue::canHandle(&Inst)) {
// If this is a read-only or write-only call, process it. Skip store
// MemInsts, as they will be more precisely handled later on. Also skip
// memsets, as DSE may be able to optimize them better by removing the
// earlier rather than later store.
if (CallValue::canHandle(&Inst) &&
(!MemInst.isValid() || !MemInst.isStore()) && !isa<MemSetInst>(&Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
if (InVal.first != nullptr &&
isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
&Inst)) {
&Inst) &&
InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this to handle weird cases like the readonly_and_writeonly test, though I don't think it's strictly necessary...

LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
<< " to: " << *InVal.first << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
Expand All @@ -1651,6 +1656,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}

// Increase memory generation for writes. Do this before inserting
// the call, so it has the generation after the write occurred.
if (Inst.mayWriteToMemory())
++CurrentGeneration;

// Otherwise, remember that we have this instruction.
AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
continue;
Expand Down
148 changes: 147 additions & 1 deletion llvm/test/Transforms/EarlyCSE/writeonly.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s
; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO-MSSA
; RUN: opt -S -passes='early-cse<memssa>' < %s | FileCheck %s --check-prefixes=CHECK,MSSA

@var = global i32 undef
declare void @foo() nounwind
Expand All @@ -15,3 +16,148 @@ define void @test() {
store i32 2, ptr @var
ret void
}

declare void @writeonly_void() memory(write)

; Can CSE writeonly calls, including non-nounwind/willreturn.
define void @writeonly_cse() {
; CHECK-LABEL: @writeonly_cse(
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: ret void
;
call void @writeonly_void()
call void @writeonly_void()
ret void
}

; Can CSE, loads do not matter.
define i32 @writeonly_cse_intervening_load(ptr %p) {
; CHECK-LABEL: @writeonly_cse_intervening_load(
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
call void @writeonly_void()
%v = load i32, ptr %p
call void @writeonly_void()
ret i32 %v
}

; Cannot CSE, the store may be to the same memory.
define void @writeonly_cse_intervening_store(ptr %p) {
; CHECK-LABEL: @writeonly_cse_intervening_store(
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: ret void
;
call void @writeonly_void()
store i32 0, ptr %p
call void @writeonly_void()
ret void
}

; Can CSE, the store does not alias the writeonly call.
define void @writeonly_cse_intervening_noalias_store(ptr noalias %p) {
; NO-MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
; NO-MSSA-NEXT: call void @writeonly_void()
; NO-MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
; NO-MSSA-NEXT: call void @writeonly_void()
; NO-MSSA-NEXT: ret void
;
; MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
; MSSA-NEXT: call void @writeonly_void()
; MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
; MSSA-NEXT: ret void
;
call void @writeonly_void()
store i32 0, ptr %p
call void @writeonly_void()
ret void
}

; Cannot CSE loads across writeonly call.
define i32 @load_cse_across_writeonly(ptr %p) {
; CHECK-LABEL: @load_cse_across_writeonly(
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
; CHECK-NEXT: ret i32 [[RES]]
;
%v1 = load i32, ptr %p
call void @writeonly_void()
%v2 = load i32, ptr %p
%res = sub i32 %v1, %v2
ret i32 %res
}

; Can CSE loads across eliminated writeonly call.
define i32 @load_cse_across_csed_writeonly(ptr %p) {
; CHECK-LABEL: @load_cse_across_csed_writeonly(
; CHECK-NEXT: call void @writeonly_void()
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: ret i32 0
;
call void @writeonly_void()
%v1 = load i32, ptr %p
call void @writeonly_void()
%v2 = load i32, ptr %p
%res = sub i32 %v1, %v2
ret i32 %res
}

declare i32 @writeonly(ptr %p) memory(write)

; Can CSE writeonly calls with arg and return.
define i32 @writeonly_ret_cse(ptr %p) {
; CHECK-LABEL: @writeonly_ret_cse(
; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P:%.*]])
; CHECK-NEXT: ret i32 0
;
%v1 = call i32 @writeonly(ptr %p)
%v2 = call i32 @writeonly(ptr %p)
%res = sub i32 %v1, %v2
ret i32 %res
}

; Cannot CSE writeonly calls with different arguments.
define i32 @writeonly_different_args(ptr %p1, ptr %p2) {
; CHECK-LABEL: @writeonly_different_args(
; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P1:%.*]])
; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P2:%.*]])
; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
; CHECK-NEXT: ret i32 [[RES]]
;
%v1 = call i32 @writeonly(ptr %p1)
%v2 = call i32 @writeonly(ptr %p2)
%res = sub i32 %v1, %v2
ret i32 %res
}

declare void @callee()

; These are weird cases where the same call is both readonly and writeonly
; based on call-site attributes. I believe this implies that both calls are
; actually readnone and safe to CSE, but leave them alone to be conservative.
define void @readonly_and_writeonly() {
; CHECK-LABEL: @readonly_and_writeonly(
; CHECK-NEXT: call void @callee() #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @callee() #[[ATTR1]]
; CHECK-NEXT: ret void
;
call void @callee() memory(read)
call void @callee() memory(write)
ret void
}

define void @writeonly_and_readonly() {
; CHECK-LABEL: @writeonly_and_readonly(
; CHECK-NEXT: call void @callee() #[[ATTR1]]
; CHECK-NEXT: call void @callee() #[[ATTR2]]
; CHECK-NEXT: ret void
;
call void @callee() memory(write)
call void @callee() memory(read)
ret void
}