Skip to content

Commit

Permalink
[Transforms][IPO] Add func suffix in ArgumentPromotion and DeadArgumentElimination (llvm#105742)
Browse files Browse the repository at this point in the history

The ArgumentPromotion and DeadArgumentElimination passes can change
function signatures, but the function name remains the same as before the
transformation. This makes tracing with bpf programs difficult, because
users tend to rely on the function signature in the source. See discussion [1]
for details.

This patch adds a suffix to functions whose signatures are changed. The
suffix lets users know that the function signature has changed and that they
need to inspect the IR or binary to find the modified signature before tracing
those functions.

The suffix for ArgumentPromotion is ".argprom" and the suffixes for
DeadArgumentElimination are ".argelim" and ".retelim". The suffix also
gives users a hint about what kind of transformation has been done.

With this patch, I built a recent Linux kernel with full LTO enabled. I
got 4 functions with only argpromotion, such as:
```
  set_track_update.argelim.argprom
  pmd_trans_huge_lock.argprom
  ...
```
I got 1058 functions with only deadargelim, such as:
```
  process_bit0.argelim
  pci_io_ecs_init.argelim
  ...
```
I got 3 functions with both argpromotion and deadargelim:
```
  set_track_update.argelim.argprom
  zero_pud_populate.argelim.argprom
  zero_pmd_populate.argelim.argprom
```

  [1] llvm#104678
  • Loading branch information
yonghong-song authored Sep 19, 2024
1 parent 30cdf1e commit 959448f
Show file tree
Hide file tree
Showing 79 changed files with 342 additions and 263 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,

F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
NF->setName(NF->getName() + ".argprom");

// Loop over all the callers of the function, transforming the call sites to
// pass in the loaded pointers.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,10 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
// it again.
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
if (NumArgumentsEliminated)
NF->setName(NF->getName() + ".argelim");
else
NF->setName(NF->getName() + ".retelim");
NF->IsNewDbgInfoFormat = F->IsNewDbgInfoFormat;

// Loop over all the callers of the function, transforming the call sites to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ define internal void @a() alwaysinline {
}

define internal void @b(ptr) noinline {
; CHECK-LABEL: @b(
; CHECK-LABEL: @b.argprom(
; CHECK-NEXT: ret void
;
ret void
}

define internal void @c() noinline {
; CHECK-LABEL: @c(
; CHECK-NEXT: call void @b()
; CHECK-NEXT: call void @b.argprom()
; CHECK-NEXT: ret void
;
call void @b(ptr @a)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/BugPoint/remove_arguments_test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

declare i32 @test2()

; CHECK: define void @test() {
; CHECK: define void @test.argelim() {
define i32 @test(i32 %A, ptr %B, float %C) {
call i32 @test2()
ret i32 %1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/arg_promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define dso_local void @caller_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_4xi32.argprom.argprom(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
call fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst)
ret void
}

define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_4xi32(
define internal fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_4xi32.argprom.argprom(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: ret void
Expand All @@ -65,7 +65,7 @@ define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
; CHECK-LABEL: define dso_local void @caller_i256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_i256.argprom(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -74,7 +74,7 @@ entry:
}

define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
; CHECK-LABEL: define internal fastcc void @callee_i256(
; CHECK-LABEL: define internal fastcc void @callee_i256.argprom(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -159,7 +159,7 @@ define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_struct4xi32.argprom(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -168,7 +168,7 @@ entry:
}

define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32.argprom(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/internalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; ALL: gvar_used
@gvar_used = addrspace(1) global i32 undef, align 4

; OPT: define internal fastcc void @func_used_noinline(
; OPT: define internal fastcc void @func_used_noinline.argelim(
; OPT-NONE: define fastcc void @func_used_noinline(
define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 {
entry:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov.retelim()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3barv.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.retelim()
; IR: call {{.*}} @_Z3barv.retelim()
; IR: define internal {{.*}} @_Z3foov.retelim()
; IR: call {{.*}} @_Z3bazv.retelim()
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

Expand Down
24 changes: 12 additions & 12 deletions llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov.retelim()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3barv.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.retelim()
; IR: call {{.*}} @_Z3barv.retelim()
; IR: define internal {{.*}} @_Z3foov.retelim()
; IR: call {{.*}} @_Z3bazv.retelim()
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

Expand Down
19 changes: 18 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST


;; Try again but with distributed ThinLTO
Expand Down Expand Up @@ -303,6 +303,23 @@ attributes #0 = { noinline optnone }
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

; IRNODIST: define {{.*}} @main
; IRNODIST: call {{.*}} @_Z3foov.retelim()
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.retelim()
; IRNODIST: define internal {{.*}} @_Z3barv.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z3bazv.retelim()
; IRNODIST: call {{.*}} @_Z3barv.retelim()
; IRNODIST: define internal {{.*}} @_Z3foov.retelim()
; IRNODIST: call {{.*}} @_Z3bazv.retelim()
; IRNODIST: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Z3barv.memprof.1.retelim()
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Z3bazv.memprof.1.retelim()
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }

; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
Expand Down
14 changes: 13 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST


;; Try again but with distributed ThinLTO
Expand Down Expand Up @@ -247,6 +247,18 @@ attributes #0 = { noinline optnone}
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

; IRNODIST: define internal {{.*}} @_Z1Dv.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z1Fv.retelim()
; IRNODIST: call {{.*}} @_Z1Dv.retelim()
; IRNODIST: define internal {{.*}} @_Z1Bv.retelim()
; IRNODIST: call {{.*}} @_Z1Dv.memprof.1.retelim()
; IRNODIST: define internal {{.*}} @_Z1Ev.retelim()
; IRNODIST: call {{.*}} @_Z1Dv.memprof.1.retelim()
; IRNODIST: define internal {{.*}} @_Z1Dv.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }

; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
Expand Down
19 changes: 18 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST


;; Try again but with distributed ThinLTO
Expand Down Expand Up @@ -283,6 +283,23 @@ attributes #0 = { noinline optnone }
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.argelim(
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IRNODIST: define internal {{.*}} @_Z1BPPcS0_(
; IRNODIST: call {{.*}} @_Z1EPPcS0_.argelim(
; IRNODIST: define internal {{.*}} @_Z1CPPcS0_(
; IRNODIST: call {{.*}} @_Z1EPPcS0_.memprof.3.argelim(
; IRNODIST: define internal {{.*}} @_Z1DPPcS0_(
; IRNODIST: call {{.*}} @_Z1EPPcS0_.memprof.2.argelim(
; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.memprof.2.argelim(
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.memprof.3.argelim(
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }

; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
Expand Down
15 changes: 14 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-indirectcall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
;; from main allocating cold memory.
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST


;; Try again but with distributed ThinLTO
Expand Down Expand Up @@ -419,6 +419,19 @@ attributes #0 = { noinline optnone }
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

; IRNODIST: define {{.*}} @main(
; IRNODIST: call {{.*}} @_Z3foov.argelim()
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.argelim()
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
; IRNODIST: define internal {{.*}} @_Z3foov.argelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.argelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }

; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
Expand Down
15 changes: 14 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-inlined.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
;; cold memory.
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST


;; Try again but with distributed ThinLTO
Expand Down Expand Up @@ -323,6 +323,19 @@ attributes #0 = { noinline optnone }
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

; IRNODIST: define internal {{.*}} @_Z3barv.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z3foov.retelim()
; IRNODIST: call {{.*}} @_Z3barv.retelim()
; IRNODIST: define {{.*}} @main()
; IRNODIST: call {{.*}} @_Z3foov.retelim()
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.retelim()
; IRNODIST: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IRNODIST: call {{.*}} @_Z3barv.memprof.1.retelim()
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }

; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: cat %t | FileCheck -check-prefix=REMARK %s

define internal i32 @deref(ptr %x) nounwind {
; CHECK-LABEL: define {{[^@]+}}@deref
; CHECK-LABEL: define {{[^@]+}}@deref.argprom
; CHECK-SAME: (i32 [[X_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 [[X_0_VAL]]
Expand All @@ -29,7 +29,7 @@ define i32 @f(i32 %x) {
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, ptr [[X_ADDR]], align 4
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref.argprom(i32 [[X_ADDR_VAL]])
; CHECK-NEXT: ret i32 [[TEMP1]]
;
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ entry:
; Without number-of-argument constraint, argpromotion will create a function signature with 5 arguments, which equals
; the maximum number of argument permitted by bpf backend, so argpromotion result code does work.
;
; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
; CHECK: i32 @foo2.argprom(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
4 changes: 2 additions & 2 deletions llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ bb:
}

define internal fastcc void @promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@promote_avx2
; CHECK-LABEL: define {{[^@]+}}@promote_avx2.argprom
; CHECK-SAME: (ptr [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], ptr [[ARG]]
Expand All @@ -62,7 +62,7 @@ define void @promote(ptr %arg) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, ptr [[TMP]]
; CHECK-NEXT: call fastcc void @promote_avx2(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
; CHECK-NEXT: call fastcc void @promote_avx2.argprom(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT: ret void
Expand Down
Loading

0 comments on commit 959448f

Please sign in to comment.