Skip to content

Commit f5192d7

Browse files
committed
[x86] Propagate memory operands during call frame optimization
Summary: Propagate memory operands when folding load instructions into instructions that directly operate on memory. The original revision has been split; see D80140 for the other part of the changes.
Reviewers: craig.topper, rnk, lebedev.ri, efriedma
Reviewed By: craig.topper
Subscribers: lebedev.ri, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80062
1 parent ce5780b commit f5192d7

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

llvm/lib/Target/X86/X86CallFrameOptimization.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
531531
PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
532532
}
533533
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
534+
Push->cloneMemRefs(MF, *Store);
534535
break;
535536
case X86::MOV32mr:
536537
case X86::MOV64mr: {
@@ -562,13 +563,15 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
562563
unsigned NumOps = DefMov->getDesc().getNumOperands();
563564
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
564565
Push->addOperand(DefMov->getOperand(i));
566+
Push->cloneMergedMemRefs(MF, {&*DefMov, &*Store});
565567

566568
DefMov->eraseFromParent();
567569
} else {
568570
PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
569571
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
570572
.addReg(Reg)
571573
.getInstr();
574+
Push->cloneMemRefs(MF, *Store);
572575
}
573576
break;
574577
}
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# RUN: llc -o - -mtriple=x86_64-- -run-pass=x86-cf-opt %s | FileCheck %s
2+
3+
--- |
4+
; ModuleID = 'test.ll'
5+
source_filename = "code_io.c"
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
7+
8+
@.str.8 = private unnamed_addr constant [34 x i8] c"%10s%10s%10s%10s%10s%10s%10s%10s\0A\00", align 1
9+
@.str.9 = private unnamed_addr constant [6 x i8] c"nbody\00", align 1
10+
@.str.10 = private unnamed_addr constant [6 x i8] c"dtime\00", align 1
11+
@.str.11 = private unnamed_addr constant [4 x i8] c"eps\00", align 1
12+
@.str.12 = private unnamed_addr constant [4 x i8] c"tol\00", align 1
13+
@.str.13 = private unnamed_addr constant [6 x i8] c"dtout\00", align 1
14+
@.str.14 = private unnamed_addr constant [6 x i8] c"tstop\00", align 1
15+
@.str.15 = private unnamed_addr constant [7 x i8] c"fcells\00", align 1
16+
@.str.16 = private unnamed_addr constant [6 x i8] c"NPROC\00", align 1
17+
18+
define dso_local void @initoutput() local_unnamed_addr {
19+
entry:
20+
%call1 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([34 x i8], [34 x i8]* @.str.8, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.9, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.10, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.12, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.13, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.14, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.15, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.16, i64 0, i64 0))
21+
ret void
22+
}
23+
24+
declare dso_local i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr
25+
26+
; Function Attrs: nounwind
27+
declare void @llvm.stackprotector(i8*, i8**) #0
28+
29+
attributes #0 = { nounwind }
30+
31+
...
32+
---
33+
name: initoutput
34+
alignment: 16
35+
exposesReturnsTwice: false
36+
legalized: false
37+
regBankSelected: false
38+
selected: false
39+
failedISel: false
40+
tracksRegLiveness: true
41+
hasWinCFI: false
42+
registers:
43+
- { id: 0, class: gr64, preferred-register: '' }
44+
- { id: 1, class: gr64, preferred-register: '' }
45+
- { id: 2, class: gr64, preferred-register: '' }
46+
- { id: 3, class: gr64, preferred-register: '' }
47+
- { id: 4, class: gr64, preferred-register: '' }
48+
- { id: 5, class: gr64, preferred-register: '' }
49+
- { id: 6, class: gr64, preferred-register: '' }
50+
- { id: 7, class: gr32, preferred-register: '' }
51+
- { id: 8, class: gr8, preferred-register: '' }
52+
- { id: 9, class: gr32, preferred-register: '' }
53+
liveins: []
54+
frameInfo:
55+
isFrameAddressTaken: false
56+
isReturnAddressTaken: false
57+
hasStackMap: false
58+
hasPatchPoint: false
59+
stackSize: 0
60+
offsetAdjustment: 0
61+
maxAlignment: 8
62+
adjustsStack: false
63+
hasCalls: true
64+
stackProtector: ''
65+
maxCallFrameSize: 4294967295
66+
cvBytesOfCalleeSavedRegisters: 0
67+
hasOpaqueSPAdjustment: false
68+
hasVAStart: false
69+
hasMustTailInVarArgFunc: false
70+
localFrameSize: 0
71+
savePoint: ''
72+
restorePoint: ''
73+
fixedStack: []
74+
stack: []
75+
callSites: []
76+
constants: []
77+
machineFunctionInfo: {}
78+
body: |
79+
bb.0.entry:
80+
ADJCALLSTACKDOWN64 24, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
81+
%0:gr64 = COPY $rsp
82+
MOV64mi32 %0, 1, $noreg, 16, $noreg, @.str.16 :: (store 8 into stack + 16)
83+
MOV64mi32 %0, 1, $noreg, 8, $noreg, @.str.15 :: (store 8 into stack + 8)
84+
MOV64mi32 %0, 1, $noreg, 0, $noreg, @.str.14 :: (store 8 into stack)
85+
%1:gr64 = MOV32ri64 @.str.8
86+
%2:gr64 = MOV32ri64 @.str.9
87+
%3:gr64 = MOV32ri64 @.str.10
88+
%4:gr64 = MOV32ri64 @.str.11
89+
%5:gr64 = MOV32ri64 @.str.12
90+
%6:gr64 = MOV32ri64 @.str.13
91+
%7:gr32 = MOV32r0 implicit-def dead $eflags
92+
%8:gr8 = COPY %7.sub_8bit
93+
$rdi = COPY %1
94+
$rsi = COPY %2
95+
$rdx = COPY %3
96+
$rcx = COPY %4
97+
$r8 = COPY %5
98+
$r9 = COPY %6
99+
$al = COPY %8
100+
CALL64pcrel32 @printf, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit $rcx, implicit $r8, implicit $r9, implicit $al, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
101+
ADJCALLSTACKUP64 24, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
102+
RET 0
103+
104+
# Call frame optimization should propagate the memory operands of the stores it replaces onto the generated pushes
105+
# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack + 16)
106+
# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack + 8)
107+
# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack)
108+
109+
...

0 commit comments

Comments
 (0)