-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[SystemZ] Add proper mcount handling #135767
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-systemz Author: Dominik Steenken (dominik-steenken) ChangesWhen compiling with On SystemZ, these calls require special handling:
This commit adds some special handling to the EntryExitInstrumenterPass that keeps the insertion of the mcount function into the module, but skips over insertion of the actual call in order to perform this insertion in the Noe that the desired change in the This PR should address issue #121137 . The test inserted here is derived from the example given in that issue. Full diff: https://github.com/llvm/llvm-project/pull/135767.diff 3 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 9561ea544b270..0936690dafdcd 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZFrameLowering.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZCallingConv.h"
#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
@@ -18,6 +19,8 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -558,6 +561,31 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ // Add mcount instrumentation if necessary.
+ if (MF.getFunction().getFnAttribute("instrument-function-entry-inlined").getValueAsString() == "mcount") {
+
+ // Store return address 8 bytes above stack pointer.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R14D)
+ .addReg(SystemZ::R15D)
+ .addImm(8)
+ .addReg(0);
+
+ // Call mcount (Regmask 0 to ensure this will not be moved by the
+ // scheduler.).
+ const uint32_t Mask = 0;
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL))
+ .addGlobalAddress(MF.getFunction().getParent()->getFunction("mcount"))
+ .addRegMask(&Mask);
+
+ // Reload return address drom 8 bytes above stack pointer.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LG))
+ .addReg(SystemZ::R14D)
+ .addReg(SystemZ::R15D)
+ .addImm(8)
+ .addReg(0);
+ }
+
// The current offset of the stack pointer from the CFA.
int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP;
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index d47f1b4253b54..72e4f8791a91a 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
-static void insertCall(Function &CurFn, StringRef Func,
+static bool insertCall(Function &CurFn, StringRef Func,
BasicBlock::iterator InsertionPt, DebugLoc DL) {
Module &M = *InsertionPt->getParent()->getParent()->getParent();
LLVMContext &C = InsertionPt->getParent()->getContext();
@@ -63,12 +63,16 @@ static void insertCall(Function &CurFn, StringRef Func,
false));
CallInst *Call = CallInst::Create(Fn, RetAddr, "", InsertionPt);
Call->setDebugLoc(DL);
+ } else if (TargetTriple.isSystemZ()) {
+ M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ // skip insertion for `mcount` on SystemZ. This will be handled later in `emitPrologue`.
+ return false;
} else {
FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
Call->setDebugLoc(DL);
}
- return;
+ return true;
}
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
@@ -87,7 +91,7 @@ static void insertCall(Function &CurFn, StringRef Func,
CallInst *Call =
CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
Call->setDebugLoc(DL);
- return;
+ return true;
}
// We only know how to call a fixed set of instrumentation functions, because
@@ -129,9 +133,9 @@ static bool runOnFunction(Function &F, bool PostInlining) {
if (auto SP = F.getSubprogram())
DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
- insertCall(F, EntryFunc, F.begin()->getFirstInsertionPt(), DL);
- Changed = true;
- F.removeFnAttr(EntryAttr);
+ Changed = insertCall(F, EntryFunc, F.begin()->getFirstInsertionPt(), DL);
+ if (Changed)
+ F.removeFnAttr(EntryAttr);
}
if (!ExitFunc.empty()) {
diff --git a/llvm/test/CodeGen/SystemZ/mcount.ll b/llvm/test/CodeGen/SystemZ/mcount.ll
new file mode 100644
index 0000000000000..01bd34548f125
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/mcount.ll
@@ -0,0 +1,35 @@
+; Test proper insertion of mcount instrumentation
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -o - | FileCheck %s
+;
+; CHECK: # %bb.0:
+; CHECK-NEXT: stg %r14, 8(%r15)
+; CHECK-NEXT: brasl %r14, mcount@PLT
+; CHECK-NEXT: lg %r14, 8(%r15)
+define dso_local signext i32 @fib(i32 noundef signext %n) #0 {
+entry:
+ %n.addr = alloca i32, align 4
+ store i32 %n, ptr %n.addr, align 4
+ %0 = load i32, ptr %n.addr, align 4
+ %cmp = icmp sle i32 %0, 1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %1 = load i32, ptr %n.addr, align 4
+ %sub = sub nsw i32 %1, 1
+ %call = call signext i32 @fib(i32 noundef signext %sub)
+ %2 = load i32, ptr %n.addr, align 4
+ %sub1 = sub nsw i32 %2, 2
+ %call2 = call signext i32 @fib(i32 noundef signext %sub1)
+ %add = add nsw i32 %call, %call2
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ 1, %cond.true ], [ %add, %cond.false ]
+ ret i32 %cond
+}
+
+attributes #0 = { "instrument-function-entry-inlined"="mcount" }
|
@llvm/pr-subscribers-llvm-transforms Author: Dominik Steenken (dominik-steenken) ChangesWhen compiling with On SystemZ, these calls require special handling:
This commit adds some special handling to the EntryExitInstrumenterPass that keeps the insertion of the mcount function into the module, but skips over insertion of the actual call in order to perform this insertion in the Noe that the desired change in the This PR should address issue #121137 . The test inserted here is derived from the example given in that issue. Full diff: https://github.com/llvm/llvm-project/pull/135767.diff 3 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 9561ea544b270..0936690dafdcd 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZFrameLowering.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZCallingConv.h"
#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
@@ -18,6 +19,8 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -558,6 +561,31 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ // Add mcount instrumentation if necessary.
+ if (MF.getFunction().getFnAttribute("instrument-function-entry-inlined").getValueAsString() == "mcount") {
+
+ // Store return address 8 bytes above stack pointer.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R14D)
+ .addReg(SystemZ::R15D)
+ .addImm(8)
+ .addReg(0);
+
+ // Call mcount (Regmask 0 to ensure this will not be moved by the
+ // scheduler.).
+ const uint32_t Mask = 0;
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL))
+ .addGlobalAddress(MF.getFunction().getParent()->getFunction("mcount"))
+ .addRegMask(&Mask);
+
+ // Reload return address drom 8 bytes above stack pointer.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LG))
+ .addReg(SystemZ::R14D)
+ .addReg(SystemZ::R15D)
+ .addImm(8)
+ .addReg(0);
+ }
+
// The current offset of the stack pointer from the CFA.
int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP;
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index d47f1b4253b54..72e4f8791a91a 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -22,7 +22,7 @@
using namespace llvm;
-static void insertCall(Function &CurFn, StringRef Func,
+static bool insertCall(Function &CurFn, StringRef Func,
BasicBlock::iterator InsertionPt, DebugLoc DL) {
Module &M = *InsertionPt->getParent()->getParent()->getParent();
LLVMContext &C = InsertionPt->getParent()->getContext();
@@ -63,12 +63,16 @@ static void insertCall(Function &CurFn, StringRef Func,
false));
CallInst *Call = CallInst::Create(Fn, RetAddr, "", InsertionPt);
Call->setDebugLoc(DL);
+ } else if (TargetTriple.isSystemZ()) {
+ M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ // skip insertion for `mcount` on SystemZ. This will be handled later in `emitPrologue`.
+ return false;
} else {
FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
Call->setDebugLoc(DL);
}
- return;
+ return true;
}
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
@@ -87,7 +91,7 @@ static void insertCall(Function &CurFn, StringRef Func,
CallInst *Call =
CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt);
Call->setDebugLoc(DL);
- return;
+ return true;
}
// We only know how to call a fixed set of instrumentation functions, because
@@ -129,9 +133,9 @@ static bool runOnFunction(Function &F, bool PostInlining) {
if (auto SP = F.getSubprogram())
DL = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
- insertCall(F, EntryFunc, F.begin()->getFirstInsertionPt(), DL);
- Changed = true;
- F.removeFnAttr(EntryAttr);
+ Changed = insertCall(F, EntryFunc, F.begin()->getFirstInsertionPt(), DL);
+ if (Changed)
+ F.removeFnAttr(EntryAttr);
}
if (!ExitFunc.empty()) {
diff --git a/llvm/test/CodeGen/SystemZ/mcount.ll b/llvm/test/CodeGen/SystemZ/mcount.ll
new file mode 100644
index 0000000000000..01bd34548f125
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/mcount.ll
@@ -0,0 +1,35 @@
+; Test proper insertion of mcount instrumentation
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -o - | FileCheck %s
+;
+; CHECK: # %bb.0:
+; CHECK-NEXT: stg %r14, 8(%r15)
+; CHECK-NEXT: brasl %r14, mcount@PLT
+; CHECK-NEXT: lg %r14, 8(%r15)
+define dso_local signext i32 @fib(i32 noundef signext %n) #0 {
+entry:
+ %n.addr = alloca i32, align 4
+ store i32 %n, ptr %n.addr, align 4
+ %0 = load i32, ptr %n.addr, align 4
+ %cmp = icmp sle i32 %0, 1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %1 = load i32, ptr %n.addr, align 4
+ %sub = sub nsw i32 %1, 1
+ %call = call signext i32 @fib(i32 noundef signext %sub)
+ %2 = load i32, ptr %n.addr, align 4
+ %sub1 = sub nsw i32 %2, 2
+ %call2 = call signext i32 @fib(i32 noundef signext %sub1)
+ %add = add nsw i32 %call, %call2
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ 1, %cond.true ], [ %add, %cond.false ]
+ ret i32 %cond
+}
+
+attributes #0 = { "instrument-function-entry-inlined"="mcount" }
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
296fa31
to
dd52808
Compare
The "Linux Premerge Checks" fail seems procedural rather than related to the PR:
How can i request a rebuild? |
c246c4e
to
d715e01
Compare
@uweigand This is ready for review now. |
When compiling with `-pg`, the `EntryExitInstrumenterPass` will insert calls to the glibc function `mcount` at the begining of each `MachineFunction`. On SystemZ, these calls require special handling: - The call to `mcount` needs to happen at the beginning of the prologue. - Prior to the call to `mcount`, register `%r14`, the return address of the callee function, must be stored 8 bytes above the stack pointer `%r15`. After the call to `mcount` returns, that register needs to be restored. This commit adds some special handling to the EntryExitInstrumenterPass that keeps the insertion of the mcount function into the module, but skips over insertion of the actual call in order to perform this insertion in the `emitPrologue` function. There, a simple sequence of store/call/load is inserted, which implements the above.
d715e01
to
955a04b
Compare
@@ -558,6 +559,32 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF, | |||
// to determine the end of the prologue. | |||
DebugLoc DL; | |||
|
|||
// Add mcount instrumentation if necessary. | |||
if (MF.getFunction().getFnAttribute("systemz-backend").getValueAsString() == | |||
"insert-mcount") { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer a more specific attribute name, like "systemz-instrument-function-entry", where the value is the actual mcount function name - similar to the common attribute.
.addReg(0); | ||
|
||
// Call mcount (Regmask 0 to ensure this will not be moved by the | ||
// scheduler.). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this regmask 0 really necessary? The memory access patterns should already prevent moving this call. In general, it would be preferable to model the special mcount ABI correctly (i.e. mcount preserves more registers than the normal ABI, not less).
@@ -63,6 +63,12 @@ static void insertCall(Function &CurFn, StringRef Func, | |||
false)); | |||
CallInst *Call = CallInst::Create(Fn, RetAddr, "", InsertionPt); | |||
Call->setDebugLoc(DL); | |||
} else if (TargetTriple.isSystemZ()) { | |||
M.getOrInsertFunction(Func, Type::getVoidTy(C)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to do this here, or can this be delayed until where the function is used in the backend? Looks a bit strange to have this call without any use of the return value ...
When compiling with
-pg
, theEntryExitInstrumenterPass
will insert calls to the glibc functionmcount
at the begining of eachMachineFunction
.On SystemZ, these calls require special handling:
mcount
needs to happen at the beginning of the prologue.mcount
, register%r14
, the return address of the callee function, must be stored 8 bytes above the stack pointer%r15
. After the call tomcount
returns, that register needs to be restored.This commit adds some special handling to the EntryExitInstrumenterPass that keeps the insertion of the mcount function into the module, but skips over insertion of the actual call in order to perform this insertion in the
emitPrologue
function. There, a simple sequence of store/call/load is inserted, which implements the above.The desired change in the
EntryExitInstrumenterPass
necessitated the addition of a new attribute and attribute kind to each function, which is used to trigger the postprocessing, aka call insertion, inemitPrologue
. Note that the new attribute must be of a different kind than themcount
atribute, since otherwise it would replace that attribute and later be deleted by the code that intended to deletemcount
. The new attribnute is calledinsert-mcount
, while the attribute kind issystemz-backend
, to clearly mark it as a SystemZ-specific backend concern.This PR should address issue #121137 . The test inserted here is derived from the example given in that issue.