-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[llvm-exegesis] [AArch64] Add support for Load Instructions in subprocess execution mode #144895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3cfcda4
b244126
853e171
b32026b
018a9db
d147a71
ef5de8b
4d0ff8b
b554d5e
aacd37d
d8f93a8
4796720
28c44e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,10 +6,26 @@ | |
// | ||
//===----------------------------------------------------------------------===// | ||
#include "../Target.h" | ||
#include "../Error.h" | ||
#include "../MmapUtils.h" | ||
#include "../SerialSnippetGenerator.h" | ||
#include "../SnippetGenerator.h" | ||
#include "../SubprocessMemory.h" | ||
#include "AArch64.h" | ||
#include "AArch64RegisterInfo.h" | ||
#include "llvm/CodeGen/MachineInstrBuilder.h" | ||
#include "llvm/MC/MCInstBuilder.h" | ||
#include "llvm/MC/MCRegisterInfo.h" | ||
#include <vector> | ||
#define DEBUG_TYPE "exegesis-aarch64-target" | ||
|
||
#if defined(__aarch64__) && defined(__linux__) | ||
#include <sys/mman.h> | ||
#include <sys/syscall.h> | ||
#include <unistd.h> // for getpagesize() | ||
#ifdef HAVE_LIBPFM | ||
#include <perfmon/perf_event.h> | ||
#endif // HAVE_LIBPFM | ||
#include <linux/prctl.h> // For PR_PAC_* constants | ||
#include <sys/prctl.h> | ||
#ifndef PR_PAC_APIAKEY | ||
|
@@ -73,7 +89,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth, | |
// Generates instructions to load an immediate value into an FPCR register. | ||
static std::vector<MCInst> | ||
loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) { | ||
MCRegister TempReg = AArch64::X8; | ||
MCRegister TempReg = AArch64::X16; | ||
MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0); | ||
MCInst MoveToFPCR = | ||
MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg); | ||
|
@@ -106,6 +122,90 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth, | |
return Instructions; | ||
} | ||
|
||
static void generateRegisterStackPush(unsigned int RegToPush, | ||
std::vector<MCInst> &GeneratedCode, | ||
int imm = -16) { | ||
// STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes | ||
// before the store to maintain 16-bytes alignment. | ||
if (AArch64::GPR64RegClass.contains(RegToPush)) | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre) | ||
.addReg(AArch64::SP) | ||
.addReg(RegToPush) | ||
.addReg(AArch64::SP) | ||
.addImm(imm)); | ||
else if (AArch64::GPR32RegClass.contains(RegToPush)) | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre) | ||
.addReg(AArch64::SP) | ||
.addReg(RegToPush) | ||
.addReg(AArch64::SP) | ||
.addImm(imm)); | ||
else | ||
llvm_unreachable("Unsupported register class for stack push"); | ||
} | ||
|
||
static void generateRegisterStackPop(unsigned int RegToPopTo, | ||
std::vector<MCInst> &GeneratedCode, | ||
int imm = 16) { | ||
// LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load. | ||
if (AArch64::GPR64RegClass.contains(RegToPopTo)) | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost) | ||
.addReg(AArch64::SP) | ||
.addReg(RegToPopTo) | ||
.addReg(AArch64::SP) | ||
.addImm(imm)); | ||
else if (AArch64::GPR32RegClass.contains(RegToPopTo)) | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost) | ||
.addReg(AArch64::SP) | ||
.addReg(RegToPopTo) | ||
.addReg(AArch64::SP) | ||
.addImm(imm)); | ||
else | ||
llvm_unreachable("Unsupported register class for stack pop"); | ||
} | ||
|
||
void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) { | ||
// AArch64 Linux follows the AAPCS (ARM Architecture Procedure Call Standard): | ||
// - X8 register contains the system call number | ||
// - X0-X5 registers contain the first 6 arguments (if any) | ||
// - SVC #0 instruction triggers the system call | ||
// - Return value is placed in X0 register | ||
GeneratedCode.push_back( | ||
loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber))); | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0)); | ||
} | ||
|
||
/// Functions to save/restore system call registers | ||
#ifdef __linux__ | ||
// General purpose registers X0-X7 in ascending order. saveSysCallRegisters and
// restoreSysCallRegisters index this table to select the first N argument
// registers.
constexpr std::array<unsigned, 8> SyscallArgumentRegisters{ | ||
AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, | ||
AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7, | ||
}; | ||
|
||
static void saveSysCallRegisters(std::vector<MCInst> &GeneratedCode, | ||
unsigned ArgumentCount) { | ||
// AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard): | ||
// X0-X7 registers contain the first 8 arguments. | ||
assert(ArgumentCount <= 8 && | ||
"This implementation saves up to 8 argument registers (X0-X7)"); | ||
// Preserve X8 (used for the syscall number/return value). | ||
generateRegisterStackPush(AArch64::X8, GeneratedCode); | ||
// Preserve the registers used to pass arguments to the system call. | ||
for (unsigned I = 0; I < ArgumentCount; ++I) { | ||
generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode); | ||
} | ||
} | ||
|
||
static void restoreSysCallRegisters(std::vector<MCInst> &GeneratedCode, | ||
unsigned ArgumentCount) { | ||
assert(ArgumentCount <= 8 && | ||
"This implementation restores up to 8 argument registers (X0-X7)"); | ||
// Restore registers in reverse order | ||
for (int I = ArgumentCount - 1; I >= 0; --I) { | ||
generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode); | ||
} | ||
generateRegisterStackPop(AArch64::X8, GeneratedCode); | ||
} | ||
#endif // __linux__ | ||
#include "AArch64GenExegesis.inc" | ||
|
||
namespace { | ||
|
@@ -115,7 +215,39 @@ class ExegesisAArch64Target : public ExegesisTarget { | |
// Passes the AArch64 PFM counter table and the opcode-availability predicate
// to the ExegesisTarget base class.
ExegesisAArch64Target() | ||
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {} | ||
|
||
// Registers this target sets aside for its own bookkeeping. All three are
// reported by getRegistersNeedSaving().
// NOTE(review): the enumerator names suggest X12 carries a code size and X13
// the auxiliary-memory file descriptor, but no code visible here reads or
// writes them for that purpose - confirm against the callers.
enum ReservedRegisters { | ||
CodeSize = AArch64::X12, | ||
AuxiliaryMemoryFD = AArch64::X13, | ||
// X16 is also the scratch register used by loadFPCRImmediate.
TempRegister = AArch64::X16, | ||
}; | ||
|
||
/// Returns the instruction(s) that pop the top of the stack into \p Reg.
///
/// \param Reg Destination register.
/// \param imm Post-index byte offset applied to SP after the load.
/// \returns A single post-indexed load when \p Reg is a 32-bit or 64-bit
///          general purpose register; an empty vector for every other
///          register class, so callers can fall back to another strategy.
std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
                                              int imm = 0) const override {
  const bool IsGeneralPurpose = AArch64::GPR32RegClass.contains(Reg) ||
                                AArch64::GPR64RegClass.contains(Reg);
  if (!IsGeneralPurpose)
    return {};

  std::vector<MCInst> PopCode;
  generateRegisterStackPop(Reg, PopCode, imm);
  return PopCode;
}
|
||
private: | ||
#ifdef __linux__ | ||
// Emits: load ExitCode into X0, then the exit system call.
std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override; | ||
// Emits an mmap system call for a shared, read/write mapping of Length bytes
// at Address. FIXME (from the implementation): FileDescriptorAddress is
// currently loaded into X4 as-is rather than being dereferenced.
std::vector<MCInst> | ||
generateMmap(uintptr_t Address, size_t Length, | ||
uintptr_t FileDescriptorAddress) const override; | ||
// Appends an mmap system call that maps the auxiliary memory region at
// getAuxiliaryMemoryStartAddress() to GeneratedCode.
void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override; | ||
// Maps the auxiliary memory and pushes the mmap result (X0) onto the stack.
std::vector<MCInst> generateMemoryInitialSetup() const override; | ||
// Currently emits no instructions; only prints a warning.
std::vector<MCInst> setStackRegisterToAuxMem() const override; | ||
// Address at which the auxiliary memory is mapped (0 lets the kernel choose).
uintptr_t getAuxiliaryMemoryStartAddress() const override; | ||
// Currently emits no instructions; only prints a warning that measurements
// will be unreliable.
std::vector<MCInst> configurePerfCounter(long Request, | ||
bool SaveRegisters) const override; | ||
// Returns {X0, X1}.
std::vector<MCRegister> getArgumentRegisters() const override; | ||
// Returns X0-X5, X8 and the three ReservedRegisters.
std::vector<MCRegister> getRegistersNeedSaving() const override; | ||
#endif // __linux__ | ||
|
||
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, | ||
const APInt &Value) const override { | ||
if (AArch64::GPR32RegClass.contains(Reg)) | ||
|
@@ -155,6 +287,155 @@ class ExegesisAArch64Target : public ExegesisTarget { | |
|
||
} // namespace | ||
|
||
#ifdef __linux__ | ||
// Selects how the auxiliary memory address is chosen by
// getAuxiliaryMemoryStartAddress():
//   true : use a fixed address just below VAddressSpaceCeiling.
//   false: return 0 so that the kernel chooses the address.
// The value never changes at run time, so it is a file-local compile-time
// constant rather than a mutable global with external linkage.
static constexpr bool UseFixedAddress = true;
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the first argument to the mmap syscall isn't it? If the address is 0, i.e. not specified, then MMAP returns an address? This could be clarified. But either way, why do we only use the fixed addresses? And is there any value in having this boolean here? Do we need it, or is it clearer to just get rid of it if we only use the fixed addresses anyway? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Exegesis currently only has support for mappings at a fixed address. Given this flag is always set to true, we should just get rid of it. |
||
|
||
// Exclusive upper bound assumed for the user virtual address space (2^47).
// The auxiliary memory is placed a fixed number of pages below this value.
// NOTE(review): the figure appears to come from the x86-64 memory map; confirm
// that it is the intended ceiling for the AArch64 configurations in use.
static constexpr uintptr_t VAddressSpaceCeiling = 0x0000'8000'0000'0000;
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not that familiar with the memory map. For folks with the same problem, can you comment and document where this number comes from? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll look at opening up a patch to document this. It essentially comes from https://www.kernel.org/doc/html/v5.8/x86/x86_64/mm.html which ends up being a function of how the MMU is set up. |
||
|
||
static void generateRoundToNearestPage(unsigned int TargetRegister, | ||
std::vector<MCInst> &GeneratedCode) { | ||
int PageSizeShift = static_cast<int>(round(log2(getpagesize()))); | ||
// Round down to the nearest page by getting rid of the least significant bits | ||
// representing location in the page. | ||
|
||
// Single instruction using AND with inverted mask (effectively BIC) | ||
uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF | ||
uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000 | ||
GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri) | ||
.addReg(TargetRegister) // Xd | ||
.addReg(TargetRegister) // Xn | ||
.addImm(AndMask) // imm bitmask | ||
); | ||
} | ||
|
||
std::vector<MCInst> | ||
ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const { | ||
std::vector<MCInst> ExitCallCode; | ||
ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode))); | ||
generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93 | ||
return ExitCallCode; | ||
} | ||
|
||
std::vector<MCInst> | ||
ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, | ||
uintptr_t FileDescriptorAddress) const { | ||
// mmap(address, length, prot, flags, fd, offset=0) | ||
int flags = MAP_SHARED; | ||
if (Address != 0) { | ||
flags |= MAP_FIXED_NOREPLACE; | ||
} | ||
std::vector<MCInst> MmapCode; | ||
MmapCode.push_back( | ||
loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr | ||
MmapCode.push_back( | ||
loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length | ||
MmapCode.push_back(loadImmediate(AArch64::X2, 64, | ||
APInt(64, PROT_READ | PROT_WRITE))); // prot | ||
MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags | ||
// FIXME: File descriptor address is not initialized. | ||
// Copy file descriptor location from aux memory into X4 | ||
MmapCode.push_back( | ||
loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd | ||
MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset | ||
generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 | ||
return MmapCode; | ||
} | ||
|
||
void ExegesisAArch64Target::generateMmapAuxMem( | ||
std::vector<MCInst> &GeneratedCode) const { | ||
int fd = -1; | ||
int flags = MAP_SHARED; | ||
uintptr_t address = getAuxiliaryMemoryStartAddress(); | ||
if (fd == -1) | ||
flags |= MAP_ANONYMOUS; | ||
if (address != 0) | ||
flags |= MAP_FIXED_NOREPLACE; | ||
int prot = PROT_READ | PROT_WRITE; | ||
|
||
GeneratedCode.push_back( | ||
loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr | ||
GeneratedCode.push_back(loadImmediate( | ||
AArch64::X1, 64, | ||
APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length | ||
GeneratedCode.push_back( | ||
loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot | ||
GeneratedCode.push_back( | ||
loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags | ||
GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd | ||
GeneratedCode.push_back( | ||
loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset | ||
generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222 | ||
} | ||
|
||
/// Builds the initial memory setup sequence: the auxiliary memory is mapped
/// and the mmap return value (X0, the mapped address) is pushed onto the
/// stack.
///
/// Saving X0 lets later setup code recover the mapped address even after X0
/// has been overwritten. This matters when the kernel picks the address; the
/// push is emitted unconditionally, i.e. also for the fixed-address case.
///
/// \returns The generated instructions, in execution order.
std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
  std::vector<MCInst> SetupCode;

  // FIXME: Uninit file descriptor
  generateMmapAuxMem(SetupCode);

  // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
  generateRegisterStackPush(AArch64::X0, SetupCode);

  return SetupCode;
}
|
||
/// Placeholder: emits no instructions on AArch64.
///
/// No need for this hook has been identified on AArch64 so far, so it only
/// prints a warning each time it is called.
/// TODO: Implement this, if required.
///
/// \returns An empty instruction list.
std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
  dbgs() << "Warning: setStackRegisterToAuxMem called but not required for "
            "AArch64\n";
  return std::vector<MCInst>();
}
|
||
uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { | ||
if (!UseFixedAddress) | ||
// Allow kernel to select an appropriate memory address | ||
return 0; | ||
// Return the second to last page in the virtual address space | ||
// to try and prevent interference with memory annotations in the snippet | ||
// VAddressSpaceCeiling = 0x0000800000000000 | ||
// FIXME: Why 2 pages? | ||
return VAddressSpaceCeiling - (2 * getpagesize()); | ||
} | ||
|
||
std::vector<MCInst> | ||
ExegesisAArch64Target::configurePerfCounter(long Request, | ||
bool SaveRegisters) const { | ||
std::vector<MCInst> ConfigurePerfCounterCode; // NOP | ||
// FIXME: SYSCALL exits with EBADF error - file descriptor is invalid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's unclear to me what the consequences are of this? Does not work at all? Does it mean all measurements are unreliable? Please clarify. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is unclear to me where to get file descriptor's address and using Yes, Currently all measurements are unreliable in subprocess mode for AArch64. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The performance counters do not work at all if these syscalls do not work. This probably needs to be fixed before this lands. |
||
// No file has been opened previously whose descriptor could be used here | ||
dbgs() << "Warning: configurePerfCounter not implemented, measurements will " | ||
"be unreliable\n"; | ||
return ConfigurePerfCounterCode; | ||
} | ||
|
||
std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const { | ||
return {AArch64::X0, AArch64::X1}; | ||
} | ||
|
||
std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const { | ||
return { | ||
AArch64::X0, | ||
AArch64::X1, | ||
AArch64::X2, | ||
AArch64::X3, | ||
AArch64::X4, | ||
AArch64::X5, | ||
AArch64::X8, | ||
ReservedRegisters::TempRegister, | ||
ReservedRegisters::CodeSize, | ||
ReservedRegisters::AuxiliaryMemoryFD, | ||
}; | ||
} | ||
|
||
#endif // __linux__ | ||
|
||
static ExegesisTarget *getTheExegesisAArch64Target() { | ||
static ExegesisAArch64Target Target; | ||
return &Target; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,6 +66,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, | |
assert(MM.Address % getpagesize() == 0 && | ||
"Memory mappings need to be aligned to page boundaries."); | ||
#endif | ||
// FIXME: file descriptor for aux memory seems not initialized. | ||
// TODO: Invoke openat syscall to get correct fd for aux memory | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would this need |
||
const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName); | ||
BBF.addInstructions(ET.generateMmap( | ||
MM.Address, MemVal.SizeBytes, | ||
|
@@ -78,15 +80,47 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, | |
Register StackPointerRegister = BBF.MF.getSubtarget() | ||
.getTargetLowering() | ||
->getStackPointerRegisterToSaveRestore(); | ||
#define DEBUG_TYPE "register-initial-values" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding new debug info here should probably be split out to a separate patch. |
||
// FIXME: Only loading first register with memory address is hacky. | ||
bool isFirstRegister = true; | ||
for (const RegisterValue &RV : Key.RegisterInitialValues) { | ||
// Debug: register name and class name and value from BenchmarkKey | ||
const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo(); | ||
const char *RegName = RegInfo->getName(RV.Register); | ||
const char *regClassName = "Unknown"; | ||
for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) { | ||
const MCRegisterClass &RC = RegInfo->getRegClass(i); | ||
if (RC.contains(RV.Register)) { | ||
regClassName = RegInfo->getRegClassName(&RC); | ||
break; | ||
} | ||
} | ||
LLVM_DEBUG( | ||
dbgs() << "Setting register (Class: " << regClassName << ") " << RegName | ||
<< std::string( | ||
std::max(0, 3 - static_cast<int>(strlen(RegName))), ' ')); | ||
|
||
if (GenerateMemoryInstructions) { | ||
// If we're generating memory instructions, don't load in the value for | ||
// the register with the stack pointer as it will be used later to finish | ||
// the setup. | ||
if (Register(RV.Register) == StackPointerRegister) | ||
continue; | ||
#if defined(__aarch64__) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think a preprocessor directive here is correct. Exegesis should be able to generate snippets for non-native platforms. |
||
auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this only need to be done on AArch64? |
||
if (!StackLoadInsts.empty() && isFirstRegister) { | ||
for (const auto &Inst : StackLoadInsts) | ||
BBF.addInstruction(Inst); | ||
isFirstRegister = false; | ||
LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16 | ||
<< " bytes\n"); | ||
continue; | ||
} | ||
#endif | ||
} | ||
// Load a constant in the register. | ||
LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n"); | ||
#undef DEBUG_TYPE | ||
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); | ||
if (SetRegisterCode.empty()) | ||
IsSnippetSetupComplete = false; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some comments here about the mechanics of a syscall would be nice, i.e. X8 is the register where the syscall is expected etc, and that the SVC call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added comments explaining the role of X8, X0-X5, SVC #0, and the return convention.
Thanks!