Skip to content

Commit 3143a77

Browse files
authored
Update frametable printer to handle alloc calls + skip musttail calls for statepoints (#23)
1 parent 618927c commit 3143a77

File tree

3 files changed

+106
-13
lines changed

3 files changed

+106
-13
lines changed

llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp

Lines changed: 97 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static std::string camlGlobalSymName(const Module &M, const char *Id) {
6767
}
6868
}
6969

70-
report_fatal_error("Module name not provided for OxCaml GC!");
70+
report_fatal_error("[OxCamlGCPrinter] module name not provided");
7171
}
7272

7373
static void emitCamlGlobal(const Module &M, MCStreamer &OS, const char *Id) {
@@ -135,11 +135,35 @@ static unsigned mapLLVMDwarfRegToOxCamlIndex(unsigned DwarfRegNum) {
135135
} else if (XMMBeginDwarf <= DwarfRegNum && DwarfRegNum <= XMMEndDwarf) {
136136
return DwarfRegNum - XMMBeginDwarf + XMMBeginOxCaml;
137137
} else {
138-
report_fatal_error("Unrecognised DWARF register for use in OxCaml frametable: "
138+
report_fatal_error("[OxCamlGCPrinter] unrecognised DWARF register: "
139139
+ Twine(DwarfRegNum));
140140
}
141141
}
142142

143+
// Note that although `StackMaps` keeps `ID` as a 64-bit integer, anything
144+
// above 32 bits gets truncated, so we can't use those upper bits.
145+
146+
// Extracts the stack-offset field of a statepoint ID: the low 16 bits with
// bit 0 cleared (bit 0 is the flag tested by `IDHasAlloc`).
static uint64_t stackOffsetOfID(uint64_t ID) {
147+
return ID & ((1ull << 16) - 1) & ~(1ull);
148+
}
149+
150+
// Extracts the allocation-size field of a statepoint ID: every bit above the
// low 16 bits, shifted down. No upper mask is applied here.
static uint64_t allocSizeOfID(uint64_t ID) {
151+
return ID >> 16;
152+
}
153+
154+
// Returns true when bit 0 of the statepoint ID is set. Callers use this flag
// to decide whether to set the alloc bit in the frame size and whether to emit
// the allocation sizes read via `allocSizeOfID`.
static bool IDHasAlloc(uint64_t ID) {
155+
return ID & 1ull;
156+
}
157+
158+
// Every 8-bit entry emitted in the frametable is offset by 2 (since that is the
159+
// min allocation size). So, every slot can represent allocations of size [2, 257]
160+
// Returns `AllocSize - 2` narrowed to 8 bits. No range check is performed, so
// only inputs in [2, 257] map to distinct encoded values; anything else wraps
// silently.
static uint8_t encodeAllocSize(uint64_t AllocSize) {
161+
return AllocSize - 2;
162+
}
}
163+
164+
// Bit OR-ed into the emitted frame size when the statepoint ID has its alloc
// flag set.
static const int AllocMask = 2;
165+
// Low two bits of the frame size; they must be clear in the computed frame
// size before the alloc bit is applied, otherwise emission aborts.
static const int FrameSizeReservedMask = 3; // Debug + Alloc
166+
143167
bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter &AP) {
144168
MCStreamer &OS = *AP.OutStreamer;
145169
unsigned PtrSize = M.getDataLayout().getPointerSize(); // Can only be 8 for now
@@ -173,12 +197,33 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter
173197

174198
// frame_data
175199
uint64_t FrameSize = CSI.CSFunctionInfo.StaticStackSize;
176-
if (CSI.ID != StatepointDirectives::DefaultStatepointID)
177-
FrameSize += CSI.ID; // Stack offset from OxCaml
178200
FrameSize += PtrSize; // Return address
179201

202+
// The LLVM IR emitted from OxCaml will always set the statepoint ID for
203+
// calls to be wrapped in a statepoint. Also, note that DefaultStatepointID
204+
// (= 0xABCDEF00 as of now) does not clash with the encoding we use since
205+
// anything that sets the upper 16 bits will also set the bottom bit.
206+
if (CSI.ID != StatepointDirectives::DefaultStatepointID) {
207+
// Stack offset from OxCaml (in case LLVM says we have dynamic objects)
208+
// This will get set to UINT64_MAX in `StackMaps.recordStackMapOpers` if
209+
// that is the case.
210+
if (CSI.CSFunctionInfo.FrameSize != UINT64_MAX) {
211+
FrameSize += stackOffsetOfID(CSI.ID);
212+
}
213+
214+
if (FrameSize & FrameSizeReservedMask) {
215+
report_fatal_error("[OxCamlGCPrinter] frame size has bottom bits set: "
216+
+ Twine(FrameSize));
217+
}
218+
219+
// Alloc bit
220+
if (IDHasAlloc(CSI.ID)) {
221+
FrameSize |= AllocMask;
222+
}
223+
}
224+
180225
if (FrameSize >= 1 << 16)
181-
report_fatal_error("Long frames not supported for OxCaml GC: FrameSize = "
226+
report_fatal_error("[OxCamlGCPrinter] frame size requires long frames: "
182227
+ Twine(FrameSize));
183228
OS.emitInt16(FrameSize);
184229

@@ -195,7 +240,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter
195240

196241
if (LiveCount >= 1 << 16) {
197242
// Very rude!
198-
report_fatal_error("Long frames not supported for OxCaml GC: LiveCount = "
243+
report_fatal_error("[OxCamlGCPrinter] live count requires long frames: "
199244
+ Twine(LiveCount));
200245
}
201246
OS.emitInt16(LiveCount);
@@ -223,7 +268,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter
223268

224269
if (Offset < -(1 << 15) || Offset >= (1 << 15)) {
225270
// Very rude!
226-
report_fatal_error("Stack offset too large for OxCaml frametable: "
271+
report_fatal_error("[OxCamlGCPrinter] stack offset too large: "
227272
+ Twine(Offset));
228273
}
229274
OS.emitInt16(static_cast<uint16_t>(Offset));
@@ -238,6 +283,51 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter
238283
OS.emitInt16(EncodedReg);
239284
}
240285

286+
if (IDHasAlloc(CSI.ID)) {
287+
int AllocSize = allocSizeOfID(CSI.ID);
288+
289+
if (AllocSize < 2) {
290+
report_fatal_error("[OxCamlGCPrinter] alloc size must at least be two!");
291+
}
292+
293+
// Allocations can theoretically go up to 255 * 257 = 65535 words. In
294+
// practice comballoc never gives us allocations that exceed 255, so the
295+
// splitting below isn't strictly needed, but it's here just in case.
296+
297+
int MaxAllocSize = 257;
298+
299+
if (AllocSize % MaxAllocSize == 0) {
300+
size_t NumAlloc = AllocSize / MaxAllocSize;
301+
302+
OS.emitInt8(NumAlloc);
303+
for (size_t i = 0; i < NumAlloc; ++i) {
304+
OS.emitInt8(encodeAllocSize(MaxAllocSize));
305+
}
306+
} else if (AllocSize % MaxAllocSize == 1) {
307+
// This is special since we cannot have allocations of size 1...
308+
309+
// Guaranteed to be nonnegative
310+
size_t NumMaxAlloc = AllocSize / MaxAllocSize - 1;
311+
312+
OS.emitInt8(NumMaxAlloc + 2);
313+
for (size_t i = 0; i < NumMaxAlloc; ++i) {
314+
OS.emitInt8(encodeAllocSize(MaxAllocSize));
315+
}
316+
317+
OS.emitInt8(encodeAllocSize(MaxAllocSize - 1));
318+
OS.emitInt8(encodeAllocSize(2));
319+
} else {
320+
size_t NumMaxAlloc = AllocSize / MaxAllocSize;
321+
322+
OS.emitInt8(NumMaxAlloc + 1);
323+
for (size_t i = 0; i < NumMaxAlloc; ++i) {
324+
OS.emitInt8(encodeAllocSize(MaxAllocSize));
325+
}
326+
327+
OS.emitInt8(encodeAllocSize(AllocSize % MaxAllocSize));
328+
}
329+
}
330+
241331
OS.emitValueToAlignment(Align(PtrSize));
242332
}
243333

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -766,11 +766,9 @@ def CC_X86_64_OxCaml_C_Call : CallingConv<[
766766

767767
def CC_X86_64_OxCaml_C_Call_StackArgs : CallingConv<[
768768
// Calling conventions followed by [caml_c_call_stack_args] to additionally handle
769-
// transfer of stack arguments. Note that this function normally takes a pair of
770-
// pointers on the stack, but since LLVM makes it hard to directly meddle with the
771-
// stack, this in reality calls yet another wrapper which calculates this range given
772-
// the number of stack arguments in bytes in R12.
773-
CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R12]>>,
769+
// transfer of stack arguments. As before, RAX is the function ptr, and [R13, R12]
770+
// delimit arguments on the stack
771+
CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R13, R12]>>,
774772

775773
// Follow C convention normally otherwise
776774
CCDelegateTo<CC_X86_64_C>
@@ -1322,7 +1320,7 @@ def CSR_64_OxCaml_WithoutFP : CalleeSavedRegs<(add)>;
13221320
// R14 and R15 (and also R13 and R12 in the latter) are used as return registers,
13231321
// so they aren't callee saved.
13241322
def CSR_64_OxCaml_C_Call : CalleeSavedRegs<(sub CSR_64, R14, R15)>;
1325-
def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R12)>;
1323+
def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R13, R12)>;
13261324

13271325
// See [Proc.destroyed_at_alloc_or_poll] for more details:
13281326
// https://github.com/oxcaml/oxcaml/blob/main/backend/amd64/proc.ml#L457

llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3087,6 +3087,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
30873087
"Don't expect any other calls here!");
30883088
return false;
30893089
}
3090+
3091+
// `musttail` calls wrapped in statepoints fail to verify due to
3092+
// the intrinsic using variadic arguments.
3093+
if (Call->isMustTailCall()) return false;
3094+
30903095
return true;
30913096
}
30923097
return false;

0 commit comments

Comments
 (0)