llvm-alloc-opt.cpp
// This file is a part of Julia. License is MIT: https://julialang.org/license
#define DEBUG_TYPE "alloc_opt"
#undef DEBUG
#include "llvm-version.h"
#include <llvm/IR/Value.h>
#include <llvm/IR/CFG.h>
#include <llvm/IR/Dominators.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Operator.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Pass.h>
#include <llvm/Support/Debug.h>
#include "fix_llvm_assert.h"
#include "codegen_shared.h"
#include "julia.h"
#include "julia_internal.h"
#include <map>
#include <set>
using namespace llvm;
extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
namespace {
static void copyMetadata(Instruction *dest, const Instruction *src)
{
#if JL_LLVM_VERSION < 40000
if (!src->hasMetadata())
return;
SmallVector<std::pair<unsigned,MDNode*>,4> TheMDs;
src->getAllMetadataOtherThanDebugLoc(TheMDs);
for (const auto &MD : TheMDs)
dest->setMetadata(MD.first, MD.second);
dest->setDebugLoc(src->getDebugLoc());
#else
dest->copyMetadata(*src);
#endif
}
static bool isBundleOperand(CallInst *call, unsigned idx)
{
#if JL_LLVM_VERSION < 40000
return call->hasOperandBundles() && idx >= call->getBundleOperandsStartIndex() &&
idx < call->getBundleOperandsEndIndex();
#else
return call->isBundleOperand(idx);
#endif
}
/**
* Promote `julia.gc_alloc_obj` allocations which do not have an escaping root to an alloca.
* Uses that are not considered to escape the object (i.e. its heap address) include:
*
* * load
* * `pointer_from_objref`
* * `ccall` gcroot array (`jl_roots` operand bundle)
* * store (as address)
* * addrspacecast, bitcast, getelementptr
*
* The results of these cast/GEP instructions are scanned recursively.
*
* All other uses are considered to escape conservatively.
*/
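//
// A minimal sketch of the intended rewrite (illustrative only: the exact argument
// types, address spaces and alignment come from codegen and the target, and the
// value names here are made up). IR of the form
//
//   %obj = call %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls, i64 16, %jl_value_t addrspace(10)* %tag)
//   ; ... loads/stores/GEPs/bitcasts on %obj, none of which let it escape ...
//
// is turned into a stack slot in the entry block:
//
//   %buff = alloca i128, align 16
//   %ptr = bitcast i128* %buff to i8*
//   call void @llvm.lifetime.start(i64 16, i8* %ptr)
//   ; ... the same loads/stores/GEPs, now addressing the alloca ...
//   call void @llvm.lifetime.end(i64 16, i8* %ptr)  ; before the first safepoint after the last use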
struct AllocOpt : public FunctionPass {
static char ID;
AllocOpt()
: FunctionPass(ID)
{
llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
}
private:
LLVMContext *ctx;
const DataLayout *DL;
Function *alloc_obj;
Function *ptr_from_objref;
Function *lifetime_start;
Function *lifetime_end;
Type *T_int8;
Type *T_int32;
Type *T_int64;
Type *T_size;
Type *T_pint8;
Type *T_prjlvalue;
Type *T_pjlvalue;
Type *T_pprjlvalue;
MDNode *tbaa_tag;
struct CheckInstFrame {
Instruction *parent;
uint64_t offset;
Instruction::use_iterator use_it;
Instruction::use_iterator use_end;
};
typedef SmallVector<CheckInstFrame, 4> CheckInstStack;
struct ReplaceUsesFrame {
Instruction *orig_i;
Instruction *new_i;
ReplaceUsesFrame(Instruction *orig_i, Instruction *new_i)
: orig_i(orig_i),
new_i(new_i)
{}
};
typedef SmallVector<ReplaceUsesFrame,4> ReplaceUsesStack;
struct LifetimeMarker {
LifetimeMarker(AllocOpt &pass)
: pass(pass),
first_safepoint{},
stack{}
{}
// Insert llvm.lifetime.* calls for `ptr` with size `sz`,
// based on the uses of `orig` given in `alloc_uses`.
void insert(Instruction *ptr, Constant *sz, Instruction *orig,
const std::set<Instruction*> &alloc_uses);
private:
Instruction *getFirstSafepoint(BasicBlock *bb);
void insertEnd(Instruction *ptr, Constant *sz, Instruction *insert);
struct Frame {
BasicBlock *bb;
pred_iterator p_cur;
pred_iterator p_end;
Frame(BasicBlock *bb)
: bb(bb),
p_cur(pred_begin(bb)),
p_end(pred_end(bb))
{}
};
AllocOpt &pass;
std::map<BasicBlock*,Instruction*> first_safepoint;
SmallVector<Frame,4> stack;
};
bool doInitialization(Module &m) override;
bool runOnFunction(Function &F) override;
bool checkInst(Instruction *I, CheckInstStack &stack, std::set<Instruction*> &uses,
bool &ignore_tag);
void replaceUsesWith(Instruction *orig_i, Instruction *new_i, ReplaceUsesStack &stack);
bool isSafepoint(Instruction *inst);
void getAnalysisUsage(AnalysisUsage &AU) const override
{
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
};
Instruction *AllocOpt::LifetimeMarker::getFirstSafepoint(BasicBlock *bb)
{
auto it = first_safepoint.find(bb);
if (it != first_safepoint.end())
return it->second;
Instruction *first = nullptr;
for (auto &I: *bb) {
if (pass.isSafepoint(&I)) {
first = &I;
break;
}
}
first_safepoint[bb] = first;
return first;
}
void AllocOpt::LifetimeMarker::insertEnd(Instruction *ptr, Constant *sz, Instruction *insert)
{
BasicBlock::iterator it(insert);
BasicBlock::iterator begin(insert->getParent()->begin());
// Make sure that the end is inserted before any nearby lifetime start.
// We insert the start before the allocation call; if that call happens to be the first
// safepoint we find for another allocation, it's better to insert the end before the
// start rather than before the allocation, so that the two allocations do not have
// overlapping lifetimes.
while (it != begin) {
--it;
if (auto II = dyn_cast<IntrinsicInst>(&*it)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
insert = II;
continue;
}
}
break;
}
CallInst::Create(pass.lifetime_end, {sz, ptr}, "", insert);
}
void AllocOpt::LifetimeMarker::insert(Instruction *ptr, Constant *sz, Instruction *orig,
const std::set<Instruction*> &alloc_uses)
{
CallInst::Create(pass.lifetime_start, {sz, ptr}, "", orig);
BasicBlock *def_bb = orig->getParent();
std::set<BasicBlock*> bbs{def_bb};
auto &DT = pass.getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Collect all BB where the allocation is live
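// Starting from the block of each use, walk backwards through predecessors with an
// explicit work stack, marking every block reached until the walk hits a block that
// is already known live (ultimately the def block, which `bbs` starts out containing).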
for (auto use: alloc_uses) {
auto bb = use->getParent();
if (!bbs.insert(bb).second)
continue;
assert(stack.empty());
Frame cur{bb};
while (true) {
assert(cur.p_cur != cur.p_end);
auto pred = *cur.p_cur;
++cur.p_cur;
if (bbs.insert(pred).second) {
if (cur.p_cur != cur.p_end)
stack.push_back(cur);
cur = Frame(pred);
}
if (cur.p_cur == cur.p_end) {
if (stack.empty())
break;
cur = stack.back();
stack.pop_back();
}
}
}
#ifndef JL_NDEBUG
for (auto bb: bbs) {
if (bb == def_bb)
continue;
if (DT.dominates(orig, bb))
continue;
auto F = bb->getParent();
llvm_dump(F);
llvm_dump(orig);
jl_safe_printf("Does not dominate BB:\n");
llvm_dump(bb);
abort();
}
#endif
// For each BB, find the first instruction(s) where the allocation is possibly dead.
// If all successors are live, then there isn't one.
// If all successors are dead, then it's the first instruction after the last use
// within the BB.
// If some successors are live and others are dead, it's the first instruction in
// the successors that are dead.
std::vector<Instruction*> first_dead;
for (auto bb: bbs) {
bool has_use = false;
for (auto succ: successors(bb)) {
// def_bb is the only bb in bbs that's not dominated by orig
if (succ != def_bb && bbs.count(succ)) {
has_use = true;
break;
}
}
if (has_use) {
for (auto succ: successors(bb)) {
if (!bbs.count(succ)) {
first_dead.push_back(&*succ->begin());
}
}
}
else {
for (auto it = bb->rbegin(), end = bb->rend(); it != end; ++it) {
if (alloc_uses.count(&*it)) {
--it;
first_dead.push_back(&*it);
break;
}
}
}
}
bbs.clear();
// There needs to be (and should be) only one lifetime.end for the allocation in each
// BB; reuse `bbs` to record which BBs have already been handled.
// Iterate through the first-dead points and find the first safepoint following each of them.
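// Each first-dead point is flooded forward through successor blocks, so that a
// lifetime.end ends up before the first safepoint on every path leaving the last use.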
while (!first_dead.empty()) {
auto I = first_dead.back();
first_dead.pop_back();
auto bb = I->getParent();
if (!bbs.insert(bb).second)
continue;
if (I == &*bb->begin()) {
// There's no use in or after this BB. If this BB is not dominated by
// the def, then the object has to be dead on entering this BB.
// Otherwise, there could be uses that we don't track
// before hitting the next safepoint.
if (!DT.dominates(orig, bb)) {
insertEnd(ptr, sz, &*bb->getFirstInsertionPt());
continue;
}
else if (auto insert = getFirstSafepoint(bb)) {
insertEnd(ptr, sz, insert);
}
}
else {
assert(bb == def_bb || DT.dominates(orig, I));
BasicBlock::iterator it(I);
BasicBlock::iterator end = bb->end();
bool safepoint_found = false;
for (; it != end; ++it) {
auto insert = &*it;
if (pass.isSafepoint(insert)) {
insertEnd(ptr, sz, insert);
safepoint_found = true;
break;
}
}
if (safepoint_found) {
continue;
}
}
for (auto succ: successors(bb)) {
first_dead.push_back(&*succ->begin());
}
}
}
bool AllocOpt::doInitialization(Module &M)
{
ctx = &M.getContext();
DL = &M.getDataLayout();
alloc_obj = M.getFunction("julia.gc_alloc_obj");
if (!alloc_obj)
return false;
ptr_from_objref = M.getFunction("julia.pointer_from_objref");
T_prjlvalue = alloc_obj->getReturnType();
T_pjlvalue = PointerType::get(cast<PointerType>(T_prjlvalue)->getElementType(), 0);
T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
T_int8 = Type::getInt8Ty(*ctx);
T_int32 = Type::getInt32Ty(*ctx);
T_int64 = Type::getInt64Ty(*ctx);
T_size = sizeof(void*) == 8 ? T_int64 : T_int32;
T_pint8 = PointerType::get(T_int8, 0);
#if JL_LLVM_VERSION >= 50000
lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
#else
lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start);
lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end);
#endif
MDNode *tbaa_data;
MDNode *tbaa_data_scalar;
std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data");
tbaa_tag = tbaa_make_child("jtbaa_tag", tbaa_data_scalar).first;
return true;
}
bool AllocOpt::checkInst(Instruction *I, CheckInstStack &stack, std::set<Instruction*> &uses,
bool &ignore_tag)
{
uses.clear();
if (I->use_empty())
return true;
CheckInstFrame cur{I, 0, I->use_begin(), I->use_end()};
stack.clear();
// Walk the use graph with an explicit stack (instead of recursion); casts and GEPs
// produce derived pointers whose uses also need to be checked.
auto push_inst = [&] (Instruction *inst) {
if (cur.use_it != cur.use_end)
stack.push_back(cur);
cur.parent = inst;
cur.use_it = inst->use_begin();
cur.use_end = inst->use_end();
};
auto check_inst = [&] (Instruction *inst, Use *use) {
if (isa<LoadInst>(inst))
return true;
if (auto call = dyn_cast<CallInst>(inst)) {
// TODO: on LLVM 5.0 we may need to handle certain llvm intrinsics
// including `memcpy`, `memset` etc. We might also need to handle
// `memcmp` by converting it to our own intrinsic and lowering it after the GC root pass.
if (ptr_from_objref && ptr_from_objref == call->getCalledFunction())
return true;
auto opno = use->getOperandNo();
// Uses in `jl_roots` operand bundle are not counted as escaping, everything else is.
if (!isBundleOperand(call, opno))
return false;
return call->getOperandBundleForOperand(opno).getTagName() == "jl_roots";
}
if (auto store = dyn_cast<StoreInst>(inst)) {
// Only stores to the object (uses as the pointer operand) are allowed;
// storing the object itself as a value lets it escape.
if (use->getOperandNo() != StoreInst::getPointerOperandIndex())
return false;
auto storev = store->getValueOperand();
// A GC-tracked pointer is being stored into the object, i.e. the object would contain
// a GC root; be conservative and keep it on the heap.
if (auto ptrtype = dyn_cast<PointerType>(storev->getType())) {
if (ptrtype->getAddressSpace() == AddressSpace::Tracked) {
return false;
}
}
return true;
}
if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
push_inst(inst);
return true;
}
if (auto gep = dyn_cast<GetElementPtrInst>(inst)) {
APInt apoffset(sizeof(void*) * 8, cur.offset, true);
if (ignore_tag && (!gep->accumulateConstantOffset(*DL, apoffset) ||
apoffset.isNegative()))
ignore_tag = false;
push_inst(inst);
cur.offset = apoffset.getLimitedValue();
// Check overflow
if (cur.offset == UINT64_MAX)
ignore_tag = false;
return true;
}
return false;
};
while (true) {
assert(cur.use_it != cur.use_end);
auto use = &*cur.use_it;
auto inst = dyn_cast<Instruction>(use->getUser());
++cur.use_it;
if (!inst)
return false;
if (!check_inst(inst, use))
return false;
uses.insert(inst);
if (cur.use_it == cur.use_end) {
if (stack.empty())
return true;
cur = stack.back();
stack.pop_back();
}
}
}
// This function needs to handle all cases `AllocOpt::checkInst` can handle.
// This function should not erase any safepoint so that the lifetime marker can find and cache
// all the original safepoints.
void AllocOpt::replaceUsesWith(Instruction *orig_inst, Instruction *new_inst,
ReplaceUsesStack &stack)
{
auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
if (orig_i->user_empty()) {
if (orig_i != orig_inst)
orig_i->eraseFromParent();
return true;
}
Type *orig_t = orig_i->getType();
Type *new_t = new_i->getType();
if (orig_t == new_t) {
orig_i->replaceAllUsesWith(new_i);
if (orig_i != orig_inst)
orig_i->eraseFromParent();
return true;
}
return false;
};
if (simple_replace(orig_inst, new_inst))
return;
assert(stack.empty());
ReplaceUsesFrame cur{orig_inst, new_inst};
auto finish_cur = [&] () {
assert(cur.orig_i->user_empty());
if (cur.orig_i != orig_inst) {
cur.orig_i->eraseFromParent();
}
};
auto push_frame = [&] (Instruction *orig_i, Instruction *new_i) {
if (simple_replace(orig_i, new_i))
return;
stack.push_back(cur);
cur = {orig_i, new_i};
};
// Both `orig_i` and `new_i` should be pointers to the same element type,
// possibly in different address spaces. `new_i` is always in addrspace 0.
auto replace_inst = [&] (Instruction *user) {
Instruction *orig_i = cur.orig_i;
Instruction *new_i = cur.new_i;
if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
user->replaceUsesOfWith(orig_i, new_i);
}
else if (auto call = dyn_cast<CallInst>(user)) {
if (ptr_from_objref && ptr_from_objref == call->getCalledFunction()) {
call->replaceAllUsesWith(new_i);
call->eraseFromParent();
return;
}
// remove from operand bundle
Type *new_t = new_i->getType();
user->replaceUsesOfWith(orig_i, ConstantPointerNull::get(cast<PointerType>(new_t)));
}
else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
auto cast_t = PointerType::get(cast<PointerType>(user->getType())->getElementType(),
0);
auto replace_i = new_i;
Type *new_t = new_i->getType();
if (cast_t != new_t) {
replace_i = new BitCastInst(replace_i, cast_t, "", user);
replace_i->setDebugLoc(user->getDebugLoc());
replace_i->takeName(user);
}
push_frame(user, replace_i);
}
else if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
SmallVector<Value *, 4> IdxOperands(gep->idx_begin(), gep->idx_end());
auto new_gep = GetElementPtrInst::Create(gep->getSourceElementType(),
new_i, IdxOperands,
gep->getName(), gep);
new_gep->setIsInBounds(gep->isInBounds());
new_gep->takeName(gep);
copyMetadata(new_gep, gep);
push_frame(gep, new_gep);
}
else {
abort();
}
};
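// Rewrite users one at a time. `push_frame` switches `cur` to a newly created derived
// instruction (cast/GEP) so that its own users get rewritten next; once the current
// original instruction has no users left it is erased and we pop back to the parent frame.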
while (true) {
replace_inst(cast<Instruction>(*cur.orig_i->user_begin()));
while (cur.orig_i->use_empty()) {
finish_cur();
if (stack.empty())
return;
cur = stack.back();
stack.pop_back();
}
}
}
bool AllocOpt::isSafepoint(Instruction *inst)
{
auto call = dyn_cast<CallInst>(inst);
if (!call)
return false;
if (isa<IntrinsicInst>(call))
return false;
if (auto callee = call->getCalledFunction()) {
// Known functions emitted in codegen that are not safepoints
if (callee == ptr_from_objref || callee->getName() == "memcmp") {
return false;
}
}
return true;
}
bool AllocOpt::runOnFunction(Function &F)
{
if (!alloc_obj)
return false;
SmallVector<std::pair<CallInst*,size_t>,6> allocs;
for (auto &bb: F) {
for (auto &I: bb) {
auto call = dyn_cast<CallInst>(&I);
if (!call)
continue;
auto callee = call->getCalledFunction();
if (!callee)
continue;
size_t sz;
if (callee == alloc_obj) {
assert(call->getNumArgOperands() == 3);
sz = (size_t)cast<ConstantInt>(call->getArgOperand(1))->getZExtValue();
}
else {
continue;
}
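// Only consider allocations small enough to be represented as an integer-typed
// alloca (sz * 8 bits has to fit in an LLVM integer type).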
if (sz < IntegerType::MAX_INT_BITS / 8 && sz < INT32_MAX) {
allocs.push_back(std::make_pair(call, sz));
}
}
}
auto &entry = F.getEntryBlock();
CheckInstStack check_stack;
ReplaceUsesStack replace_stack;
std::set<Instruction*> alloc_uses;
LifetimeMarker lifetime(*this);
for (auto &it: allocs) {
bool ignore_tag = true;
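// `ignore_tag` stays true as long as no use can possibly read the type tag stored just
// before the object; `checkInst` clears it when it sees a GEP with a negative,
// non-constant or overflowing offset.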
auto orig = it.first;
size_t &sz = it.second;
if (!checkInst(orig, check_stack, alloc_uses, ignore_tag)) {
sz = UINT32_MAX;
continue;
}
// The allocation does not escape and is not used in a phi node, so none of the
// SSA values derived from it are live when the allocation runs again.
// It is now safe to promote the allocation to an entry-block alloca.
size_t align = 1;
// TODO make codegen handling of alignment consistent and pass that as a parameter
// to the allocation function directly.
if (!ignore_tag) {
align = sz <= 8 ? 8 : JL_SMALL_BYTE_ALIGNMENT;
sz += align;
}
else if (sz > 1) {
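// Halve the alignment until it no longer exceeds the object size, i.e. use the largest
// power of two that is <= sz (capped at JL_SMALL_BYTE_ALIGNMENT).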
align = JL_SMALL_BYTE_ALIGNMENT;
while (sz < align) {
align = align / 2;
}
}
// No debug info for prolog instructions
IRBuilder<> prolog_builder(&entry.front());
AllocaInst *buff;
Instruction *ptr;
if (sz == 0) {
buff = prolog_builder.CreateAlloca(T_int8, ConstantInt::get(T_int64, 0));
ptr = buff;
}
else {
buff = prolog_builder.CreateAlloca(Type::getIntNTy(*ctx, sz * 8));
buff->setAlignment(align);
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, T_pint8));
}
lifetime.insert(ptr, ConstantInt::get(T_int64, sz), orig, alloc_uses);
// Someone might be reading the tag, initialize it.
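// The alloca was over-allocated by `align` bytes above; move the object pointer past
// that padding so the tag can be stored one pointer before the object
// (at object_ptr - sizeof(void*)), where readers of the tag expect it.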
if (!ignore_tag) {
ptr = cast<Instruction>(prolog_builder.CreateConstGEP1_32(T_int8, ptr, align));
auto casti = prolog_builder.CreateBitCast(ptr, T_pprjlvalue);
auto tagaddr = prolog_builder.CreateGEP(T_prjlvalue, casti,
ConstantInt::get(T_size, -1));
// Store should be created at the callsite and not in the prolog
auto store = new StoreInst(orig->getArgOperand(2), tagaddr, orig);
store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
store->setDebugLoc(orig->getDebugLoc());
}
auto casti = cast<Instruction>(prolog_builder.CreateBitCast(ptr, T_pjlvalue));
casti->takeName(orig);
replaceUsesWith(orig, cast<Instruction>(casti), replace_stack);
}
for (auto it: allocs) {
if (it.second == UINT32_MAX)
continue;
it.first->eraseFromParent();
}
return true;
}
char AllocOpt::ID = 0;
static RegisterPass<AllocOpt> X("AllocOpt", "Promote heap allocation to stack",
false /* Only looks at CFG */,
false /* Analysis Pass */);
}
Pass *createAllocOptPass()
{
return new AllocOpt();
}