Skip to content

Commit fac56d6

Browse files
committed
Store live_outs as a bitset
We were storing and passing them as std::unordered_map (and sometimes switching to std::vector). But the set can only contain the integers 0 through 15, so just represent it as a bitset.
1 parent 38cdeb5 commit fac56d6

File tree

9 files changed

+111
-56
lines changed

9 files changed

+111
-56
lines changed

src/asm_writing/icinfo.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,14 +183,14 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
183183
}
184184

185185
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
186-
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
186+
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
187187
assembler::GenericRegister return_register, TypeRecorder* type_recorder)
188188
: next_slot_to_try(0),
189189
stack_info(stack_info),
190190
num_slots(num_slots),
191191
slot_size(slot_size),
192192
calling_conv(calling_conv),
193-
live_outs(live_outs.begin(), live_outs.end()),
193+
live_outs(std::move(_live_outs)),
194194
return_register(return_register),
195195
type_recorder(type_recorder),
196196
retry_in(0),
@@ -200,15 +200,14 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
200200
slowpath_rtn_addr(slowpath_rtn_addr),
201201
continue_addr(continue_addr) {
202202
for (int i = 0; i < num_slots; i++) {
203-
slots.push_back(ICSlotInfo(this, i));
203+
slots.emplace_back(this, i);
204204
}
205205
}
206206

207207
static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
208208
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
209209
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
210-
const ICSetupInfo* ic, StackInfo stack_info,
211-
std::unordered_set<int> live_outs) {
210+
const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs) {
212211
assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
213212
assert(slowpath_rtn_addr > slowpath_start_addr);
214213
assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
@@ -221,9 +220,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
221220
static const int DWARF_RAX = 0;
222221
// It's possible that the return value doesn't get used, in which case
223222
// we can avoid copying back into RAX at the end
224-
if (live_outs.count(DWARF_RAX)) {
225-
live_outs.erase(DWARF_RAX);
226-
}
223+
live_outs.clear(DWARF_RAX);
227224

228225
// TODO we only need to do this if 0 was in live_outs, since if it wasn't, that indicates
229226
// the return value won't be used and we can optimize based on that.
@@ -247,7 +244,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
247244
}
248245

249246
ICInfo* icinfo = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
250-
ic->getCallingConvention(), live_outs, return_register, ic->type_recorder);
247+
ic->getCallingConvention(), std::move(live_outs), return_register, ic->type_recorder);
251248

252249
ics_by_return_addr[slowpath_rtn_addr] = icinfo;
253250

src/asm_writing/icinfo.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "asm_writing/assembler.h"
2525
#include "asm_writing/types.h"
26+
#include "core/util.h"
2627

2728
namespace pyston {
2829

@@ -91,6 +92,8 @@ class ICSlotRewrite {
9192
friend class ICInfo;
9293
};
9394

95+
typedef BitSet<16> LiveOutSet;
96+
9497
class ICInfo {
9598
private:
9699
std::vector<ICSlotInfo> slots;
@@ -105,7 +108,7 @@ class ICInfo {
105108
const int num_slots;
106109
const int slot_size;
107110
const llvm::CallingConv::ID calling_conv;
108-
const std::vector<int> live_outs;
111+
LiveOutSet live_outs;
109112
const assembler::GenericRegister return_register;
110113
TypeRecorder* const type_recorder;
111114
int retry_in, retry_backoff;
@@ -116,14 +119,14 @@ class ICInfo {
116119

117120
public:
118121
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
119-
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
122+
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs,
120123
assembler::GenericRegister return_register, TypeRecorder* type_recorder);
121124
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;
122125

123126
int getSlotSize() { return slot_size; }
124127
int getNumSlots() { return num_slots; }
125128
llvm::CallingConv::ID getCallingConvention() { return calling_conv; }
126-
const std::vector<int>& getLiveOuts() { return live_outs; }
129+
const LiveOutSet& getLiveOuts() { return live_outs; }
127130

128131
std::unique_ptr<ICSlotRewrite> startRewrite(const char* debug_name);
129132
void clear(ICSlotInfo* entry);
@@ -138,8 +141,7 @@ class ICSetupInfo;
138141
struct CompiledFunction;
139142
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
140143
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
141-
const ICSetupInfo*, StackInfo stack_info,
142-
std::unordered_set<int> live_outs);
144+
const ICSetupInfo*, StackInfo stack_info, LiveOutSet live_outs);
143145
void deregisterCompiledPatchpoint(ICInfo* ic);
144146

145147
ICInfo* getICInfo(void* rtn_addr);

src/asm_writing/rewriter.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,7 +1786,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
17861786
return rewrite->getTypeRecorder();
17871787
}
17881788

1789-
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs)
1789+
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs)
17901790
: rewrite(std::move(rewrite)),
17911791
assembler(this->rewrite->getAssembler()),
17921792
picked_slot(NULL),
@@ -1837,6 +1837,11 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const s
18371837
var->locations.push_back(l);
18381838
}
18391839

1840+
// Make sure there weren't duplicates in the live_outs list.
1841+
// Probably not a big deal if there were, but we shouldn't be generating those.
1842+
assert(std::find(this->live_out_regs.begin(), this->live_out_regs.end(), dwarf_regnum)
1843+
== this->live_out_regs.end());
1844+
18401845
this->live_outs.push_back(var);
18411846
this->live_out_regs.push_back(dwarf_regnum);
18421847
}
@@ -2034,25 +2039,23 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) {
20342039
}
20352040

20362041
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
2037-
int scratch_offset, int scratch_size,
2038-
const std::unordered_set<int>& live_outs, SpillMap& remapped) {
2042+
int scratch_offset, int scratch_size, LiveOutSet live_outs,
2043+
SpillMap& remapped) {
20392044
assert(start_addr < end_addr);
20402045

20412046
int est_slowpath_size = INITIAL_CALL_SIZE;
20422047

20432048
std::vector<assembler::GenericRegister> regs_to_spill;
20442049
std::vector<assembler::Register> regs_to_reload;
20452050

2046-
std::unordered_set<int> live_outs_for_slot;
2047-
20482051
for (int dwarf_regnum : live_outs) {
20492052
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
20502053

20512054
assert(!(ru.type == assembler::GenericRegister::GP && ru.gp == assembler::R11) && "We assume R11 is free!");
20522055

20532056
if (ru.type == assembler::GenericRegister::GP) {
20542057
if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) {
2055-
live_outs_for_slot.insert(dwarf_regnum);
2058+
live_outs.set(dwarf_regnum);
20562059
continue;
20572060
}
20582061
}
@@ -2068,7 +2071,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
20682071
continue;
20692072
}
20702073

2071-
live_outs_for_slot.insert(dwarf_regnum);
2074+
live_outs.set(dwarf_regnum);
20722075

20732076
regs_to_spill.push_back(ru);
20742077

@@ -2125,7 +2128,6 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
21252128
assem.fillWithNops();
21262129
assert(!assem.hasFailed());
21272130

2128-
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr,
2129-
std::move(live_outs_for_slot));
2131+
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr, std::move(live_outs));
21302132
}
21312133
}

src/asm_writing/rewriter.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ class Rewriter : public ICSlotRewrite::CommitHook {
355355
llvm::SmallVector<RewriterVar*, 8> args;
356356
llvm::SmallVector<RewriterVar*, 8> live_outs;
357357

358-
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs);
358+
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs);
359359

360360
llvm::SmallVector<RewriterAction, 32> actions;
361361
void addAction(std::function<void()> action, llvm::ArrayRef<RewriterVar*> vars, ActionType type) {
@@ -555,19 +555,19 @@ struct PatchpointInitializationInfo {
555555
uint8_t* slowpath_start;
556556
uint8_t* slowpath_rtn_addr;
557557
uint8_t* continue_addr;
558-
std::unordered_set<int> live_outs;
558+
LiveOutSet live_outs;
559559

560560
PatchpointInitializationInfo(uint8_t* slowpath_start, uint8_t* slowpath_rtn_addr, uint8_t* continue_addr,
561-
std::unordered_set<int>&& live_outs)
561+
LiveOutSet live_outs)
562562
: slowpath_start(slowpath_start),
563563
slowpath_rtn_addr(slowpath_rtn_addr),
564564
continue_addr(continue_addr),
565565
live_outs(std::move(live_outs)) {}
566566
};
567567

568568
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
569-
int scratch_offset, int scratch_size,
570-
const std::unordered_set<int>& live_outs, SpillMap& remapped);
569+
int scratch_offset, int scratch_size, LiveOutSet live_outs,
570+
SpillMap& remapped);
571571

572572
template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) {
573573
return getAttr(offset, loc, assembler::MovType::ZBL);

src/codegen/baseline_jit.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
102102

103103
int scratch_offset = num_stack_args * 8;
104104
StackInfo stack_info(scratch_size, scratch_offset);
105-
std::unordered_set<int> live_outs;
105+
LiveOutSet live_outs;
106106

107107
void* fragment_start = a.curInstPointer() - patch_jump_offset;
108108
long fragment_offset = a.bytesWritten() - patch_jump_offset;
@@ -612,13 +612,13 @@ int JitFragmentWriter::finishCompilation() {
612612
uint8_t* end_addr = pp_info.end_addr;
613613
PatchpointInitializationInfo initialization_info
614614
= initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
615-
0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
615+
0 /* scratch_size */, LiveOutSet(), _spill_map);
616616
uint8_t* slowpath_start = initialization_info.slowpath_start;
617617
uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;
618618

619-
std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
620-
start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
621-
pp_info.stack_info, std::unordered_set<int>());
619+
std::unique_ptr<ICInfo> pp
620+
= registerCompiledPatchpoint(start_addr, slowpath_start, initialization_info.continue_addr,
621+
slowpath_rtn_addr, pp_info.ic.get(), pp_info.stack_info, LiveOutSet());
622622
pp.release();
623623
}
624624

src/codegen/irgen/irgenerator.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ ScopeInfo* IRGenState::getScopeInfoForNode(AST* node) {
247247
class IREmitterImpl : public IREmitter {
248248
private:
249249
IRGenState* irstate;
250-
IRBuilder* builder;
250+
std::unique_ptr<IRBuilder> builder;
251251
llvm::BasicBlock*& curblock;
252252
IRGenerator* irgenerator;
253253

@@ -348,6 +348,9 @@ class IREmitterImpl : public IREmitter {
348348
public:
349349
explicit IREmitterImpl(IRGenState* irstate, llvm::BasicBlock*& curblock, IRGenerator* irgenerator)
350350
: irstate(irstate), builder(new IRBuilder(g.context)), curblock(curblock), irgenerator(irgenerator) {
351+
// Perf note: it seems to be more efficient to separately allocate the "builder" member,
352+
// even though we could allocate it in-line; maybe it's infrequently used enough that it's better
353+
// to not have it take up cache space.
351354

352355
RELEASE_ASSERT(irstate->getSourceInfo()->scoping->areGlobalsFromModule(),
353356
"jit doesn't support custom globals yet");
@@ -356,7 +359,7 @@ class IREmitterImpl : public IREmitter {
356359
builder->SetInsertPoint(curblock);
357360
}
358361

359-
IRBuilder* getBuilder() override { return builder; }
362+
IRBuilder* getBuilder() override { return &*builder; }
360363

361364
GCBuilder* getGC() override { return irstate->getGC(); }
362365

@@ -557,8 +560,6 @@ class IRGeneratorImpl : public IRGenerator {
557560
types(types),
558561
state(RUNNING) {}
559562

560-
~IRGeneratorImpl() { delete emitter.getBuilder(); }
561-
562563
private:
563564
OpInfo getOpInfoForNode(AST* ast, const UnwindInfo& unw_info) {
564565
assert(ast);

src/codegen/patchpoints.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ static int extractScratchOffset(PatchpointInfo* pp, StackMap::Record* r) {
117117
return l.offset;
118118
}
119119

120-
static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
121-
std::unordered_set<int> live_outs;
120+
static LiveOutSet extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
121+
LiveOutSet live_outs;
122122

123123
// Using the C calling convention, there shouldn't be any non-callee-save registers in here,
124124
// but LLVM is conservative and will add some. So with the C cc, ignored the specified live outs
125125
if (cc != llvm::CallingConv::C) {
126126
for (const auto& live_out : r->live_outs) {
127-
live_outs.insert(live_out.regnum);
127+
live_outs.set(live_out.regnum);
128128
}
129129
}
130130

@@ -133,11 +133,11 @@ static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::Callin
133133
// sense to track them as live_outs.
134134
// Unfortunately this means we need to be conservative about it unless
135135
// we can change llvm's behavior.
136-
live_outs.insert(3);
137-
live_outs.insert(12);
138-
live_outs.insert(13);
139-
live_outs.insert(14);
140-
live_outs.insert(15);
136+
live_outs.set(3);
137+
live_outs.set(12);
138+
live_outs.set(13);
139+
live_outs.set(14);
140+
live_outs.set(15);
141141

142142
return live_outs;
143143
}
@@ -212,12 +212,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
212212
if (ic == NULL) {
213213
// We have to be using the C calling convention here, so we don't need to check the live outs
214214
// or save them across the call.
215-
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
216-
std::unordered_set<int>(), frame_remapped);
215+
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, LiveOutSet(),
216+
frame_remapped);
217217
continue;
218218
}
219219

220-
std::unordered_set<int> live_outs(extractLiveOuts(r, ic->getCallingConvention()));
220+
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));
221221

222222
if (ic->hasReturnValue()) {
223223
assert(ic->getCallingConvention() == llvm::CallingConv::C
@@ -226,14 +226,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
226226
static const int DWARF_RAX = 0;
227227
// It's possible that the return value doesn't get used, in which case
228228
// we can avoid copying back into RAX at the end
229-
if (live_outs.count(DWARF_RAX)) {
230-
live_outs.erase(DWARF_RAX);
231-
}
229+
live_outs.clear(DWARF_RAX);
232230
}
233231

234232

235233
auto initialization_info = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset,
236-
scratch_size, live_outs, frame_remapped);
234+
scratch_size, std::move(live_outs), frame_remapped);
237235

238236
ASSERT(initialization_info.slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
239237
"Used more slowpath space than expected; change ICSetupInfo::totalSize()?");

0 commit comments

Comments
 (0)