-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Split the JIT compiler into an optimizer and concurrent compiler layer #44364
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -8,6 +8,7 @@ | |||||||||||
#include "llvm/IR/LegacyPassManager.h" | ||||||||||||
|
||||||||||||
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h> | ||||||||||||
#include <llvm/ExecutionEngine/Orc/IRTransformLayer.h> | ||||||||||||
#include <llvm/ExecutionEngine/JITEventListener.h> | ||||||||||||
|
||||||||||||
#include <llvm/Target/TargetMachine.h> | ||||||||||||
|
@@ -176,28 +177,46 @@ typedef JITSymbol JL_JITSymbol; | |||||||||||
typedef JITSymbol JL_SymbolInfo; | ||||||||||||
|
||||||||||||
using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>; | ||||||||||||
using OptimizerResultT = Expected<orc::ThreadSafeModule>; | ||||||||||||
|
||||||||||||
class JuliaOJIT { | ||||||||||||
// IRCompiler implementation that keeps a reference back to its JuliaOJIT. | ||||||||||||
struct CompilerT : public orc::IRCompileLayer::IRCompiler { | ||||||||||||
// pjit is dereferenced unconditionally here, so it must not be null. | ||||||||||||
// The base IRCompiler is given default-constructed mangling options. | ||||||||||||
CompilerT(JuliaOJIT *pjit) | ||||||||||||
: IRCompiler(orc::IRSymbolMapper::ManglingOptions{}), | ||||||||||||
jit(*pjit) {} | ||||||||||||
// Takes module M and yields Expected<std::unique_ptr<llvm::MemoryBuffer>> | ||||||||||||
// (CompilerResultT). NOTE(review): the definition is not in this header; | ||||||||||||
// presumably the buffer holds the compiled object code -- confirm. | ||||||||||||
virtual CompilerResultT operator()(Module &M) override; | ||||||||||||
private: | ||||||||||||
// Stored by reference, not owned: the JuliaOJIT must outlive this compiler. | ||||||||||||
JuliaOJIT &jit; | ||||||||||||
}; | ||||||||||||
// Custom object emission notification handler for the JuliaOJIT | ||||||||||||
template <typename ObjT, typename LoadResult> | ||||||||||||
void registerObject(const ObjT &Obj, const LoadResult &LO); | ||||||||||||
|
||||||||||||
public: | ||||||||||||
#ifdef JL_USE_JITLINK | ||||||||||||
typedef orc::ObjectLinkingLayer ObjLayerT; | ||||||||||||
#else | ||||||||||||
typedef orc::RTDyldObjectLinkingLayer ObjLayerT; | ||||||||||||
#endif | ||||||||||||
typedef orc::IRCompileLayer CompileLayerT; | ||||||||||||
typedef orc::IRTransformLayer OptimizeLayerT; | ||||||||||||
typedef object::OwningBinary<object::ObjectFile> OwningObj; | ||||||||||||
private: | ||||||||||||
// Callable that pairs one legacy::PassManager with one optimization level. | ||||||||||||
struct OptimizerT { | ||||||||||||
// PM is captured by reference (see the member below), so the caller's pass | ||||||||||||
// manager must stay alive for as long as this object is used. | ||||||||||||
OptimizerT(legacy::PassManager &PM, int optlevel) : optlevel(optlevel), PM(PM) {} | ||||||||||||
|
||||||||||||
// Consumes module M by value and returns Expected<orc::ThreadSafeModule> | ||||||||||||
// (OptimizerResultT). NOTE(review): the definition is not in this header; | ||||||||||||
// presumably it runs PM over M -- confirm against the implementation. | ||||||||||||
OptimizerResultT operator()(orc::ThreadSafeModule M, orc::MaterializationResponsibility &R); | ||||||||||||
private: | ||||||||||||
int optlevel; | ||||||||||||
// Not owned; refers to the pass manager passed to the constructor. | ||||||||||||
legacy::PassManager &PM; | ||||||||||||
}; | ||||||||||||
// Custom object emission notification handler for the JuliaOJIT | ||||||||||||
template <typename ObjT, typename LoadResult> | ||||||||||||
void registerObject(const ObjT &Obj, const LoadResult &LO); | ||||||||||||
|
||||||||||||
// IRLayer that sits in front of a caller-provided array of optimize layers. | ||||||||||||
// NOTE(review): how emit() picks among the layers is defined elsewhere; | ||||||||||||
// presumably it selects one per module by optimization level -- confirm. | ||||||||||||
struct OptSelLayerT : orc::IRLayer { | ||||||||||||
|
||||||||||||
// Accepts a reference to a fixed-size array so N is deduced at compile time. | ||||||||||||
// The base IRLayer is built from the first element's execution session and | ||||||||||||
// mangling options; the array then decays to a pointer stored with its length. | ||||||||||||
template<size_t N> | ||||||||||||
OptSelLayerT(OptimizeLayerT (&optimizers)[N]) : orc::IRLayer(optimizers[0].getExecutionSession(), optimizers[0].getManglingOptions()), optimizers(optimizers), count(N) { | ||||||||||||
// optimizers[0] is read above, so an empty array is rejected at compile time. | ||||||||||||
static_assert(N > 0, "Expected array with at least one optimizer!"); | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Overrides IRLayer::emit; takes ownership of both R and TSM. | ||||||||||||
void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override; | ||||||||||||
|
||||||||||||
private: | ||||||||||||
// Non-owning pointer to the first element of the array given to the | ||||||||||||
// constructor; that array must outlive this layer. | ||||||||||||
OptimizeLayerT *optimizers; | ||||||||||||
// Number of elements reachable through `optimizers`. | ||||||||||||
size_t count; | ||||||||||||
}; | ||||||||||||
|
||||||||||||
public: | ||||||||||||
|
||||||||||||
JuliaOJIT(TargetMachine &TM, LLVMContext *Ctx); | ||||||||||||
|
||||||||||||
|
@@ -227,14 +246,11 @@ class JuliaOJIT { | |||||||||||
const DataLayout DL; | ||||||||||||
// Should be big enough that, in the common case, the | ||||||||||||
// object fits in its entirety | ||||||||||||
SmallVector<char, 4096> ObjBufferSV; | ||||||||||||
raw_svector_ostream ObjStream; | ||||||||||||
legacy::PassManager PM0; // per-optlevel pass managers | ||||||||||||
legacy::PassManager PM1; | ||||||||||||
legacy::PassManager PM2; | ||||||||||||
legacy::PassManager PM3; | ||||||||||||
TargetMachine *TMs[4]; | ||||||||||||
MCContext *Ctx; | ||||||||||||
std::unique_ptr<TargetMachine> TMs[4]; | ||||||||||||
|
||||||||||||
orc::ThreadSafeContext TSCtx; | ||||||||||||
orc::ExecutionSession ES; | ||||||||||||
|
@@ -245,7 +261,12 @@ class JuliaOJIT { | |||||||||||
std::shared_ptr<RTDyldMemoryManager> MemMgr; | ||||||||||||
#endif | ||||||||||||
ObjLayerT ObjectLayer; | ||||||||||||
CompileLayerT CompileLayer; | ||||||||||||
CompileLayerT CompileLayer0; | ||||||||||||
CompileLayerT CompileLayer1; | ||||||||||||
CompileLayerT CompileLayer2; | ||||||||||||
CompileLayerT CompileLayer3; | ||||||||||||
Comment on lines
+264
to
+267
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason not to make this an array too (like the OptimizeLayers)?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The compiler was complaining about the compile layer's internal std::mutex not being move-constructible, which prevented me from actually creating the CompileLayerT instances during construction. I suspect this will be less of an issue in C++17 with guaranteed copy elision, so it might be good to revisit then. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, I see: the necessary expression (brace initialization) changed meaning in C++20, so it can only be compiled with |
||||||||||||
OptimizeLayerT OptimizeLayers[4]; | ||||||||||||
OptSelLayerT OptSelLayer; | ||||||||||||
|
||||||||||||
DenseMap<void*, std::string> ReverseLocalSymbolTable; | ||||||||||||
}; | ||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any measurable overhead to now recreating a SimpleCompiler and allocating a new legacy::PassManager for each function we compile?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll look into gathering these measurements soon.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a compilation-time difference between this PR and the equivalent master branch:
Master
PR
I would guess that much of the new overhead comes from some combination of creating a new TargetMachine every time we compile a module (instead of reusing the same one), creating that extra PassManager every time, and reallocating the object buffer every time. One thing we could do is simply lock around these shared resources, but if we move to a parallelized middle-end/backend we might want the extra concurrency opportunity here.