Split the JIT compiler into an optimizer and concurrent compiler layer #44364

Merged (4 commits, Mar 2, 2022)
189 changes: 97 additions & 92 deletions src/jitlayers.cpp
@@ -456,108 +456,91 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
#endif
}

static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, raw_svector_ostream &ObjStream, MCContext *Ctx, int optlevel)
static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, int optlevel)
{
addTargetPasses(&PM, &TM);
addOptimizationPasses(&PM, optlevel);
addMachinePasses(&PM, &TM, optlevel);
if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
llvm_unreachable("Target does not support MC emission.");
Comment on lines -464 to -465 (Member):
Is there any measurable overhead to now recreating a SimpleCompiler and allocating a new legacy::PassManager for each function we compile?

Member Author:
I'll look into gathering these measurements soon

Member Author:
There is a compilation time difference between this PR and the equivalent master branch

Master

Core.Compiler ──── 52.7408 seconds

Sysimage built. Summary:
Total ───────  63.663837 seconds 
Base: ───────  26.390346 seconds 41.4526%
Stdlibs: ────  37.272149 seconds 58.5452%

Precompilation complete. Summary:
Total ─────── 120.947376 seconds
Generation ──  90.811543 seconds 75.0835%
Execution ───  30.135833 seconds 24.9165%

Performance counter stats for 'make -j80':

        468,212.14 msec task-clock                #    1.083 CPUs utilized          
        24,923,685      context-switches          #    0.053 M/sec                  
             1,770      cpu-migrations            #    0.004 K/sec                  
         5,328,582      page-faults               #    0.011 M/sec                  
 1,540,436,072,829      cycles                    #    3.290 GHz                      (83.18%)
    78,252,750,530      stalled-cycles-frontend   #    5.08% frontend cycles idle     (83.33%)
   273,153,982,805      stalled-cycles-backend    #   17.73% backend cycles idle      (83.47%)
 2,185,937,524,077      instructions              #    1.42  insn per cycle         
                                                  #    0.12  stalled cycles per insn  (83.49%)
   414,972,231,481      branches                  #  886.291 M/sec                    (83.35%)
    10,422,654,262      branch-misses             #    2.51% of all branches          (83.18%)

     432.197106701 seconds time elapsed

     430.046373000 seconds user
      38.399162000 seconds sys

PR

Core.Compiler ──── 55.709 seconds

Sysimage built. Summary:
Total ───────  65.823383 seconds 
Base: ───────  27.650612 seconds 42.0073%
Stdlibs: ────  38.170825 seconds 57.9898%

Precompilation complete. Summary:
Total ─────── 128.880076 seconds
Generation ──  98.074744 seconds 76.0977%
Execution ───  30.805332 seconds 23.9023%

Performance counter stats for 'make -j80':

        481,978.79 msec task-clock                #    1.076 CPUs utilized          
        22,612,514      context-switches          #    0.047 M/sec                  
             1,792      cpu-migrations            #    0.004 K/sec                  
         5,004,754      page-faults               #    0.010 M/sec                  
 1,580,536,814,458      cycles                    #    3.279 GHz                      (83.30%)
    83,180,612,505      stalled-cycles-frontend   #    5.26% frontend cycles idle     (83.36%)
   288,438,311,365      stalled-cycles-backend    #   18.25% backend cycles idle      (83.41%)
 2,256,886,561,070      instructions              #    1.43  insn per cycle         
                                                  #    0.13  stalled cycles per insn  (83.38%)
   432,075,441,674      branches                  #  896.462 M/sec                    (83.30%)
    10,795,688,654      branch-misses             #    2.50% of all branches          (83.25%)

     447.817094335 seconds time elapsed

     445.979433000 seconds user
      36.109125000 seconds sys

I would guess that much of the new overhead comes from some combination of creating a new TargetMachine every time we compile a module rather than reusing a single one, creating that extra PassManager on every compile, and reallocating the object buffer each time. One option would be to simply lock around these shared resources, but if we move to a parallelized middle-end/backend we probably want to keep the extra concurrency opportunity this leaves open.
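For illustration, here is a minimal sketch of the "lock around these shared resources" option mentioned above: a single pre-populated pass pipeline shared behind a mutex, so nothing is rebuilt per compile but only one module can be optimized at a time. The `LockedPipeline` type is invented for this sketch and is not part of the PR.

```cpp
#include <mutex>
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

// Hypothetical sketch of the locking alternative: one shared, pre-populated
// pass pipeline per optimization level, serialized by a mutex. Nothing is
// rebuilt per compile, but optimization of different modules cannot overlap,
// which is the concurrency opportunity the comment above wants to keep.
struct LockedPipeline {
    llvm::legacy::PassManager PM; // populated once (e.g. by addPassesForOptLevel)
    std::mutex Lock;              // legacy::PassManager is not thread-safe

    void run(llvm::Module &M) {
        std::lock_guard<std::mutex> Guard(Lock);
        PM.run(M);                // only one module is optimized at a time
    }
};
```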

}

static auto countBasicBlocks(const Function &F)
{
return std::distance(F.begin(), F.end());
}

CompilerResultT JuliaOJIT::CompilerT::operator()(Module &M)
{
uint64_t start_time = 0;
if (dump_llvm_opt_stream != NULL) {
// Print LLVM function statistics _before_ optimization
// Print all the information about this invocation as a YAML object
jl_printf(dump_llvm_opt_stream, "- \n");
// We print the name and some statistics for each function in the module, both
// before optimization and again afterwards.
jl_printf(dump_llvm_opt_stream, " before: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
OptimizerResultT JuliaOJIT::OptimizerT::operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
TSM.withModuleDo([&](Module &M){
uint64_t start_time = 0;
if (dump_llvm_opt_stream != NULL) {
// Print LLVM function statistics _before_ optimization
// Print all the information about this invocation as a YAML object
jl_printf(dump_llvm_opt_stream, "- \n");
// We print the name and some statistics for each function in the module, both
// before optimization and again afterwards.
jl_printf(dump_llvm_opt_stream, " before: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
// Each function is printed as a YAML object with several attributes
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}
// Each function is printed as a YAML object with several attributes
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));

start_time = jl_hrtime();
}

start_time = jl_hrtime();
}
JL_TIMING(LLVM_OPT);

PM.run(M);

JL_TIMING(LLVM_OPT);
uint64_t end_time = 0;
if (dump_llvm_opt_stream != NULL) {
end_time = jl_hrtime();
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);

int optlevel;
int optlevel_min;
if (jl_generating_output()) {
optlevel = 0;
}
else {
optlevel = jl_options.opt_level;
optlevel_min = jl_options.opt_level_min;
for (auto &F : M.functions()) {
if (!F.getBasicBlockList().empty()) {
Attribute attr = F.getFnAttribute("julia-optimization-level");
StringRef val = attr.getValueAsString();
if (val != "") {
int ol = (int)val[0] - '0';
if (ol >= 0 && ol < optlevel)
optlevel = ol;
// Print LLVM function statistics _after_ optimization
jl_printf(dump_llvm_opt_stream, " after: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}
}
optlevel = std::max(optlevel, optlevel_min);
}
if (optlevel == 0)
jit.PM0.run(M);
else if (optlevel == 1)
jit.PM1.run(M);
else if (optlevel == 2)
jit.PM2.run(M);
else if (optlevel >= 3)
jit.PM3.run(M);

std::unique_ptr<MemoryBuffer> ObjBuffer(
new SmallVectorMemoryBuffer(std::move(jit.ObjBufferSV)));
auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());

if (!Obj) {
llvm_dump(&M);
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Obj.takeError(), OS, "");
OS.flush();
llvm::report_fatal_error(llvm::Twine("FATAL: Unable to compile LLVM Module: '") + Buf + "'\n"
"The module's content was printed above. Please file a bug report");
}

uint64_t end_time = 0;
if (dump_llvm_opt_stream != NULL) {
end_time = jl_hrtime();
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);
});
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
}

// Print LLVM function statistics _after_ optimization
jl_printf(dump_llvm_opt_stream, " after: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
size_t optlevel = ~0ull;
TSM.withModuleDo([&](Module &M) {
if (jl_generating_output()) {
optlevel = 0;
}
else {
optlevel = std::max(static_cast<int>(jl_options.opt_level), 0);
size_t optlevel_min = std::max(static_cast<int>(jl_options.opt_level_min), 0);
for (auto &F : M.functions()) {
if (!F.getBasicBlockList().empty()) {
Attribute attr = F.getFnAttribute("julia-optimization-level");
StringRef val = attr.getValueAsString();
if (val != "") {
size_t ol = (size_t)val[0] - '0';
if (ol >= 0 && ol < optlevel)
optlevel = ol;
}
}
}
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
optlevel = std::min(std::max(optlevel, optlevel_min), this->count);
}
}

return CompilerResultT(std::move(ObjBuffer));
});
assert(optlevel != ~0ull && "Failed to select a valid optimization level!");
this->optimizers[optlevel].emit(std::move(R), std::move(TSM));
}

void jl_register_jit_object(const object::ObjectFile &debugObj,
@@ -807,10 +790,27 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
}
#endif

namespace {
orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) {
return orc::JITTargetMachineBuilder(TM.getTargetTriple())
.setCPU(TM.getTargetCPU().str())
.setFeatures(TM.getTargetFeatureString())
.setOptions(TM.Options)
.setRelocationModel(Reloc::Static)
.setCodeModel(TM.getCodeModel())
.setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
}
}

JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
: TM(TM),
DL(TM.createDataLayout()),
ObjStream(ObjBufferSV),
TMs{
cantFail(createJTMBFromTM(TM, 0).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 1).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 2).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 3).createTargetMachine())
},
TSCtx(std::unique_ptr<LLVMContext>(LLVMCtx)),
#if JL_LLVM_VERSION >= 130000
ES(cantFail(orc::SelfExecutorProcessControl::Create())),
@@ -837,7 +837,17 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
}
),
#endif
CompileLayer(ES, ObjectLayer, std::make_unique<CompilerT>(this))
CompileLayer0(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 0))),
CompileLayer1(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 1))),
CompileLayer2(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 2))),
CompileLayer3(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 3))),
OptimizeLayers{
{ES, CompileLayer0, OptimizerT(PM0, 0)},
{ES, CompileLayer1, OptimizerT(PM1, 1)},
{ES, CompileLayer2, OptimizerT(PM2, 2)},
{ES, CompileLayer3, OptimizerT(PM3, 3)},
},
OptSelLayer(OptimizeLayers)
{
#ifdef JL_USE_JITLINK
# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
@@ -859,15 +869,10 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
registerRTDyldJITObject(Object, LO, MemMgr);
});
#endif
for (int i = 0; i < 4; i++) {
TMs[i] = TM.getTarget().createTargetMachine(TM.getTargetTriple().getTriple(), TM.getTargetCPU(),
TM.getTargetFeatureString(), TM.Options, Reloc::Static, TM.getCodeModel(),
CodeGenOptLevelFor(i), true);
}
addPassesForOptLevel(PM0, *TMs[0], ObjStream, Ctx, 0);
addPassesForOptLevel(PM1, *TMs[1], ObjStream, Ctx, 1);
addPassesForOptLevel(PM2, *TMs[2], ObjStream, Ctx, 2);
addPassesForOptLevel(PM3, *TMs[3], ObjStream, Ctx, 3);
addPassesForOptLevel(PM0, *TMs[0], 0);
addPassesForOptLevel(PM1, *TMs[1], 1);
addPassesForOptLevel(PM2, *TMs[2], 2);
addPassesForOptLevel(PM3, *TMs[3], 3);

// Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
// symbols in the program as well. The nullptr argument to the function
@@ -943,7 +948,7 @@ void JuliaOJIT::addModule(std::unique_ptr<Module> M)
}
#endif
// TODO: what is the performance characteristics of this?
cantFail(CompileLayer.add(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
cantFail(OptSelLayer.add(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
// force eager compilation (for now), due to memory management specifics
// (can't handle compilation recursion)
for (auto Name : NewExports)
55 changes: 38 additions & 17 deletions src/jitlayers.h
@@ -8,6 +8,7 @@
#include "llvm/IR/LegacyPassManager.h"

#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/IRTransformLayer.h>
#include <llvm/ExecutionEngine/JITEventListener.h>

#include <llvm/Target/TargetMachine.h>
@@ -176,28 +177,46 @@ typedef JITSymbol JL_JITSymbol;
typedef JITSymbol JL_SymbolInfo;

using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
using OptimizerResultT = Expected<orc::ThreadSafeModule>;

class JuliaOJIT {
struct CompilerT : public orc::IRCompileLayer::IRCompiler {
CompilerT(JuliaOJIT *pjit)
: IRCompiler(orc::IRSymbolMapper::ManglingOptions{}),
jit(*pjit) {}
virtual CompilerResultT operator()(Module &M) override;
private:
JuliaOJIT &jit;
};
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

public:
#ifdef JL_USE_JITLINK
typedef orc::ObjectLinkingLayer ObjLayerT;
#else
typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
#endif
typedef orc::IRCompileLayer CompileLayerT;
typedef orc::IRTransformLayer OptimizeLayerT;
typedef object::OwningBinary<object::ObjectFile> OwningObj;
private:
struct OptimizerT {
OptimizerT(legacy::PassManager &PM, int optlevel) : optlevel(optlevel), PM(PM) {}

OptimizerResultT operator()(orc::ThreadSafeModule M, orc::MaterializationResponsibility &R);
private:
int optlevel;
legacy::PassManager &PM;
};
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

struct OptSelLayerT : orc::IRLayer {

template<size_t N>
OptSelLayerT(OptimizeLayerT (&optimizers)[N]) : orc::IRLayer(optimizers[0].getExecutionSession(), optimizers[0].getManglingOptions()), optimizers(optimizers), count(N) {
static_assert(N > 0, "Expected array with at least one optimizer!");
}

void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;

private:
OptimizeLayerT *optimizers;
size_t count;
};

public:

JuliaOJIT(TargetMachine &TM, LLVMContext *Ctx);

@@ -227,14 +246,11 @@ class JuliaOJIT {
const DataLayout DL;
// Should be big enough that in the common case, The
// object fits in its entirety
SmallVector<char, 4096> ObjBufferSV;
raw_svector_ostream ObjStream;
legacy::PassManager PM0; // per-optlevel pass managers
legacy::PassManager PM1;
legacy::PassManager PM2;
legacy::PassManager PM3;
TargetMachine *TMs[4];
MCContext *Ctx;
std::unique_ptr<TargetMachine> TMs[4];

orc::ThreadSafeContext TSCtx;
orc::ExecutionSession ES;
@@ -245,7 +261,12 @@ class JuliaOJIT {
std::shared_ptr<RTDyldMemoryManager> MemMgr;
#endif
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
CompileLayerT CompileLayer0;
CompileLayerT CompileLayer1;
CompileLayerT CompileLayer2;
CompileLayerT CompileLayer3;
Comment on lines +264 to +267 (Member):
Any reason not to make this an array too (like the OptimizeLayers)?

Suggested change
CompileLayerT CompileLayer0;
CompileLayerT CompileLayer1;
CompileLayerT CompileLayer2;
CompileLayerT CompileLayer3;
CompileLayerT CompileLayer[4];

Member Author:
The compiler was complaining about the compile layer's internal std::mutex not being move-constructible, which prevented me from actually creating the CompileLayerT instances during construction. I suspect this will be less of an issue in C++17 with compile-time copy elision, so it might be good to revisit then.

Member:
Okay, I see: the necessary expression (brace initialization) changed meaning in C++20, so it can only be compiled with -std=c++20 or later and g++-10 or later.
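For context, a minimal standalone sketch of the move-constructibility issue discussed in this thread, using a hypothetical `Layer` type standing in for the real compile layer (which owns a std::mutex). The exact construct that failed in the PR depends on the layer's constructor and the language mode, so this only illustrates the general rule.

```cpp
#include <mutex>

// Stand-in for a layer type that owns a std::mutex, as orc::IRCompileLayer does.
// Because std::mutex is neither copyable nor movable, Layer's move constructor
// is implicitly deleted.
struct Layer {
    std::mutex M;
    int Level;
    Layer(int L) : Level(L) {}
};

int main() {
    // In-place list-initialization of each array element is fine, since no
    // temporary has to be moved; this mirrors how OptimizeLayers is built above.
    Layer PerLevel[2]{ {0}, {1} };

    // Anything that materializes a temporary and then moves it into place is
    // rejected before C++17's guaranteed copy elision, because the move
    // constructor is deleted:
    // Layer Copied = Layer(2);   // error pre-C++17
    (void)PerLevel;
    return 0;
}
```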

OptimizeLayerT OptimizeLayers[4];
OptSelLayerT OptSelLayer;

DenseMap<void*, std::string> ReverseLocalSymbolTable;
};