forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][Fusion] JIT compiler kernel fusion passes (intel#7661)
This is the fourth patch in a series of patches to add an implementation of the [kernel fusion extension](intel#7098). We have split the implementation into multiple patches to make them easier to review. This patch adds the LLVM passes that perform the kernel fusion and related optimizations: * A pass creating the function definition for the fused kernel from the input kernel definitions. * A pass performing internalization of dataflow internal to the fused kernel into either private or local memory. The type of memory to use is currently specified by the user in the runtime. * A pass propagating values for scalars and by-val aggregates from the SYCL runtime to the fused kernel as constants. The information is propagated from the SYCL runtime to the passes via LLVM metadata inserted by the JIT compiler frontend. After and between the fusion passes, some standard LLVM optimization and transformation passes are executed to enable passes and optimize the fused kernel. Signed-off-by: Lukas Sommer <lukas.sommer@codeplay.com> Co-authored-by: Victor Perez <victor.perez@codeplay.com>
- Loading branch information
1 parent
1a1fd8d
commit e1e6df5
Showing
38 changed files
with
4,375 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
//==-------------------------- FusionPipeline.cpp --------------------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "FusionPipeline.h" | ||
|
||
#include "debug/PassDebug.h" | ||
#include "helper/ConfigHelper.h" | ||
#include "internalization/Internalization.h" | ||
#include "kernel-fusion/SYCLKernelFusion.h" | ||
#include "kernel-info/SYCLKernelInfo.h" | ||
#include "syclcp/SYCLCP.h" | ||
|
||
#include "llvm/IR/PassManager.h" | ||
#include "llvm/Transforms/Scalar/IndVarSimplify.h" | ||
#include "llvm/Transforms/Scalar/InferAddressSpaces.h" | ||
#include "llvm/Transforms/Scalar/LoopUnrollPass.h" | ||
#ifndef NDEBUG | ||
#include "llvm/IR/Verifier.h" | ||
#endif // NDEBUG | ||
#include "llvm/Passes/PassBuilder.h" | ||
#include "llvm/Transforms/InstCombine/InstCombine.h" | ||
#include "llvm/Transforms/Scalar/ADCE.h" | ||
#include "llvm/Transforms/Scalar/EarlyCSE.h" | ||
#include "llvm/Transforms/Scalar/SCCP.h" | ||
#include "llvm/Transforms/Scalar/SROA.h" | ||
#include "llvm/Transforms/Scalar/SimplifyCFG.h" | ||
|
||
using namespace llvm; | ||
using namespace jit_compiler; | ||
using namespace jit_compiler::fusion; | ||
|
||
std::unique_ptr<SYCLModuleInfo> | ||
FusionPipeline::runFusionPasses(Module &Mod, SYCLModuleInfo &InputInfo, | ||
int BarriersFlags) { | ||
// Perform the actual kernel fusion, i.e., generate a kernel function for the | ||
// fused kernel from the kernel functions of the input kernels. This is done | ||
// by the SYCLKernelFusion LLVM pass, which is run here through a custom LLVM | ||
// pass pipeline. In order to perform internalization, we run the | ||
// SYCLInternalizer pass. | ||
|
||
bool DebugEnabled = ConfigHelper::get<option::JITEnableVerbose>(); | ||
if (DebugEnabled) { | ||
// Enabled debug output from the fusion passes. | ||
jit_compiler::PassDebug = true; | ||
} | ||
|
||
// Initialize the analysis managers with all the registered analyses. | ||
PassBuilder PB; | ||
LoopAnalysisManager LAM; | ||
FunctionAnalysisManager FAM; | ||
CGSCCAnalysisManager CGAM; | ||
ModuleAnalysisManager MAM; | ||
PB.registerModuleAnalyses(MAM); | ||
PB.registerCGSCCAnalyses(CGAM); | ||
PB.registerFunctionAnalyses(FAM); | ||
PB.registerLoopAnalyses(LAM); | ||
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); | ||
|
||
// Make the existing SYCLModuleInfo available to the pass pipeline via the | ||
// corresponding analysis pass. | ||
MAM.registerPass([&]() { | ||
auto ModInfo = std::make_unique<SYCLModuleInfo>(InputInfo); | ||
return SYCLModuleInfoAnalysis{std::move(ModInfo)}; | ||
}); | ||
ModulePassManager MPM; | ||
// Run the fusion pass on the LLVM IR module. | ||
MPM.addPass(SYCLKernelFusion{BarriersFlags}); | ||
{ | ||
FunctionPassManager FPM; | ||
// Run loop unrolling and SROA to split the kernel functor struct into its | ||
// scalar parts, to avoid problems with address-spaces and enable | ||
// internalization. | ||
FPM.addPass(createFunctionToLoopPassAdaptor(IndVarSimplifyPass{})); | ||
LoopUnrollOptions UnrollOptions; | ||
FPM.addPass(LoopUnrollPass{UnrollOptions}); | ||
FPM.addPass(SROAPass{}); | ||
// Run the InferAddressSpace pass to remove as many address-space casts | ||
// to/from generic address-space as possible, because these hinder | ||
// internalization. | ||
// FIXME: TTI should tell the pass which address space to use. | ||
// Ideally, the static compiler should have performed that job. | ||
constexpr unsigned FlatAddressSpace = 4; | ||
FPM.addPass(InferAddressSpacesPass(FlatAddressSpace)); | ||
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); | ||
} | ||
// Run dataflow internalization and runtime constant propagation. | ||
MPM.addPass(SYCLInternalizer{}); | ||
MPM.addPass(SYCLCP{}); | ||
// Run additional optimization passes after completing fusion. | ||
{ | ||
FunctionPassManager FPM; | ||
FPM.addPass(SROAPass{}); | ||
FPM.addPass(SCCPPass{}); | ||
FPM.addPass(InstCombinePass{}); | ||
FPM.addPass(SimplifyCFGPass{}); | ||
FPM.addPass(SROAPass{}); | ||
FPM.addPass(InstCombinePass{}); | ||
FPM.addPass(SimplifyCFGPass{}); | ||
FPM.addPass(ADCEPass{}); | ||
FPM.addPass(EarlyCSEPass{/*UseMemorySSA*/ true}); | ||
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); | ||
} | ||
MPM.run(Mod, MAM); | ||
|
||
if (DebugEnabled) { | ||
// Restore debug option | ||
jit_compiler::PassDebug = false; | ||
} | ||
|
||
assert(!verifyModule(Mod, &errs()) && "Invalid LLVM IR generated"); | ||
|
||
auto NewModInfo = MAM.getResult<SYCLModuleInfoAnalysis>(Mod); | ||
assert(NewModInfo.ModuleInfo && "Failed to retrieve SYCL module info"); | ||
|
||
return std::make_unique<SYCLModuleInfo>(std::move(*NewModInfo.ModuleInfo)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//==--- FusionPipeline - LLVM pass pipeline definition for kernel fusion ---==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef SYCL_FUSION_JIT_COMPILER_FUSION_FUSIONPIPELINE_H | ||
#define SYCL_FUSION_JIT_COMPILER_FUSION_FUSIONPIPELINE_H | ||
|
||
#include "Kernel.h" | ||
#include "llvm/IR/Module.h" | ||
|
||
namespace jit_compiler { | ||
namespace fusion { | ||
|
||
class FusionPipeline { | ||
public: | ||
/// | ||
/// Run the necessary passes in a custom pass pipeline to perform kernel | ||
/// fusion on the given module. | ||
/// | ||
/// \param Mod LLVM module to run the pipeline on. It should contain the stub | ||
/// functions and fusion metadata. | ||
/// \param InputInfo Module information that must contain information about | ||
/// all input kernels. | ||
/// \param BarriersFlags Value passed on to the kernel fusion pass when it is | ||
/// constructed. | ||
/// \returns A SYCLModuleInfo that additionally contains an entry for the | ||
/// fused kernel. | ||
static std::unique_ptr<SYCLModuleInfo> | ||
runFusionPasses(llvm::Module &Mod, SYCLModuleInfo &InputInfo, | ||
int BarriersFlags); | ||
}; | ||
} // namespace fusion | ||
} // namespace jit_compiler | ||
|
||
#endif // SYCL_FUSION_JIT_COMPILER_FUSION_FUSIONPIPELINE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Module library for usage as library/pass-plugin with LLVM opt. | ||
# Built as a SHARED library from the same pass sources that make up the | ||
# static SYCLKernelFusionPasses library defined further down in this file. | ||
add_llvm_library(SYCLKernelFusion SHARED | ||
SYCLFusionPasses.cpp | ||
kernel-fusion/SYCLKernelFusion.cpp | ||
kernel-info/SYCLKernelInfo.cpp | ||
internalization/Internalization.cpp | ||
syclcp/SYCLCP.cpp | ||
cleanup/Cleanup.cpp | ||
debug/PassDebug.cpp | ||
) | ||
|
||
# This directory is a PUBLIC include path, so it is also propagated to | ||
# targets that link against the library. The common JIT headers are PRIVATE, | ||
# i.e. only used when compiling the library itself. | ||
target_include_directories(SYCLKernelFusion | ||
PUBLIC | ||
${CMAKE_CURRENT_SOURCE_DIR} | ||
PRIVATE | ||
${SYCL_JIT_BASE_DIR}/common/include | ||
) | ||
|
||
# Static library for linking with the jit_compiler | ||
# The source list mirrors the SHARED SYCLKernelFusion target above; the LLVM | ||
# components the passes depend on are named explicitly via LINK_COMPONENTS. | ||
add_llvm_library(SYCLKernelFusionPasses | ||
SYCLFusionPasses.cpp | ||
kernel-fusion/SYCLKernelFusion.cpp | ||
kernel-info/SYCLKernelInfo.cpp | ||
internalization/Internalization.cpp | ||
syclcp/SYCLCP.cpp | ||
cleanup/Cleanup.cpp | ||
debug/PassDebug.cpp | ||
|
||
LINK_COMPONENTS | ||
Core | ||
Support | ||
TransformUtils | ||
Passes | ||
) | ||
|
||
# The PUBLIC include path is wrapped in BUILD_INTERFACE, so it only applies | ||
# when the target is used from within the build tree. The common JIT headers | ||
# are PRIVATE, i.e. only used when compiling the library itself. | ||
target_include_directories(SYCLKernelFusionPasses | ||
PUBLIC | ||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> | ||
PRIVATE | ||
${SYCL_JIT_BASE_DIR}/common/include | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
//==------------------------ SYCLFusionPasses.cpp --------------------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/Passes/PassBuilder.h" | ||
#include "llvm/Passes/PassPlugin.h" | ||
|
||
#include "internalization/Internalization.h" | ||
#include "kernel-fusion/SYCLKernelFusion.h" | ||
#include "kernel-info/SYCLKernelInfo.h" | ||
#include "syclcp/SYCLCP.h" | ||
|
||
using namespace llvm; | ||
|
||
// Command-line flag "sycl-kernel-fusion-no-barriers". When it is set, the | ||
// "sycl-kernel-fusion" pipeline element registered in this file constructs | ||
// SYCLKernelFusion with a barrier flag of -1 instead of | ||
// SYCLKernelFusion::DefaultBarriersFlags. | ||
cl::opt<bool> | ||
NoBarriers("sycl-kernel-fusion-no-barriers", | ||
cl::desc("Disable barrier insertion for SYCL kernel fusion.")); | ||
|
||
llvm::PassPluginLibraryInfo getSYCLKernelFusionPluginInfo() { | ||
return { | ||
LLVM_PLUGIN_API_VERSION, "SYCL-Module-Info", LLVM_VERSION_STRING, | ||
[](PassBuilder &PB) { | ||
PB.registerPipelineParsingCallback( | ||
[](StringRef Name, ModulePassManager &MPM, | ||
ArrayRef<PassBuilder::PipelineElement>) { | ||
if (Name == "sycl-kernel-fusion") { | ||
int BarrierFlag = | ||
(NoBarriers) ? -1 : SYCLKernelFusion::DefaultBarriersFlags; | ||
MPM.addPass(SYCLKernelFusion(BarrierFlag)); | ||
return true; | ||
} | ||
if (Name == "sycl-internalization") { | ||
MPM.addPass(SYCLInternalizer()); | ||
return true; | ||
} | ||
if (Name == "sycl-cp") { | ||
MPM.addPass(SYCLCP()); | ||
return true; | ||
} | ||
if (Name == "print-sycl-module-info") { | ||
MPM.addPass(SYCLModuleInfoPrinter()); | ||
return true; | ||
} | ||
return false; | ||
}); | ||
PB.registerAnalysisRegistrationCallback([](ModuleAnalysisManager &MAM) { | ||
MAM.registerPass([]() { return SYCLModuleInfoAnalysis{}; }); | ||
}); | ||
}}; | ||
} | ||
|
||
// Plugin entry point, declared weak and with C linkage. It simply returns the | ||
// plugin description produced by getSYCLKernelFusionPluginInfo(). | ||
// NOTE(review): presumably this is the symbol LLVM's pass-plugin loader looks | ||
// up when the shared library is loaded - confirm against llvm/Passes/PassPlugin.h. | ||
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo | ||
llvmGetPassPluginInfo() { | ||
return getSYCLKernelFusionPluginInfo(); | ||
} |
Oops, something went wrong.