Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
Expand Down Expand Up @@ -959,6 +960,10 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PerModulePasses.add(createSPIRITTAnnotationsPass());
}

// This pass should be always called for SYCL device code.
if (LangOpts.SYCLIsDevice)
PerModulePasses.add(createSYCLLowerWGLocalMemoryPass());

switch (Action) {
case Backend_EmitNothing:
break;
Expand Down
25 changes: 25 additions & 0 deletions clang/test/CodeGenSYCL/Inputs/sycl.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#pragma once

typedef __UINT8_TYPE__ uint8_t;
typedef __SIZE_TYPE__ size_t;

#define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))

#ifndef __SYCL_ALWAYS_INLINE
#if __has_attribute(always_inline)
#define __SYCL_ALWAYS_INLINE __attribute__((always_inline))
#else
#define __SYCL_ALWAYS_INLINE
#endif
#endif // __SYCL_ALWAYS_INLINE

// Dummy runtime classes to model SYCL API.
namespace cl {
namespace sycl {
Expand Down Expand Up @@ -494,5 +505,19 @@ class image {
}
};

extern "C" SYCL_EXTERNAL __attribute__((opencl_local)) uint8_t *
__sycl_allocateLocalMemory(size_t Size, size_t Alignment);

template <typename T>
__attribute__((opencl_local)) T *
__SYCL_ALWAYS_INLINE
group_local_memory() {
#ifdef __SYCL_DEVICE_ONLY__
__attribute__((opencl_local)) uint8_t *AllocatedMem =
__sycl_allocateLocalMemory(sizeof(T), alignof(T));
return (__attribute__((opencl_local)) T *)AllocatedMem;
#endif
}

} // namespace sycl
} // namespace cl
39 changes: 39 additions & 0 deletions clang/test/CodeGenSYCL/group-local-memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -S -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -S -emit-llvm %s -o - | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think llvm/test/SYCLLowerIR/group_local_memory.ll should be enough to validate the pass. Just make sure it covers both optimized and unoptimized IR.
Testing a single pass by running the whole compilation stack seems like an overkill to me.
Are there any other reasons to have this test in addition to llvm/test/SYCLLowerIR/group_local_memory.ll?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main perpose of current check is to test that pass is executed during device code compilation in clang in both cases when llvm passes are turned on and turned off.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is much chipper way to achieve this with -mllvm -debug-pass=Structure - see https://github.com/intel/llvm/blob/sycl/clang/test/CodeGenCUDA/link-device-bitcode.cu#L37-L41 as an example.
Test program might be an empty file, if I'm not mistaken.


// CHECK: [[WGLOCALMEM_1:@WGLocalMem.*]] = internal addrspace(3) global [8 x i8] undef, align 8
// CHECK: [[WGLOCALMEM_2:@WGLocalMem.*]] = internal addrspace(3) global [4 x i8] undef, align 4
// CHECK: [[WGLOCALMEM_3:@WGLocalMem.*]] = internal addrspace(3) global [128 x i8] undef, align 4

#include "Inputs/sycl.hpp"

constexpr size_t WgSize = 32;
constexpr size_t WgCount = 4;
constexpr size_t Size = WgSize * WgCount;

class KernelA;
class KernelB;

using namespace cl::sycl;

int main() {
queue Q;
{
Q.submit([&](handler &cgh) {
cgh.parallel_for<KernelA>(
range<1>(Size), [=](item<1> Item) {
auto *Ptr1 = group_local_memory<long>();
auto *Ptr2 = group_local_memory<float>();
});
});
}

{
Q.submit([&](handler &cgh) {
cgh.parallel_for<KernelB>(
range<1>(Size), [=](item<1> Item) {
auto *Ptr3 = group_local_memory<int[WgSize]>();
});
});
}
}
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &);
void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &);
void initializeESIMDLowerLoadStorePass(PassRegistry &);
void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &);
void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &);
void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/SYCLLowerIR/LowerESIMD.h"
#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/SYCLLowerIR/LowerWGScope.h"
#include "llvm/Support/Valgrind.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
Expand Down Expand Up @@ -206,6 +207,7 @@ namespace {
(void)llvm::createESIMDLowerLoadStorePass();
(void)llvm::createESIMDLowerVecArgPass();
(void)llvm::createSPIRITTAnnotationsPass();
(void)llvm::createSYCLLowerWGLocalMemoryPass();
std::string buf;
llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os);
Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/LowerWGLocalMemory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//===-- LowerWGLocalMemory.h - SYCL kernel local memory allocation pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Replaces calls to __sycl_allocateLocalMemory(Size, Alignment) function with
// allocation of memory in local address space at the kernel scope.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SYCLLOWERIR_LOWERWGLOCALMEMORY_H
#define LLVM_SYCLLOWERIR_LOWERWGLOCALMEMORY_H

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class SYCLLowerWGLocalMemoryPass
: public PassInfoMixin<SYCLLowerWGLocalMemoryPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};

ModulePass *createSYCLLowerWGLocalMemoryPass();
void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &);

} // namespace llvm

#endif // LLVM_SYCLLOWERIR_LOWERWGLOCALMEMORY_H
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ add_llvm_component_library(LLVMCodeGen
ProfileData
Scalar
Support
SYCLLowerIR
Target
TransformUtils
)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStripDebugMachineModulePass(Registry);
initializeSYCLLowerWGLocalMemoryLegacyPass(Registry);
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
LowerESIMD.cpp
LowerESIMDVLoadVStore.cpp
LowerESIMDVecArg.cpp
LowerWGLocalMemory.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
124 changes: 124 additions & 0 deletions llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
//===-- LowerWGLocalMemory.cpp - SYCL kernel local memory allocation pass -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces calls to __sycl_allocateLocalMemory(Size, Alignment)
// function with allocation of memory in local address space at the kernel
// scope.
//
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "LowerWGLocalMemory"

static constexpr char SYCL_ALLOCLOCALMEM_CALL[] = "__sycl_allocateLocalMemory";
static constexpr char LOCALMEMORY_GV_PREF[] = "WGLocalMem";

namespace {
class SYCLLowerWGLocalMemoryLegacy : public ModulePass {
public:
static char ID;

SYCLLowerWGLocalMemoryLegacy() : ModulePass(ID) {
initializeSYCLLowerWGLocalMemoryLegacyPass(
*PassRegistry::getPassRegistry());
}

bool runOnModule(Module &M) override {
ModuleAnalysisManager DummyMAM;
auto PA = Impl.run(M, DummyMAM);
return !PA.areAllPreserved();
}

private:
SYCLLowerWGLocalMemoryPass Impl;
};
} // namespace

char SYCLLowerWGLocalMemoryLegacy::ID = 0;
INITIALIZE_PASS(SYCLLowerWGLocalMemoryLegacy, "sycllowerwglocalmemory",
"Replace __sycl_allocateLocalMemory with allocation of memory "
"in local address space",
false, false)

ModulePass *llvm::createSYCLLowerWGLocalMemoryPass() {
return new SYCLLowerWGLocalMemoryLegacy();
}

static bool lowerAllocaLocalMem(Module &M) {
SmallVector<CallInst *, 8> ToReplace;
for (Function &F : M) {
CallingConv::ID CC = F.getCallingConv();

for (auto &I : instructions(F)) {
auto *CI = dyn_cast<CallInst>(&I);
Function *Callee = nullptr;
if (!CI || !(Callee = CI->getCalledFunction()))
continue;
StringRef Name = Callee->getName();
if (Name != SYCL_ALLOCLOCALMEM_CALL)
continue;

// TODO: Static local memory allocation should be requested only in
// spir kernel scope.
assert((CC == llvm::CallingConv::SPIR_FUNC ||
CC == llvm::CallingConv::SPIR_KERNEL) &&
"WG static local memery can be allocated only in kernel scope");

ToReplace.push_back(CI);
}
}

if (ToReplace.empty())
return false;

for (auto *CI : ToReplace) {
Value *ArgSize = CI->getArgOperand(0);
uint64_t Size = cast<llvm::ConstantInt>(ArgSize)->getZExtValue();
Value *ArgAlign = CI->getArgOperand(1);
uint64_t Alignment = cast<llvm::ConstantInt>(ArgAlign)->getZExtValue();

IRBuilder<> Builder(CI);
Type *LocalMemArrayTy = ArrayType::get(Builder.getInt8Ty(), Size);
unsigned LocalAS =
CI->getFunctionType()->getReturnType()->getPointerAddressSpace();
auto *LocalMemArrayGV =
new GlobalVariable(M, // module
LocalMemArrayTy, // type
false, // isConstant
GlobalValue::InternalLinkage, // Linkage
UndefValue::get(LocalMemArrayTy), // Initializer
LOCALMEMORY_GV_PREF, // Name prefix
nullptr, // InsertBefore
GlobalVariable::NotThreadLocal, // ThreadLocalMode
LocalAS // AddressSpace
);
LocalMemArrayGV->setAlignment(Align(Alignment));

Value *LocalMemArrayGVPtr = Builder.CreatePointerCast(
LocalMemArrayGV,
Builder.getInt8PtrTy(LocalMemArrayGV->getAddressSpace()));
CI->replaceAllUsesWith(LocalMemArrayGVPtr);
CI->eraseFromParent();
}
return true;
}

PreservedAnalyses SYCLLowerWGLocalMemoryPass::run(Module &M,
ModuleAnalysisManager &) {
if (lowerAllocaLocalMem(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
Loading