Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_clang_library(TargetLowering
TargetInfo.cpp
TargetLoweringInfo.cpp
Targets/AArch64.cpp
Targets/AMDGPU.cpp
Targets/NVPTX.cpp
Targets/SPIR.cpp
Targets/X86.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ createTargetLoweringInfo(LowerModule &LM) {

return createAArch64TargetLoweringInfo(LM, Kind);
}
case llvm::Triple::amdgcn:
return createAMDGPUTargetLoweringInfo(LM);
case llvm::Triple::x86_64: {
switch (Triple.getOS()) {
case llvm::Triple::Win32:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ createSPIRVTargetLoweringInfo(LowerModule &CGM);
std::unique_ptr<TargetLoweringInfo>
createNVPTXTargetLoweringInfo(LowerModule &CGM);

std::unique_ptr<TargetLoweringInfo>
createAMDGPUTargetLoweringInfo(LowerModule &CGM);

} // namespace cir

#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
//===- AMDGPU.cpp - TargetInfo for AMDGPU ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "LowerFunctionInfo.h"
#include "LowerTypes.h"
#include "TargetInfo.h"
#include "TargetLoweringInfo.h"
#include "clang/CIR/ABIArgInfo.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"

using ABIArgInfo = cir::ABIArgInfo;
using MissingFeature = cir::MissingFeatures;

namespace cir {

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUABIInfo : public ABIInfo {
public:
AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}

private:
void computeInfo(LowerFunctionInfo &fi) const override {
llvm_unreachable("NYI");
}
};

class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
public:
AMDGPUTargetLoweringInfo(LowerTypes &lt)
: TargetLoweringInfo(std::make_unique<AMDGPUABIInfo>(lt)) {}
// Taken from here: https://llvm.org/docs/AMDGPUUsage.html#address-spaces
unsigned getTargetAddrSpaceFromCIRAddrSpace(
cir::AddressSpace addrSpace) const override {
switch (addrSpace) {
case cir::AddressSpace::OffloadPrivate:
return 5;
case cir::AddressSpace::OffloadLocal:
return 3;
case cir::AddressSpace::OffloadGlobal:
return 1;
case cir::AddressSpace::OffloadConstant:
return 4;
case cir::AddressSpace::OffloadGeneric:
return 0;
default:
cir_cconv_unreachable("Unknown CIR address space for this target");
}
}
};

} // namespace
std::unique_ptr<TargetLoweringInfo>
createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());
}

} // namespace cir
33 changes: 22 additions & 11 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,19 +1749,31 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType());
auto resultTy = getTypeConverter()->convertType(op.getType());
// Verification between the CIR alloca AS and the one from data layout.
{
auto allocaAS = [&]() {
auto resPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(resultTy);
auto dlAllocaASAttr = mlir::cast_if_present<mlir::IntegerAttr>(
dataLayout.getAllocaMemorySpace());
// Absence means 0
// TODO: The query for the alloca AS should be done through CIRDataLayout
// instead to reuse the logic of interpret null attr as 0.
auto dlAllocaAS = dlAllocaASAttr ? dlAllocaASAttr.getInt() : 0;
if (dlAllocaAS != resPtrTy.getAddressSpace()) {
return op.emitError() << "alloca address space doesn't match the one "
"from the target data layout: "
<< dlAllocaAS;
}
if (!dlAllocaASAttr)
return 0u;
return static_cast<unsigned>(dlAllocaASAttr.getValue().getZExtValue());
}();

auto resPtrTy =
mlir::LLVM::LLVMPointerType::get(elementTy.getContext(), allocaAS);

auto llvmAlloca = rewriter.create<mlir::LLVM::AllocaOp>(
op.getLoc(), resPtrTy, elementTy, size, op.getAlignmentAttr().getInt());

auto expectedPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(
getTypeConverter()->convertType(op.getResult().getType()));

mlir::Value finalPtr = llvmAlloca.getResult();

if (expectedPtrTy.getAddressSpace() != allocaAS) {
finalPtr = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
op.getLoc(), expectedPtrTy, finalPtr);
}

// If there are annotations available, copy them out before we destroy the
Expand All @@ -1770,11 +1782,10 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
if (op.getAnnotations())
annotations = op.getAnnotationsAttr();

auto llvmAlloca = rewriter.replaceOpWithNewOp<mlir::LLVM::AllocaOp>(
op, resultTy, elementTy, size, op.getAlignmentAttr().getInt());

if (annotations && !annotations.empty())
buildAllocaAnnotations(llvmAlloca, adaptor, rewriter, annotations);
rewriter.replaceOp(op, finalPtr);

return mlir::success();
}

Expand Down
19 changes: 19 additions & 0 deletions clang/test/CIR/CodeGen/HIP/address-spaces.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.ll
// RUN: FileCheck --check-prefix=CIR --input-file=%t.ll %s

__global__ void fn() {
int i = 0;
__shared__ int j;
j = i;
}

// CIR: cir.global "private" internal dso_local addrspace(offload_local) @_ZZ2fnvE1j : !s32i
// CIR: cir.func dso_local @_Z2fnv
// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, addrspace(offload_local)>
// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[Local]] : !cir.ptr<!s32i>, !s32i
// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, addrspace(offload_local)>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be testing LLVM and OGCG here as well? Or is such lowering not yet added? Anyways it's something that can be added in a follow up PR.

25 changes: 25 additions & 0 deletions clang/test/CIR/CodeGen/HIP/addrspace-lowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s


__shared__ int a;
// LLVM-DEVICE: @a = addrspace(3) global i32 undef, align 4
// OGCG-DEVICE: @a = addrspace(3) global i32 undef, align 4

__device__ int b;
// LLVM-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4
// OGCG-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4

__constant__ int c;
// LLVM-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4
// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4

47 changes: 43 additions & 4 deletions clang/test/CIR/CodeGen/HIP/simple.cpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,36 @@
#include "../Inputs/cuda.h"
#include "cuda.h"

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x hip -fhip-new-launch-api \
// RUN: -emit-cir %s -o %t.cir
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -emit-cir %s -o %t.cir
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
//
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x hip -emit-llvm -fhip-new-launch-api \
// RUN: %s -o %t.ll
// RUN: -I%S/../Inputs/ %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -x hip -emit-llvm -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s


// Attribute for global_fn
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}

Expand All @@ -29,6 +45,8 @@ __device__ void device_fn(int* a, double b, float c) {}

__global__ void global_fn(int a) {}
// CIR-DEVICE: @_Z9global_fni
// LLVM-DEVICE: define dso_local void @_Z9global_fni
// OGCG-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni

// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
Expand All @@ -43,6 +61,12 @@ __global__ void global_fn(int a) {}
// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
// LLVM-HOST: call i32 @__hipPopCallConfiguration
// LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni
//
// OGCG-HOST: define dso_local void @_Z24__device_stub__global_fni
// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
// OGCG-HOST: call i32 @__hipPopCallConfiguration
// OGCG-HOST: %call = call noundef i32 @hipLaunchKernel(ptr noundef @_Z9global_fni


int main() {
Expand Down Expand Up @@ -78,3 +102,18 @@ int main() {
// LLVM-HOST: %[[#]] = load i32
// LLVM-HOST: ret i32

// OGCG-HOST: define dso_local noundef i32 @main
// OGCG-HOST: %agg.tmp = alloca %struct.dim3, align 4
// OGCG-HOST: %agg.tmp1 = alloca %struct.dim3, align 4
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
// OGCG-HOST: %call = call i32 @__hipPushCallConfiguration
// OGCG-HOST: %tobool = icmp ne i32 %call, 0
// OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok
// OGCG-HOST: kcall.configok:
// OGCG-HOST: call void @_Z24__device_stub__global_fni(i32 noundef 1)
// OGCG-HOST: br label %kcall.end
// OGCG-HOST: kcall.end:
// OGCG-HOST: %{{[0-9]+}} = load i32, ptr %retval, align 4
// OGCG-HOST: ret i32 %8

Loading