Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HIP] Adds support for hip-nvcc (Thanks @noelchalmers!) #266

Merged
merged 3 commits into from
Dec 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config.default.json
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@
compiler_flags: "-O3",
compiler_env_script: "",

// Flags passed to hipcc and not the underlying compiler
hipcc_compiler_flags: "",

// Auto-detected
arch: {
major: "<HIP-major-version>",
Expand Down
6 changes: 4 additions & 2 deletions include/occa/modes/hip/polyfill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace occa {
//---[ Types ]------------------------
typedef struct _hipCtx_t* hipCtx_t;
typedef int hipDevice_t;
typedef char* hipDeviceptr_t;
typedef void* hipDeviceptr_t;
typedef struct _hipEvent_t* hipEvent_t;
typedef struct _hipFunction_t* hipFunction_t;
typedef struct _hipFunctionAttribute_t* hipFunctionAttribute_t;
Expand All @@ -32,6 +32,8 @@ namespace occa {
size_t totalGlobalMem;
int maxThreadsPerBlock;
int gcnArch;
int major;
int minor;
};

enum hipError_t {
Expand Down Expand Up @@ -200,7 +202,7 @@ namespace occa {
return OCCA_HIP_IS_NOT_ENABLED;
}

inline hipError_t hipMemPrefetchAsync(hipDeviceptr_t *devPtr, size_t count, hipDevice_t dstDevice, hipStream_t hStream) {
inline hipError_t hipMemPrefetchAsync(hipDeviceptr_t *dptr, size_t count, hipDevice_t dstDevice, hipStream_t hStream) {
return OCCA_HIP_IS_NOT_ENABLED;
}

Expand Down
6 changes: 6 additions & 0 deletions include/occa/modes/hip/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ namespace occa {

std::string getVersion();

std::string getDeviceArch(
const int deviceId,
const int majorVersion = -1,
const int minorVersion = -1
);

void enablePeerToPeer(hipCtx_t context);
void checkPeerToPeer(hipDevice_t destDevice,
hipDevice_t srcDevice);
Expand Down
58 changes: 37 additions & 21 deletions scripts/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,16 @@ hipEnabled = 0
openclEnabled = 0
metalEnabled = 0


#---[ MPI ]-----------------------------
ifdef OCCA_MPI_ENABLED
mpiEnabled = $(OCCA_MPI_ENABLED)
else
mpiEnabled = $(call compilerSupportsMPI)
endif


#---[ OpenMP ]--------------------------
ifdef OCCA_OPENMP_ENABLED
openmpEnabled = $(OCCA_OPENMP_ENABLED)
fOpenmpEnabled = $(OCCA_OPENMP_ENABLED)
Expand All @@ -235,6 +239,8 @@ else
endif
endif


#---[ CUDA ]----------------------------
ifdef OCCA_CUDA_ENABLED
cudaEnabled = $(OCCA_CUDA_ENABLED)

Expand Down Expand Up @@ -264,37 +270,45 @@ else
endif
endif

ifdef OCCA_HIP_ENABLED
hipEnabled = $(OCCA_HIP_ENABLED)

ifeq ($(hipEnabled),1)
ifeq ($(usingLinux),1)
linkerFlags += -lhip_hcc
#---[ HIP ]-----------------------------
hipEnabled = 0
ifneq ($(OCCA_HIP_ENABLED),0)
ifeq ($(usingLinux),1)
hipIncFlags = $(call includeFlagsFor,hip/hip_runtime_api.h)

ifneq (,$(hipIncFlags))
hipEnabled := 1
endif
endif
else
hipIncFlags = $(call includeFlagsFor,hip/hip_runtime_api.h)
endif

ifneq (,$(hipIncFlags))
ifndef HIP_PATH
HIP_PATH = ${hipIncFlags:-I%=%}/../..
endif
ifeq ($(hipEnabled),1)
hipIncFlags = $(call includeFlagsFor,hip/hip_runtime_api.h)

hipconfig = $(shell $(HIP_PATH)/bin/hipconfig --cpp_config)
ifndef HIP_PATH
HIP_PATH = ${hipIncFlags:-I%=%}/../..
endif

ifeq ($(usingLinux),1)
hipLibFlags = $(call libraryFlagsFor,hip_hcc)
endif
hipPlatform = $(shell $(HIP_PATH)/bin/hipconfig --platform)
hipConfig = $(shell $(HIP_PATH)/bin/hipconfig --cpp_config)

ifneq (,$(hipLibFlags))
hipEnabled = 1
paths += $(hipconfig)
paths += $(hipIncFlags)
linkerFlags += $(hipLibFlags)
endif
ifeq ($(hipPlatform),nvcc)
linkerFlags += -lcuda -lcudart
hipLibFlags = $(call libraryFlagsFor,cuda)
hipLibFlags += $(call libraryFlagsFor,cudart)
else ifeq ($(hipPlatform),hcc)
linkerFlags += -lhip_hcc
hipLibFlags = $(call libraryFlagsFor,hip_hcc)
endif

paths += $(hipConfig)
paths += $(hipIncFlags)
linkerFlags += $(hipLibFlags)
endif


#---[ OpenCL ]--------------------------
ifdef OCCA_OPENCL_ENABLED
openclEnabled = $(OCCA_OPENCL_ENABLED)

Expand Down Expand Up @@ -328,6 +342,8 @@ else
endif
endif


#---[ Metal ]---------------------------
# Metal is only supported with
# - MacOS
# - clang++ compiler
Expand Down
73 changes: 44 additions & 29 deletions src/modes/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace occa {
device::device(const occa::properties &properties_) :
occa::launchedModeDevice_t(properties_) {

hipDeviceProp_t props;
hipDeviceProp_t hipProps;
if (!properties.has("wrapped")) {
OCCA_ERROR("[HIP] device not given a [device_id] integer",
properties.has("device_id") &&
Expand All @@ -35,7 +35,7 @@ namespace occa {
hipSetDevice(deviceID));

OCCA_HIP_ERROR("Getting device properties",
hipGetDeviceProperties(&props, deviceID));
hipGetDeviceProperties(&hipProps, deviceID));
}

p2pEnabled = false;
Expand All @@ -57,18 +57,23 @@ namespace occa {
compilerFlags = "-O3";
}

kernelProps["compiler"] = compiler;
kernelProps["compilerFlags"] = compilerFlags;
kernelProps["compiler"] = compiler;
kernelProps["compiler_flags"] = compilerFlags;

OCCA_HIP_ERROR("Device: Getting HIP Device Arch",
hipDeviceComputeCapability(&archMajorVersion,
&archMinorVersion,
hipDevice) );
archMajorVersion = kernelProps.get<int>("arch/major", hipProps.major);
archMinorVersion = kernelProps.get<int>("arch/minor", hipProps.minor);

archMajorVersion = kernelProps.get("arch/major", archMajorVersion);
archMinorVersion = kernelProps.get("arch/minor", archMinorVersion);
std::string arch = getDeviceArch(deviceID, archMajorVersion, archMinorVersion);
std::string archFlag;
if (startsWith(arch, "sm_")) {
archFlag = "-arch=" + arch;
} else if (startsWith(arch, "gfx")) {
archFlag = "-t " + arch;
} else {
OCCA_FORCE_ERROR("Unknown HIP arch");
}

kernelProps["target"] = toString(props.gcnArch);
kernelProps["compiler_flag_arch"] = archFlag;
}

device::~device() { }
Expand Down Expand Up @@ -220,13 +225,14 @@ namespace occa {
}

void device::setArchCompilerFlags(occa::properties &kernelProps) {
if (kernelProps.get<std::string>("compiler_flags").find("-t gfx") == std::string::npos) {
std::stringstream ss;
std::string arch = kernelProps["target"];
if (arch.size()) {
ss << " -t gfx" << arch << ' ';
kernelProps["compiler_flags"] += ss.str();
}
const std::string hipccCompilerFlags = (
kernelProps.get<std::string>("hipcc_compiler_flags")
);

if (hipccCompilerFlags.find("-t gfx") == std::string::npos
&& hipccCompilerFlags.find("-arch=sm") == std::string::npos) {
kernelProps["hipcc_compiler_flags"] += " ";
kernelProps["hipcc_compiler_flags"] += kernelProps["compiler_flag_arch"];
}
}

Expand All @@ -243,22 +249,31 @@ namespace occa {

setArchCompilerFlags(allProps);

const std::string compilerFlags = allProps["compiler_flags"];
const std::string hipccCompilerFlags = allProps["hipcc_compiler_flags"];

std::stringstream command;
if (allProps.has("compiler_env_script")) {
command << allProps["compiler_env_script"] << " && ";
}

//---[ Compiling Command ]--------
command << allProps["compiler"]
<< " --genco "
<< ' ' << allProps["compiler_flags"]
// #if (OCCA_OS == OCCA_WINDOWS_OS)
// << " -D OCCA_OS=OCCA_WINDOWS_OS -D _MSC_VER=1800"
// #endif
// << " -I" << env::OCCA_DIR << "include"
// << " -L" << env::OCCA_DIR << "lib -locca"
<< " " << sourceFilename
<< " -o " << binaryFilename;
<< " --genco";

if (compilerFlags.size()) {
#ifdef __HIP_PLATFORM_NVCC__
command << ' ' << compilerFlags;
#else
command << " -f=\\\"" << compilerFlags << "\\\"";
#endif
}
if (hipccCompilerFlags.size()) {
command << ' ' << hipccCompilerFlags;
}

command << ' ' << sourceFilename
<< " -o " << binaryFilename;

if (!verbose) {
command << " > /dev/null 2>&1";
Expand Down Expand Up @@ -381,7 +396,7 @@ namespace occa {
hipSetDevice(deviceID));

OCCA_HIP_ERROR("Device: malloc",
hipMalloc(&(mem.hipPtr), bytes));
hipMalloc((void**) &(mem.hipPtr), bytes));

if (src != NULL) {
mem.copyFrom(src, bytes, 0);
Expand All @@ -400,7 +415,7 @@ namespace occa {
OCCA_HIP_ERROR("Device: malloc host",
hipHostMalloc((void**) &(mem.mappedPtr), bytes));
OCCA_HIP_ERROR("Device: get device pointer from host",
hipHostGetDevicePointer(&(mem.hipPtr),
hipHostGetDevicePointer((void**) &(mem.hipPtr),
mem.mappedPtr,
0));

Expand Down
28 changes: 18 additions & 10 deletions src/modes/hip/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@

namespace occa {
namespace hip {
inline hipDeviceptr_t addHipPtrOffset(hipDeviceptr_t hipPtr, const udim_t offset) {
return (hipDeviceptr_t) (((char*) hipPtr) + offset);
}

memory::memory(modeDevice_t *modeDevice_,
udim_t size_,
const occa::properties &properties_) :
occa::modeMemory_t(modeDevice_, size_, properties_),
#ifdef __HIP_PLATFORM_HCC__
hipPtr(ptr),
#else
hipPtr((hipDeviceptr_t&) ptr),
#endif
mappedPtr(NULL) {}

memory::~memory() {
Expand All @@ -24,7 +32,7 @@ namespace occa {
hipHostFree(mappedPtr));
mappedPtr = NULL;
} else if (hipPtr) {
hipFree(hipPtr);
hipFree((void*) hipPtr);
hipPtr = 0;
}
size = 0;
Expand All @@ -49,7 +57,7 @@ namespace occa {
memory *m = new memory(modeDevice,
size - offset,
properties);
m->hipPtr = (char*) hipPtr + offset;
m->hipPtr = addHipPtrOffset(hipPtr, offset);
if (mappedPtr) {
m->mappedPtr = mappedPtr + offset;
}
Expand All @@ -71,12 +79,12 @@ namespace occa {

if (!async) {
OCCA_HIP_ERROR("Memory: Copy From",
hipMemcpyHtoD((char*) hipPtr + offset,
hipMemcpyHtoD(addHipPtrOffset(hipPtr, offset),
const_cast<void*>(src),
bytes) );
} else {
OCCA_HIP_ERROR("Memory: Async Copy From",
hipMemcpyHtoDAsync((char*) hipPtr + offset,
hipMemcpyHtoDAsync(addHipPtrOffset(hipPtr, offset),
const_cast<void*>(src),
bytes,
getHipStream()) );
Expand All @@ -92,13 +100,13 @@ namespace occa {

if (!async) {
OCCA_HIP_ERROR("Memory: Copy From",
hipMemcpyDtoD((char*) hipPtr + destOffset,
(char*) ((memory*) src)->hipPtr + srcOffset,
hipMemcpyDtoD(addHipPtrOffset(hipPtr, destOffset),
addHipPtrOffset(((memory*) src)->hipPtr, srcOffset),
bytes) );
} else {
OCCA_HIP_ERROR("Memory: Async Copy From",
hipMemcpyDtoDAsync((char*) hipPtr + destOffset,
(char*) ((memory*) src)->hipPtr + srcOffset,
hipMemcpyDtoDAsync(addHipPtrOffset(hipPtr, destOffset),
addHipPtrOffset(((memory*) src)->hipPtr, srcOffset),
bytes,
getHipStream()) );
}
Expand All @@ -113,12 +121,12 @@ namespace occa {
if (!async) {
OCCA_HIP_ERROR("Memory: Copy From",
hipMemcpyDtoH(dest,
(char*) hipPtr + offset,
addHipPtrOffset(hipPtr, offset),
bytes) );
} else {
OCCA_HIP_ERROR("Memory: Async Copy From",
hipMemcpyDtoHAsync(dest,
(char*) hipPtr + offset,
addHipPtrOffset(hipPtr, offset),
bytes,
getHipStream()) );
}
Expand Down
10 changes: 5 additions & 5 deletions src/modes/hip/registration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ namespace occa {
const std::string bytesStr = stringifyBytes(bytes);

section
.add("Device Name", deviceName)
.add("Device ID" , toString(deviceId))
.add("Arch" , "gfx" + toString(props.gcnArch))
.add("Memory" , bytesStr)
.addDivider();
.add("Device Name", deviceName)
.add("Device ID" , toString(deviceId))
.add("Arch" , getDeviceArch(deviceId))
.add("Memory" , bytesStr)
.addDivider();
}
// Remove last divider
section.groups.pop_back();
Expand Down
Loading