Skip to content

Commit

Permalink
[Core] Use new getDeviceCount OCCA api (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
noelchalmers authored Dec 28, 2020
1 parent d2ad425 commit bf7286b
Showing 1 changed file with 50 additions and 105 deletions.
155 changes: 50 additions & 105 deletions libs/core/platformDeviceConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,126 +27,26 @@ SOFTWARE.
#include "platform.hpp"
// #include "omp.h"

// Hack: forward-declare the OCCA backend hooks that DeviceConfig() calls to
// ask OCCA for a device count. Each backend's declarations are compiled only
// when the matching OCCA_*_ENABLED macro evaluates to true.
// NOTE(review): these are re-declarations of symbols this file does not
// define; presumably they must mirror OCCA's own declarations exactly --
// confirm against the OCCA version being linked.
namespace occa {
#if OCCA_CUDA_ENABLED
namespace cuda {
// Device count for the CUDA backend (declaration only).
int getDeviceCount();
}
#endif
#if OCCA_HIP_ENABLED
namespace hip {
// Device count for the HIP backend (declaration only).
int getDeviceCount();
}
#endif
#if OCCA_OPENCL_ENABLED
namespace opencl {
namespace info {
// Device-type flags, one bit each (bits 0-3). Note that FPGA is bit 3 and
// XeonPhi is bit 2, i.e. the listing order is not the numeric order.
static const int CPU = (1 << 0);
static const int GPU = (1 << 1);
static const int FPGA = (1 << 3);
static const int XeonPhi = (1 << 2);
// Union of all device-type bits.
static const int anyType = (CPU | GPU | FPGA | XeonPhi);

// Vendor flags, one bit each (bits 4-7), disjoint from the type bits above.
static const int Intel = (1 << 4);
static const int AMD = (1 << 5);
static const int Altera = (1 << 6);
static const int NVIDIA = (1 << 7);
// Union of all vendor bits.
static const int anyVendor = (Intel | AMD | Altera | NVIDIA);

// Every type bit and every vendor bit set; used as the default filter below.
static const int any = (anyType | anyVendor);

// Declarations only; each takes an int flag value and returns a string.
// NOTE(review): presumably a printable name for the flag -- confirm in OCCA.
std::string deviceType(int type);
std::string vendor(int type);
}

// Device count for OpenCL platform `pID`; `type` defaults to info::any.
// DeviceConfig() passes the "PLATFORM NUMBER" setting as pID.
int getDeviceCountInPlatform(int pID, int type = info::any);
}
#endif
}

// OCCA build stuff
void platform_t::DeviceConfig(){

int plat=0;
int device_id=0;

//for testing a single device, run with 1 rank and specify DEVICE NUMBER
if (size==1) {
settings.getSetting("DEVICE NUMBER",device_id);
} else {
//find out how many ranks and devices are on this system
char* hostnames = (char *) ::malloc(size*sizeof(char)*MPI_MAX_PROCESSOR_NAME);
char* hostname = hostnames+rank*MPI_MAX_PROCESSOR_NAME;

int namelen;
MPI_Get_processor_name(hostname,&namelen);

MPI_Allgather(MPI_IN_PLACE , MPI_MAX_PROCESSOR_NAME, MPI_CHAR,
hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_COMM_WORLD);

int localRank = 0;
int localSize = 0;
for (int n=0; n<rank; n++){
if (!strcmp(hostname, hostnames+n*MPI_MAX_PROCESSOR_NAME)) localRank++;
}
for (int n=0; n<size; n++){
if (!strcmp(hostname, hostnames+n*MPI_MAX_PROCESSOR_NAME)) localSize++;
}

device_id = localRank;

//check for over-subscribing devices
if(settings.compareSetting("THREAD MODEL", "CUDA")){
#if OCCA_CUDA_ENABLED
int deviceCount = occa::cuda::getDeviceCount();
if (deviceCount>0 && localRank>=deviceCount) {
stringstream ss;
ss << "Rank " << rank << " oversubscribing CUDA device " << device_id%deviceCount << " on node \"" << hostname<< "\"";
LIBP_WARNING(ss.str());
device_id = device_id%deviceCount;
}
#endif
}
else if(settings.compareSetting("THREAD MODEL", "HIP")){
#if OCCA_HIP_ENABLED
int deviceCount = occa::hip::getDeviceCount();
if (deviceCount>0 && localRank>=deviceCount) {
stringstream ss;
ss << "Rank " << rank << " oversubscribing HIP device " << device_id%deviceCount << " on node \"" << hostname<< "\"";
LIBP_WARNING(ss.str());
device_id = device_id%deviceCount;
}
#endif
}
else if(settings.compareSetting("THREAD MODEL", "OpenCL")){
#if OCCA_OPENCL_ENABLED
settings.getSetting("PLATFORM NUMBER", plat);
int deviceCount = occa::opencl::getDeviceCountInPlatform(plat);
if (deviceCount>0 && localRank>=deviceCount) {
stringstream ss;
ss << "Rank " << rank << " oversubscribing OpenCL device " << device_id%deviceCount << " on node \"" << hostname<< "\"";
LIBP_WARNING(ss.str());
device_id = device_id%deviceCount;
}
#endif
}
MPI_Barrier(MPI_COMM_WORLD);
free(hostnames);
}
if(settings.compareSetting("THREAD MODEL", "OpenCL"))
settings.getSetting("PLATFORM NUMBER", plat);

// read thread model/device/platform from settings
std::string mode;

if(settings.compareSetting("THREAD MODEL", "CUDA")){
mode = "mode: 'CUDA', device_id: " + std::to_string(device_id);
mode = "mode: 'CUDA'";
}
else if(settings.compareSetting("THREAD MODEL", "HIP")){
mode = "mode: 'HIP', device_id: " + std::to_string(device_id);
mode = "mode: 'HIP'";
}
else if(settings.compareSetting("THREAD MODEL", "OpenCL")){
mode = "mode: 'OpenCL', platform_id : " + std::to_string(plat)
+ ", device_id: " + std::to_string(device_id);
mode = "mode: 'OpenCL', platform_id : " + std::to_string(plat);
}
else if(settings.compareSetting("THREAD MODEL", "OpenMP")){
mode = "mode: 'OpenMP'";
Expand All @@ -155,6 +55,51 @@ void platform_t::DeviceConfig(){
mode = "mode: 'Serial'";
}

//add a device_id number for some modes
if ( settings.compareSetting("THREAD MODEL", "CUDA")
||settings.compareSetting("THREAD MODEL", "HIP")
||settings.compareSetting("THREAD MODEL", "OpenCL")) {
//for testing a single device, run with 1 rank and specify DEVICE NUMBER
if (size==1) {
settings.getSetting("DEVICE NUMBER",device_id);
} else {
//find out how many ranks and devices are on this system
char* hostnames = (char *) ::malloc(size*sizeof(char)*MPI_MAX_PROCESSOR_NAME);
char* hostname = hostnames+rank*MPI_MAX_PROCESSOR_NAME;

int namelen;
MPI_Get_processor_name(hostname,&namelen);

MPI_Allgather(MPI_IN_PLACE , MPI_MAX_PROCESSOR_NAME, MPI_CHAR,
hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_COMM_WORLD);

int localRank = 0;
int localSize = 0;
for (int n=0; n<rank; n++){
if (!strcmp(hostname, hostnames+n*MPI_MAX_PROCESSOR_NAME)) localRank++;
}
for (int n=0; n<size; n++){
if (!strcmp(hostname, hostnames+n*MPI_MAX_PROCESSOR_NAME)) localSize++;
}

device_id = localRank;

//check for over-subscribing devices
int deviceCount = occa::getDeviceCount(mode);
if (deviceCount>0 && localRank>=deviceCount) {
stringstream ss;
ss << "Rank " << rank << " oversubscribing device " << device_id%deviceCount << " on node \"" << hostname<< "\"";
LIBP_WARNING(ss.str());
device_id = device_id%deviceCount;
}
MPI_Barrier(MPI_COMM_WORLD);
free(hostnames);
}

// add device_id to setup string
mode += ", device_id: " + std::to_string(device_id);
}

//set number of omp threads to use
//int Ncores = sysconf(_SC_NPROCESSORS_ONLN);
//int Nthreads = Ncores/localSize;
Expand Down

0 comments on commit bf7286b

Please sign in to comment.