From bf7286b202c6b2bf97ae743ed0bc964a8618ae66 Mon Sep 17 00:00:00 2001 From: Noel Chalmers Date: Mon, 28 Dec 2020 14:52:23 -0600 Subject: [PATCH] [Core] Use new getDeviceCount OCCA api (#58) --- libs/core/platformDeviceConfig.cpp | 155 ++++++++++------------------- 1 file changed, 50 insertions(+), 105 deletions(-) diff --git a/libs/core/platformDeviceConfig.cpp b/libs/core/platformDeviceConfig.cpp index 66c4d098c..ead1378b5 100644 --- a/libs/core/platformDeviceConfig.cpp +++ b/libs/core/platformDeviceConfig.cpp @@ -27,126 +27,26 @@ SOFTWARE. #include "platform.hpp" // #include "omp.h" -//hack the hook to ask OCCA to return a device count -namespace occa { -#if OCCA_CUDA_ENABLED - namespace cuda { - int getDeviceCount(); - } -#endif -#if OCCA_HIP_ENABLED - namespace hip { - int getDeviceCount(); - } -#endif -#if OCCA_OPENCL_ENABLED - namespace opencl { - namespace info { - static const int CPU = (1 << 0); - static const int GPU = (1 << 1); - static const int FPGA = (1 << 3); - static const int XeonPhi = (1 << 2); - static const int anyType = (CPU | GPU | FPGA | XeonPhi); - - static const int Intel = (1 << 4); - static const int AMD = (1 << 5); - static const int Altera = (1 << 6); - static const int NVIDIA = (1 << 7); - static const int anyVendor = (Intel | AMD | Altera | NVIDIA); - - static const int any = (anyType | anyVendor); - - std::string deviceType(int type); - std::string vendor(int type); - } - - int getDeviceCountInPlatform(int pID, int type = info::any); - } -#endif -} - // OCCA build stuff void platform_t::DeviceConfig(){ int plat=0; int device_id=0; - //for testing a single device, run with 1 rank and specify DEVICE NUMBER - if (size==1) { - settings.getSetting("DEVICE NUMBER",device_id); - } else { - //find out how many ranks and devices are on this system - char* hostnames = (char *) ::malloc(size*sizeof(char)*MPI_MAX_PROCESSOR_NAME); - char* hostname = hostnames+rank*MPI_MAX_PROCESSOR_NAME; - - int namelen; - MPI_Get_processor_name(hostname,&namelen); - - MPI_Allgather(MPI_IN_PLACE , MPI_MAX_PROCESSOR_NAME, MPI_CHAR, - hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_COMM_WORLD); - - int localRank = 0; - int localSize = 0; - for (int n=0; n0 && localRank>=deviceCount) { - stringstream ss; - ss << "Rank " << rank << " oversubscribing CUDA device " << device_id%deviceCount << " on node \"" << hostname<< "\""; - LIBP_WARNING(ss.str()); - device_id = device_id%deviceCount; - } -#endif - } - else if(settings.compareSetting("THREAD MODEL", "HIP")){ -#if OCCA_HIP_ENABLED - int deviceCount = occa::hip::getDeviceCount(); - if (deviceCount>0 && localRank>=deviceCount) { - stringstream ss; - ss << "Rank " << rank << " oversubscribing HIP device " << device_id%deviceCount << " on node \"" << hostname<< "\""; - LIBP_WARNING(ss.str()); - device_id = device_id%deviceCount; - } -#endif - } - else if(settings.compareSetting("THREAD MODEL", "OpenCL")){ -#if OCCA_OPENCL_ENABLED - settings.getSetting("PLATFORM NUMBER", plat); - int deviceCount = occa::opencl::getDeviceCountInPlatform(plat); - if (deviceCount>0 && localRank>=deviceCount) { - stringstream ss; - ss << "Rank " << rank << " oversubscribing OpenCL device " << device_id%deviceCount << " on node \"" << hostname<< "\""; - LIBP_WARNING(ss.str()); - device_id = device_id%deviceCount; - } -#endif - } - MPI_Barrier(MPI_COMM_WORLD); - free(hostnames); - } + if(settings.compareSetting("THREAD MODEL", "OpenCL")) + settings.getSetting("PLATFORM NUMBER", plat); // read thread model/device/platform from settings std::string mode; if(settings.compareSetting("THREAD MODEL", "CUDA")){ - mode = "mode: 'CUDA', device_id: " + std::to_string(device_id); + mode = "mode: 'CUDA'"; } else if(settings.compareSetting("THREAD MODEL", "HIP")){ - mode = "mode: 'HIP', device_id: " + std::to_string(device_id); + mode = "mode: 'HIP'"; } else if(settings.compareSetting("THREAD MODEL", "OpenCL")){ - mode = "mode: 'OpenCL', platform_id : " + std::to_string(plat) - + ", device_id: " + std::to_string(device_id); + mode = "mode: 'OpenCL', platform_id : " + std::to_string(plat); } else if(settings.compareSetting("THREAD MODEL", "OpenMP")){ mode = "mode: 'OpenMP'"; @@ -155,6 +55,51 @@ void platform_t::DeviceConfig(){ mode = "mode: 'Serial'"; } + //add a device_id number for some modes + if ( settings.compareSetting("THREAD MODEL", "CUDA") + ||settings.compareSetting("THREAD MODEL", "HIP") + ||settings.compareSetting("THREAD MODEL", "OpenCL")) { + //for testing a single device, run with 1 rank and specify DEVICE NUMBER + if (size==1) { + settings.getSetting("DEVICE NUMBER",device_id); + } else { + //find out how many ranks and devices are on this system + char* hostnames = (char *) ::malloc(size*sizeof(char)*MPI_MAX_PROCESSOR_NAME); + char* hostname = hostnames+rank*MPI_MAX_PROCESSOR_NAME; + + int namelen; + MPI_Get_processor_name(hostname,&namelen); + + MPI_Allgather(MPI_IN_PLACE , MPI_MAX_PROCESSOR_NAME, MPI_CHAR, + hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_COMM_WORLD); + + int localRank = 0; + int localSize = 0; + for (int n=0; n0 && localRank>=deviceCount) { + stringstream ss; + ss << "Rank " << rank << " oversubscribing device " << device_id%deviceCount << " on node \"" << hostname<< "\""; + LIBP_WARNING(ss.str()); + device_id = device_id%deviceCount; + } + MPI_Barrier(MPI_COMM_WORLD); + free(hostnames); + } + + // add device_id to setup string + mode += ", device_id: " + std::to_string(device_id); + } + //set number of omp threads to use //int Ncores = sysconf(_SC_NPROCESSORS_ONLN); //int Nthreads = Ncores/localSize;