diff --git a/ccminer.cpp b/ccminer.cpp
index 2f357cc06a..690d76ab0f 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -127,6 +127,7 @@ uint16_t opt_vote = 9999;
 int num_cpus;
 int active_gpus;
 bool need_nvsettings = false;
+bool need_memclockrst = false;
 char * device_name[MAX_GPUS];
 short device_map[MAX_GPUS] = { 0 };
 long device_sm[MAX_GPUS] = { 0 };
@@ -346,6 +347,7 @@ Options:\n\
 #else /* via nvapi.dll */
 "\
  --mem-clock=3505 Set the gpu memory boost clock\n\
+ --mem-clock=+500 Set the gpu memory offset\n\
  --gpu-clock=1150 Set the gpu engine boost clock\n\
  --plimit=100 Set the gpu power limit in percentage\n\
  --tlimit=80 Set the gpu thermal limit in degrees\n\
@@ -605,6 +607,13 @@ void proper_exit(int reason)
 		}
 		nvml_destroy(hnvml);
 	}
+	if (need_memclockrst) {
+# ifdef WIN32
+		for (int n = 0; n < opt_n_threads && !opt_keep_clocks; n++) {
+			nvapi_toggle_clocks(n, false);
+		}
+# endif
+	}
 #endif
 	free(opt_syslog_pfx);
 	free(opt_api_bind);
@@ -2065,7 +2074,11 @@ static void *miner_thread(void *userdata)
 		if (!wanna_mine(thr_id))
 		{
 			// reset default mem offset before idle..
+#if defined(WIN32) && defined(USE_WRAPNVML)
+			if (need_memclockrst) nvapi_toggle_clocks(thr_id, false);
+#else
 			if (need_nvsettings) nvs_reset_clocks(dev_id);
+#endif
 			// free gpu resources
 			algo_free_all(thr_id);
 			// clear any free error (algo switch)
@@ -2092,7 +2105,11 @@ static void *miner_thread(void *userdata)
 				continue;
 			} else {
 				// reapply mem offset if needed
+#if defined(WIN32) && defined(USE_WRAPNVML)
+				if (need_memclockrst) nvapi_toggle_clocks(thr_id, true);
+#else
 				if (need_nvsettings) nvs_set_clocks(dev_id);
+#endif
 			}
 
 			pool_on_hold = false;
diff --git a/nvml.cpp b/nvml.cpp
index daa570c171..6a77986a31 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -34,6 +34,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 
 extern uint32_t device_gpu_clocks[MAX_GPUS];
 extern uint32_t device_mem_clocks[MAX_GPUS];
+extern int32_t device_mem_offsets[MAX_GPUS];
 extern uint8_t device_tlimit[MAX_GPUS];
 extern int8_t device_pstate[MAX_GPUS];
 extern int32_t device_led[MAX_GPUS];
@@ -45,6 +46,7 @@ uint32_t clock_prev_mem[MAX_GPUS] = { 0 };
 uint32_t limit_prev[MAX_GPUS] = { 0 };
 
 static bool nvml_plimit_set = false;
+extern bool need_memclockrst;
 
 /*
  * Wrappers to emulate dlopen() on other systems like Windows
@@ -1735,6 +1737,30 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
 	return ret;
 }
 
+static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true)
+{
+	NvAPI_Status ret;
+	NvS32 deltaKHz = delta * 1000;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	// todo: bounds check with GetPstates20
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000);
+		need_memclockrst = true;
+	}
+	return ret;
+}
+
 // Replacement for WIN32 CUDA 6.5 on pascal
 int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
 {
@@ -1844,15 +1870,23 @@ int nvapi_init_settings()
 			if (ret) {
 				NvAPI_ShortString string;
 				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
-				gpulog(LOG_WARNING, n, "Boost gpu clock %s", string);
+				gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string);
 			}
 		}
-		if (device_mem_clocks[dev_id]) {
+		if (device_mem_offsets[dev_id]) {
+			ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status)ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string);
+			}
+		}
+		else if (device_mem_clocks[dev_id]) {
 			ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
 			if (ret) {
 				NvAPI_ShortString string;
 				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
-				gpulog(LOG_WARNING, n, "Boost mem clock %s", string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string);
 			}
 		}
 		if (device_pstate[dev_id]) {
@@ -1870,6 +1904,14 @@ int nvapi_init_settings()
 	return ret;
 }
 
+void nvapi_toggle_clocks(int thr_id, bool enable)
+{
+	int dev_id = device_map[thr_id % MAX_GPUS];
+	if (device_mem_offsets[dev_id]) {
+		nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false);
+	}
+}
+
 unsigned int nvapi_devnum(int dev_id)
 {
 	return nvapi_dev_map[dev_id];
diff --git a/nvml.h b/nvml.h
index de5d16dff1..9145daa46d 100644
--- a/nvml.h
+++ b/nvml.h
@@ -236,6 +236,8 @@ uint8_t nvapi_get_plimit(unsigned int devNum);
 unsigned int nvapi_devnum(int dev_id);
 int nvapi_devid(unsigned int devNum);
 
+void nvapi_toggle_clocks(int thr_id, bool enable);
+
 // cuda Replacement for 6.5 compat
 int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total);
 #endif