Bring back MXNET_GPU_COPY_NTHREADS env variable (apache#11237)
* bring back MXNET_GPU_COPY_NTHREADS env variable

* make variable size_t for consistency
ctcyang authored and szha committed Jun 18, 2018
1 parent 92fde19 commit 0ac9f8b
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/faq/env_var.md
@@ -14,7 +14,7 @@ export MXNET_GPU_WORKER_NTHREADS=3
   - Values: Int ```(default=2)```
   - The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.
 * MXNET_GPU_COPY_NTHREADS
-  - Values: Int ```(default=1)```
+  - Values: Int ```(default=2)```
   - The maximum number of concurrent threads that do the memory copy job on each GPU.
 * MXNET_CPU_WORKER_NTHREADS
   - Values: Int ```(default=1)```
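
For context (not part of this commit): a minimal, self-contained sketch of how an integer environment variable with a default is read. The helper name `GetEnvOrDefault` is hypothetical and simply mirrors what `dmlc::GetEnv` does in the engine change below.

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

// Hypothetical stand-in for dmlc::GetEnv: return the integer value of an
// environment variable, or a default when the variable is unset.
static size_t GetEnvOrDefault(const char* name, size_t default_value) {
  const char* value = std::getenv(name);
  return value != nullptr ? static_cast<size_t>(std::stoul(value)) : default_value;
}

int main() {
  // With this commit, the copy-thread count defaults to 2 and can be
  // overridden before launch, e.g. `export MXNET_GPU_COPY_NTHREADS=4`.
  const size_t copy_threads = GetEnvOrDefault("MXNET_GPU_COPY_NTHREADS", 2);
  std::cout << "GPU copy threads: " << copy_threads << std::endl;
  return 0;
}
```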
6 changes: 5 additions & 1 deletion src/engine/threaded_engine_perdevice.cc
@@ -76,6 +76,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
     if (is_worker_) return;
     gpu_worker_nthreads_ = common::GetNumThreadsPerGPU();
     cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
+    gpu_copy_nthreads_ = dmlc::GetEnv("MXNET_GPU_COPY_NTHREADS", 2);
     // create CPU task
     int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);
     cpu_priority_worker_.reset(new ThreadWorkerBlock<kPriorityQueue>());
@@ -128,8 +129,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
       const FnProperty prop = opr_block->opr->prop;
       const bool is_copy = (prop == FnProperty::kCopyFromGPU ||
                             prop == FnProperty::kCopyToGPU);
-      const size_t nthread = gpu_worker_nthreads_;
       if (is_copy) {
+        const size_t nthread = gpu_copy_nthreads_;
         auto ptr = gpu_copy_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
           // Signify to kernel that GPU is being used, so reserve cores as necessary
           OpenMP::Get()->set_reserve_cores(GetReserveCoreCount(true));
@@ -150,6 +151,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
           }
         }
       } else {
+        const size_t nthread = gpu_worker_nthreads_;
         auto ptr = gpu_normal_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
           // Signify to kernel that GPU is being used, so reserve cores as necessary
           OpenMP::Get()->set_reserve_cores(GetReserveCoreCount(true));
@@ -194,6 +196,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
   size_t cpu_worker_nthreads_;
   /*! \brief number of concurrent thread each gpu worker uses */
   size_t gpu_worker_nthreads_;
+  /*! \brief number of concurrent thread each gpu copy worker uses */
+  size_t gpu_copy_nthreads_;
   // cpu worker
   common::LazyAllocArray<ThreadWorkerBlock<kWorkerQueue> > cpu_normal_workers_;
   // cpu priority worker
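
To illustrate the effect of the change, here is a simplified, hedged sketch (the type and member names below are stand-ins, not the actual engine code) of how the per-device engine now picks a thread count for an operation, using the `gpu_copy_nthreads_` member restored by this commit:

```cpp
#include <cstddef>
#include <iostream>

// Simplified stand-ins for the engine's types; the real logic lives in
// src/engine/threaded_engine_perdevice.cc.
enum class FnProperty { kNormal, kCopyFromGPU, kCopyToGPU };

struct PerDeviceEngineSketch {
  size_t gpu_worker_nthreads_ = 2;  // MXNET_GPU_WORKER_NTHREADS
  size_t gpu_copy_nthreads_ = 2;    // MXNET_GPU_COPY_NTHREADS (restored here)

  // Copy operations get their own thread count; everything else uses the
  // normal GPU worker count, matching the branch shown in the diff above.
  size_t ThreadCountFor(FnProperty prop) const {
    const bool is_copy = (prop == FnProperty::kCopyFromGPU ||
                          prop == FnProperty::kCopyToGPU);
    return is_copy ? gpu_copy_nthreads_ : gpu_worker_nthreads_;
  }
};

int main() {
  PerDeviceEngineSketch engine;
  std::cout << engine.ThreadCountFor(FnProperty::kCopyToGPU) << std::endl;  // copy pool size
  std::cout << engine.ThreadCountFor(FnProperty::kNormal) << std::endl;     // worker pool size
  return 0;
}
```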