Skip to content

[INTEL MKL] Added the feature to disable MKL support of TensorFlow by environment… #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions tensorflow/core/common_runtime/mkl_cpu_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/util/util.h"
#include "tensorflow/core/platform/numa.h"

#ifndef INTEL_MKL_DNN_ONLY
Expand Down Expand Up @@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
}

Status Initialize() {
if (DisableMKL()) {
VLOG(1) << "TF-MKL: Disabling pool allocator";
tf_disable_pool_allocator_flag_ = true;
return Status::OK();
}

VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";

// Set upper bound on memory allocation to physical RAM available on the
Expand Down Expand Up @@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
// Human-readable allocator name (kName), used for logging/monitoring.
inline string Name() override { return kName; }

inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
if (tf_disable_pool_allocator_flag_) {
return port::AlignedMalloc(num_bytes, alignment);
}

// If the allocation size is less than threshold, call small allocator,
// otherwise call large-size allocator (BFC). We found that BFC allocator
// does not deliver good performance for small allocations when
Expand All @@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
}

inline void DeallocateRaw(void* ptr) override {
if (tf_disable_pool_allocator_flag_) {
port::AlignedFree(ptr);
return;
}
// Check if ptr is for "small" allocation. If it is, then call Free
// directly. Otherwise, call BFC to handle free.
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
Expand All @@ -237,25 +252,29 @@ class MklCPUAllocator : public Allocator {
}

// Reports combined allocation statistics from the small-size and
// large-size (BFC) sub-allocators into *stats.
//
// When the pool allocator is disabled (TF_DISABLE_MKL), allocations go
// straight through AlignedMalloc/AlignedFree and no statistics are
// tracked, so *stats is left untouched in that mode.
void GetStats(AllocatorStats* stats) override {
  if (!tf_disable_pool_allocator_flag_) {
    AllocatorStats l_stats, s_stats;
    small_size_allocator_->GetStats(&s_stats);
    large_size_allocator_->GetStats(&l_stats);

    // Combine statistics from small-size and large-size allocator.
    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
    stats->max_bytes_in_use =
        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;

    // Since small-size allocations go to MklSmallSizeAllocator,
    // max_alloc_size from large_size_allocator would be the maximum
    // size allocated by MklCPUAllocator.
    stats->max_alloc_size = l_stats.max_alloc_size;
  }
}

// Resets statistics on both sub-allocators.
//
// No-op when the pool allocator is disabled (TF_DISABLE_MKL): the
// sub-allocators are unused in that mode and collect no statistics.
void ClearStats() override {
  if (!tf_disable_pool_allocator_flag_) {
    small_size_allocator_->ClearStats();
    large_size_allocator_->ClearStats();
  }
}

private:
Expand Down Expand Up @@ -294,6 +313,7 @@ class MklCPUAllocator : public Allocator {
// The alignment that we need for the allocations
static constexpr const size_t kAlignment = 64;

bool tf_disable_pool_allocator_flag_ = false;
Allocator* large_size_allocator_; // owned by this class
MklSmallSizeAllocator* small_size_allocator_; // owned by this class.

Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/common_runtime/process_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/util.h"

namespace tensorflow {

Expand Down Expand Up @@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
const int32 inter_op = options.config.inter_op_parallelism_threads();
if (inter_op != 0) return inter_op;
#ifdef INTEL_MKL
// Early return if MKL is disabled
if (DisableMKL())
return port::NumSchedulableCPUs();

// MKL library executes ops in parallel using OMP threads
// Set inter_op conservatively to avoid thread oversubscription that could
// lead to severe perf degradations and OMP resource exhaustion
Expand Down
4 changes: 4 additions & 0 deletions tensorflow/core/common_runtime/threadpool_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/util/util.h"

#ifdef INTEL_MKL
#ifdef _OPENMP
Expand All @@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
allocator_(allocator),
scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
#ifdef INTEL_MKL
// Early return when MKL is disabled
if (DisableMKL())
return;
#ifdef _OPENMP
const char* user_omp_threads = getenv("OMP_NUM_THREADS");
if (user_omp_threads == nullptr) {
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/graph/mkl_layout_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ limitations under the License.
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/tensor_format.h"
#include "tensorflow/core/util/util.h"

#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/graph/mkl_layout_pass.h"
Expand Down Expand Up @@ -4488,6 +4489,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
if (options.graph == nullptr && options.partition_graphs == nullptr) {
return Status::OK();
}
if (DisableMKL()) {
VLOG(2) << "TF-MKL: Disabling MKL";
return Status::OK();
}

auto process_graph = [&](std::unique_ptr<Graph>* g) {
// Get the ownership of a graph
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/graph/mkl_tfconversion_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/util.h"

#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
Expand Down Expand Up @@ -422,6 +423,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
if (options.graph == nullptr && options.partition_graphs == nullptr) {
return Status::OK();
}
if (DisableMKL()) {
VLOG(2) << "TF-MKL: Disabling MKL";
return Status::OK();
}

auto process_graph = [&](std::unique_ptr<Graph>* g) {
// Get the ownership of graph
Expand Down
20 changes: 20 additions & 0 deletions tensorflow/core/util/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,24 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
return result;
}

#ifdef INTEL_MKL
// Returns true if the user requested that MKL be disabled at runtime by
// setting the environment variable TF_DISABLE_MKL=1.
//
// The environment is consulted only on the first call; the decision is
// cached in a function-local static for all subsequent calls.
// NOTE(review): the cached write is not synchronized — concurrent first
// calls race benignly (both compute the same value); confirm callers
// tolerate this.
bool DisableMKL() {
  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
  static MklStatus status = MKL_DEFAULT;
  if (status == MKL_DEFAULT) {
    // getenv returns nullptr when the variable is unset.
    const char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
    // Use strtol rather than std::stoi: stoi throws std::invalid_argument
    // on a non-numeric value (e.g. TF_DISABLE_MKL=""), which would crash
    // the process during startup. strtol returns 0 on parse failure.
    if (tf_disable_mkl != nullptr &&
        strtol(tf_disable_mkl, nullptr, 10) == 1) {
      VLOG(2) << "TF-MKL: Disabling MKL";
      status = MKL_OFF;
    } else {
      status = MKL_ON;
    }
  }
  return status == MKL_OFF;
}
#endif
} // namespace tensorflow
5 changes: 5 additions & 0 deletions tensorflow/core/util/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
// "tensor", "tensor[i]", "tensor[i, j]", etc.
string SliceDebugString(const TensorShape& shape, const int64 flat);

// Returns true if MKL should be disabled at runtime (TF_DISABLE_MKL=1).
#ifdef INTEL_MKL
bool DisableMKL();
#endif

} // namespace tensorflow

#endif // TENSORFLOW_CORE_UTIL_UTIL_H_