Skip to content

[INTEL MKL] Added the feature to disable MKL support of TensorFlow by environment… #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions tensorflow/core/common_runtime/mkl_cpu_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/util/util.h"
#include "tensorflow/core/platform/numa.h"

#ifndef INTEL_MKL_DNN_ONLY
Expand Down Expand Up @@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
}

Status Initialize() {
if (DisableMKL()) {
VLOG(1) << "TF-MKL: Disabling pool allocator";
tf_disable_pool_allocator_flag_ = true;
return Status::OK();
}

VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";

// Set upper bound on memory allocation to physical RAM available on the
Expand Down Expand Up @@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
// Human-readable allocator name (kName), used for logging/monitoring.
inline string Name() override { return kName; }

inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
if (tf_disable_pool_allocator_flag_) {
return port::AlignedMalloc(num_bytes, alignment);
}

// If the allocation size is less than threshold, call small allocator,
// otherwise call large-size allocator (BFC). We found that BFC allocator
// does not deliver good performance for small allocations when
Expand All @@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
}

inline void DeallocateRaw(void* ptr) override {
if (tf_disable_pool_allocator_flag_) {
port::AlignedFree(ptr);
return;
}
// Check if ptr is for "small" allocation. If it is, then call Free
// directly. Otherwise, call BFC to handle free.
if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
Expand All @@ -237,25 +252,29 @@ class MklCPUAllocator : public Allocator {
}

// Reports combined allocation statistics from the small-size and
// large-size (BFC) sub-allocators into *stats.
//
// When the pool allocator is disabled (TF_DISABLE_MKL), allocations go
// straight through AlignedMalloc/AlignedFree and no statistics are
// tracked, so *stats is left untouched in that mode.
void GetStats(AllocatorStats* stats) override {
  if (!tf_disable_pool_allocator_flag_) {
    AllocatorStats l_stats, s_stats;
    small_size_allocator_->GetStats(&s_stats);
    large_size_allocator_->GetStats(&l_stats);

    // Combine statistics from small-size and large-size allocator.
    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
    stats->max_bytes_in_use =
        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;

    // Since small-size allocations go to MklSmallSizeAllocator,
    // max_alloc_size from large_size_allocator would be the maximum
    // size allocated by MklCPUAllocator.
    stats->max_alloc_size = l_stats.max_alloc_size;
  }
}

// Resets statistics on both sub-allocators.
//
// No-op when the pool allocator is disabled (TF_DISABLE_MKL): the
// sub-allocators are unused in that mode and collect no statistics.
void ClearStats() override {
  if (!tf_disable_pool_allocator_flag_) {
    small_size_allocator_->ClearStats();
    large_size_allocator_->ClearStats();
  }
}

private:
Expand Down Expand Up @@ -294,6 +313,7 @@ class MklCPUAllocator : public Allocator {
// The alignment that we need for the allocations
static constexpr const size_t kAlignment = 64;

bool tf_disable_pool_allocator_flag_ = false;
Allocator* large_size_allocator_; // owned by this class
MklSmallSizeAllocator* small_size_allocator_; // owned by this class.

Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/common_runtime/process_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ limitations under the License.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/util.h"

namespace tensorflow {

Expand Down Expand Up @@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
const int32 inter_op = options.config.inter_op_parallelism_threads();
if (inter_op != 0) return inter_op;
#ifdef INTEL_MKL
// Early return if MKL is disabled
if (DisableMKL())
return port::NumSchedulableCPUs();

// MKL library executes ops in parallel using OMP threads
// Set inter_op conservatively to avoid thread oversubscription that could
// lead to severe perf degradations and OMP resource exhaustion
Expand Down
4 changes: 4 additions & 0 deletions tensorflow/core/common_runtime/threadpool_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/util/util.h"

#ifdef INTEL_MKL
#ifdef _OPENMP
Expand All @@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
allocator_(allocator),
scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
#ifdef INTEL_MKL
// Early return when MKL is disabled
if (DisableMKL())
return;
#ifdef _OPENMP
const char* user_omp_threads = getenv("OMP_NUM_THREADS");
if (user_omp_threads == nullptr) {
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/graph/mkl_layout_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ limitations under the License.
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/tensor_format.h"
#include "tensorflow/core/util/util.h"

#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/graph/mkl_layout_pass.h"
Expand Down Expand Up @@ -4488,6 +4489,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
if (options.graph == nullptr && options.partition_graphs == nullptr) {
return Status::OK();
}
if (DisableMKL()) {
VLOG(2) << "TF-MKL: Disabling MKL";
return Status::OK();
}

auto process_graph = [&](std::unique_ptr<Graph>* g) {
// Get the ownership of a graph
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/graph/mkl_tfconversion_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/util.h"

#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/graph/mkl_tfconversion_pass.h"
Expand Down Expand Up @@ -422,6 +423,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
if (options.graph == nullptr && options.partition_graphs == nullptr) {
return Status::OK();
}
if (DisableMKL()) {
VLOG(2) << "TF-MKL: Disabling MKL";
return Status::OK();
}

auto process_graph = [&](std::unique_ptr<Graph>* g) {
// Get the ownership of graph
Expand Down
20 changes: 20 additions & 0 deletions tensorflow/core/util/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,24 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
return result;
}

#ifdef INTEL_MKL
// Returns true if the user requested that MKL be disabled at runtime by
// setting the environment variable TF_DISABLE_MKL=1.
//
// The environment is consulted only on the first call; the decision is
// cached in a function-local static for all subsequent calls.
// NOTE(review): the cached write is not synchronized — concurrent first
// calls race benignly (both compute the same value); confirm callers
// tolerate this.
bool DisableMKL() {
  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
  static MklStatus status = MKL_DEFAULT;
  if (status == MKL_DEFAULT) {
    // getenv returns nullptr when the variable is unset.
    const char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
    // Use strtol rather than std::stoi: stoi throws std::invalid_argument
    // on a non-numeric value (e.g. TF_DISABLE_MKL=""), which would crash
    // the process during startup. strtol returns 0 on parse failure.
    if (tf_disable_mkl != nullptr &&
        strtol(tf_disable_mkl, nullptr, 10) == 1) {
      VLOG(2) << "TF-MKL: Disabling MKL";
      status = MKL_OFF;
    } else {
      status = MKL_ON;
    }
  }
  return status == MKL_OFF;
}
#endif
} // namespace tensorflow
5 changes: 5 additions & 0 deletions tensorflow/core/util/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
// "tensor", "tensor[i]", "tensor[i, j]", etc.
string SliceDebugString(const TensorShape& shape, const int64 flat);

// Returns true if MKL should be disabled at runtime (TF_DISABLE_MKL=1).
#ifdef INTEL_MKL
bool DisableMKL();
#endif

} // namespace tensorflow

#endif // TENSORFLOW_CORE_UTIL_UTIL_H_