Merge pull request #11525 from tensor-tang/refine/mem

tensor-tang · web-flow · commit e8f5757d6692 · 2018-06-19T14:35:28.000+08:00
refine the initial cpu memory flag for mkldnn
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
               "Default use 100% of CPU memory for PaddlePaddle,"
               "reserve the rest for page tables, etc");
 
-DEFINE_uint64(
-    initial_cpu_memory_in_mb, 500,
-    "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit.");
+DEFINE_uint64(initial_cpu_memory_in_mb,
+#ifdef PADDLE_WITH_MKLDNN
+              /* Aligned with mozga-intel, MKLDNN need at least 5000 MB
+               * to obtain the best performance*/
+              5000,
+#else
+              500,
+#endif
+              "Initial CPU memory for PaddlePaddle, in MD unit.");
 
 DEFINE_double(
     fraction_of_cuda_pinned_memory_to_use, 0.5,
@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() {
 size_t CpuMaxAllocSize() {
   // For distributed systems, it requires configuring and limiting
   // the fraction of memory to use.
-  return std::min(
-      static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
-                          CpuTotalPhysicalMemory()),
-      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
+  return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
 }
 
 size_t CpuMinChunkSize() {
@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() {
 }
 
 size_t CpuMaxChunkSize() {
-  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
-  return CpuMaxAllocSize() / 32;
+  // Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
+  // or the initial_cpu_memory_in_mb.
+  return std::min(
+      static_cast<size_t>(CpuMaxAllocSize() / 32),
+      static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
 }
 
 size_t CUDAPinnedMaxAllocSize() {
diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc
@@ -30,8 +30,9 @@ int main(int argc, char** argv) {
   new_argv.push_back(
       strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
 #else
-  new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
-  new_argv.push_back(strdup("--undefok=use_mkldnn"));
+  new_argv.push_back(strdup(
+      "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb"));
+  new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb"));
 #endif
   int new_argc = static_cast<int>(new_argv.size());
   char** new_argv_address = new_argv.data();
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
@@ -117,7 +117,7 @@ def __bootstrap__():
 
     read_env_flags = [
         'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
-        'eager_delete_scope', 'use_mkldnn'
+        'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'
     ]
     if core.is_compiled_with_cuda():
         read_env_flags += [

Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ def __bootstrap__():`
`117`	`117`
`118`	`118`	`read_env_flags = [`
`119`	`119`	`'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',`
`120`		`- 'eager_delete_scope', 'use_mkldnn'`
	`120`	`+ 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'`
`121`	`121`	`]`
`122`	`122`	`if core.is_compiled_with_cuda():`
`123`	`123`	`read_env_flags += [`