55
66#include " cpu_types.hpp"
77
8- void init_cpu_threads_env (const std::string& cpu_ids) {
8+ std::string init_cpu_threads_env (const std::string& cpu_ids) {
99 bitmask* omp_cpu_mask = numa_parse_cpustring (cpu_ids.c_str ());
1010 TORCH_CHECK (omp_cpu_mask->size > 0 );
1111 std::vector<int > omp_cpu_ids;
@@ -51,6 +51,12 @@ void init_cpu_threads_env(const std::string& cpu_ids) {
5151 torch::set_num_threads ((int )omp_cpu_ids.size ());
5252 TORCH_CHECK_EQ (omp_cpu_ids.size (), torch::get_num_threads ());
5353 TORCH_CHECK_EQ (omp_cpu_ids.size (), omp_get_max_threads ());
54+
55+ std::vector<std::pair<int , int >> thread_core_mapping;
56+ thread_core_mapping.reserve (omp_cpu_ids.size ());
57+ omp_lock_t writelock;
58+ omp_init_lock (&writelock);
59+
5460#pragma omp parallel for schedule(static, 1)
5561 for (size_t i = 0 ; i < omp_cpu_ids.size (); ++i) {
5662 cpu_set_t mask;
@@ -61,7 +67,24 @@ void init_cpu_threads_env(const std::string& cpu_ids) {
6167 TORCH_CHECK (false ,
6268 " sched_setaffinity failed. errno: " + std::to_string (errno));
6369 }
70+
71+ omp_set_lock (&writelock);
72+ thread_core_mapping.emplace_back (gettid (), omp_cpu_ids[i]);
73+ omp_unset_lock (&writelock);
6474 }
6575
76+ omp_destroy_lock (&writelock);
77+
6678 numa_free_nodemask (omp_cpu_mask);
79+
80+ std::stringstream ss;
81+ ss << " OMP threads binding of Process " << getpid () << " :\n " ;
82+ std::sort (thread_core_mapping.begin (), thread_core_mapping.end (),
83+ [](auto && a, auto && b) { return a.second < b.second ; });
84+ for (auto && item : thread_core_mapping) {
85+ ss << " \t "
86+ << " OMP tid: " << item.first << " , core " << item.second << " \n " ;
87+ }
88+
89+ return ss.str ();
6790}
0 commit comments