From 01ec0f860315e966ce042551eaaea7080d920673 Mon Sep 17 00:00:00 2001 From: "Isaev, Ilya" Date: Tue, 13 Jun 2023 07:24:56 -0500 Subject: [PATCH] Allow building and loading of TBBBind on macOS Signed-off-by: Isaev, Ilya --- CMakeLists.txt | 2 +- include/oneapi/tbb/detail/_config.h | 3 +++ src/tbb/arena.cpp | 2 +- src/tbb/arena.h | 2 +- src/tbb/governor.cpp | 12 +++++++++--- src/tbbbind/def/mac64-tbbbind.def | 18 ++++++++++++++++++ src/tbbbind/tbb_bind.cpp | 5 +++++ 7 files changed, 38 insertions(+), 6 deletions(-) create mode 100755 src/tbbbind/def/mac64-tbbbind.def diff --git a/CMakeLists.txt b/CMakeLists.txt index 4787294177..7bc872b5c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,7 +244,7 @@ else() add_subdirectory(src/tbbmalloc_proxy) endif() endif() - if (APPLE OR NOT BUILD_SHARED_LIBS) + if (NOT BUILD_SHARED_LIBS) message(STATUS "TBBBind build targets are disabled due to unsupported environment") else() add_subdirectory(src/tbbbind) diff --git a/include/oneapi/tbb/detail/_config.h b/include/oneapi/tbb/detail/_config.h index ad9f0f3112..0cd592807b 100644 --- a/include/oneapi/tbb/detail/_config.h +++ b/include/oneapi/tbb/detail/_config.h @@ -380,6 +380,9 @@ #define __TBB_ARENA_BINDING 1 #endif +// Thread pinning is not available on macOS* +#define __TBB_CPUBIND_PRESENT (__TBB_ARENA_BINDING && !__APPLE__) + #ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY #define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 #endif diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index 8c2490670a..e54a40cb4d 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -452,7 +452,7 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { ta.my_arena.store(a, std::memory_order_release); // add an internal market reference; a public reference was added in create_arena market::global_market( /*is_public=*/false); -#if __TBB_ARENA_BINDING +#if __TBB_CPUBIND_PRESENT a->my_numa_binding_observer = construct_binding_observer( static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, 
ta.core_type(), ta.max_threads_per_core()); #endif /*__TBB_ARENA_BINDING*/ diff --git a/src/tbb/arena.h b/src/tbb/arena.h index 73dd116cb3..22cbc1e715 100644 --- a/src/tbb/arena.h +++ b/src/tbb/arena.h @@ -243,7 +243,7 @@ struct arena_base : padded<intrusive_list_node> { #if __TBB_ARENA_BINDING //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. - numa_binding_observer* my_numa_binding_observer; + numa_binding_observer* my_numa_binding_observer{nullptr}; #endif /*__TBB_ARENA_BINDING*/ // Below are rarely modified members diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp index 3111ab3e7b..f262803627 100644 --- a/src/tbb/governor.cpp +++ b/src/tbb/governor.cpp @@ -367,15 +367,18 @@ static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) = dummy_get_default_concurrency; -#if _WIN32 || _WIN64 || __unix__ +#if _WIN32 || _WIN64 || __unix__ || __APPLE__ + // Table describing how to link the handlers. 
static const dynamic_link_descriptor TbbBindLinkTable[] = { DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr), +#if __TBB_CPUBIND_PRESENT DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), +#endif DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) }; @@ -390,6 +393,9 @@ static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_ #if _WIN32 || _WIN64 #define LIBRARY_EXTENSION ".dll" #define LIBRARY_PREFIX +#elif __APPLE__ +#define LIBRARY_EXTENSION __TBB_STRING(.3.dylib) +#define LIBRARY_PREFIX "lib" #elif __unix__ #define LIBRARY_EXTENSION __TBB_STRING(.so.3) #define LIBRARY_PREFIX "lib" @@ -418,7 +424,7 @@ int core_types_count = 0; int* core_types_indexes = nullptr; const char* load_tbbbind_shared_object() { -#if _WIN32 || _WIN64 || __unix__ +#if _WIN32 || _WIN64 || __unix__ || __APPLE__ #if _WIN32 && !_WIN64 // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. SYSTEM_INFO si; @@ -430,7 +436,7 @@ const char* load_tbbbind_shared_object() { return tbbbind_version; } } -#endif /* _WIN32 || _WIN64 || __unix__ */ +#endif /* _WIN32 || _WIN64 || __unix__ || __APPLE__ */ return nullptr; } diff --git a/src/tbbbind/def/mac64-tbbbind.def b/src/tbbbind/def/mac64-tbbbind.def new file mode 100755 index 0000000000..be72bcf9a6 --- /dev/null +++ b/src/tbbbind/def/mac64-tbbbind.def @@ -0,0 +1,18 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +___TBB_internal_initialize_system_topology +___TBB_internal_get_default_concurrency +___TBB_internal_destroy_system_topology + diff --git a/src/tbbbind/tbb_bind.cpp b/src/tbbbind/tbb_bind.cpp index 38201c710b..c61b0ce215 100644 --- a/src/tbbbind/tbb_bind.cpp +++ b/src/tbbbind/tbb_bind.cpp @@ -104,6 +104,7 @@ class system_topology { if ( initialization_state != topology_loaded ) return; +#if __TBB_CPUBIND_PRESENT // Getting process affinity mask if ( intergroup_binding_allowed(groups_num) ) { process_cpu_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology)); @@ -115,6 +116,10 @@ class system_topology { assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0); hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask); } +#else + process_cpu_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology)); + process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology)); +#endif number_of_processors_groups = groups_num; }