Skip to content

Commit

Permalink
[OpenMP][libomp] Add topology information to thread structure
Browse files Browse the repository at this point in the history
Each time a thread gets a new affinity assigned, it will not
only assign its mask, but also topology information including
which socket, core, thread and core-attributes (if available)
it is now assigned. This occurs for all non-disabled KMP_AFFINITY
values as well as OMP_PLACES/OMP_PROC_BIND.

The information regarding which socket, core, etc. can take on three
values:
  1) The actual ID of the unit (0 - (N-1)), given N units
  2) UNKNOWN_ID (-1) which indicates it does not know which ID
  3) MULTIPLE_ID (-2) which indicates the thread is spread across
     multiple of this unit (e.g., affinity mask is spread across
     multiple hardware threads)
This new information is stored in the th_topology_ids[] array. For example,
to get the socket ID, one would read th_topology_ids[KMP_HW_SOCKET].
This could be expanded in the future to something more descriptive for
the "multiple" case, like a range of values. For now, the single
value suffices.

The information regarding the core attributes can take on two values:
  1) The actual core-type or core-eff
  2) KMP_HW_CORE_TYPE_UNKNOWN if the core type is unknown, and
     UNKNOWN_CORE_EFF (-1) if the core eff is unknown.
This new information is stored in th_topology_attrs. For example,
to get the core type, one would read
th_topology_attrs.core_type.

Differential Revision: https://reviews.llvm.org/D139854
  • Loading branch information
jpeyton52 committed Jan 17, 2023
1 parent 4e27097 commit f4cce0f
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 2 deletions.
35 changes: 34 additions & 1 deletion openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,15 @@ class KMPAffinity {
// Only 1 DWORD in the mask should have any procs set.
// Return the appropriate index, or -1 for an invalid mask.
virtual int get_proc_group() const { return -1; }
// Return the largest proc id that is set in this mask.
// Yields -1 when iteration visits no procs at all.
int get_max_cpu() const {
  int highest = -1;
  int proc;
  KMP_CPU_SET_ITERATE(proc, this) {
    highest = (proc > highest) ? proc : highest;
  }
  return highest;
}
};
void *operator new(size_t n);
void operator delete(void *p);
Expand Down Expand Up @@ -836,6 +845,26 @@ typedef struct kmp_affinity_flags_t {
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);

typedef struct kmp_affinity_ids_t {
int ids[KMP_HW_LAST];
int operator[](size_t idx) const { return ids[idx]; }
int &operator[](size_t idx) { return ids[idx]; }
kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
for (int i = 0; i < KMP_HW_LAST; ++i)
ids[i] = rhs[i];
return *this;
}
} kmp_affinity_ids_t;

// Core attributes associated with one affinity mask, packed into bit-fields
// (8 + 8 + 1 + 15 = 32 bits declared in total).
typedef struct kmp_affinity_attrs_t {
// Core type of the mask's hardware threads, or KMP_HW_CORE_TYPE_UNKNOWN
int core_type : 8;
// Core efficiency of the mask's hardware threads, or
// kmp_hw_attr_t::UNKNOWN_CORE_EFF
int core_eff : 8;
// 0 until the attributes have been filled in from a hardware thread, then 1
unsigned valid : 1;
// Unused padding bits
unsigned reserved : 15;
} kmp_affinity_attrs_t;
// Initializer for a kmp_affinity_attrs_t with unknown core type, unknown core
// efficiency, and the valid flag (and reserved bits) cleared.
#define KMP_AFFINITY_ATTRS_UNKNOWN \
{ KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }

typedef struct kmp_affinity_t {
char *proclist;
enum affinity_type type;
Expand All @@ -846,6 +875,8 @@ typedef struct kmp_affinity_t {
kmp_affinity_flags_t flags;
unsigned num_masks;
kmp_affin_mask_t *masks;
kmp_affinity_ids_t *ids;
kmp_affinity_attrs_t *attrs;
unsigned num_os_id_masks;
kmp_affin_mask_t *os_id_masks;
const char *env_var;
Expand All @@ -855,7 +886,7 @@ typedef struct kmp_affinity_t {
{ \
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
nullptr, 0, nullptr, env \
nullptr, nullptr, nullptr, 0, nullptr, env \
}

extern enum affinity_top_method __kmp_affinity_top_method;
Expand Down Expand Up @@ -2711,6 +2742,8 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {

#if KMP_AFFINITY_SUPPORTED
kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
#endif
omp_allocator_handle_t th_def_allocator; /* default allocator */
/* The data set by the primary thread at reinit, then R/W by the worker */
Expand Down
133 changes: 132 additions & 1 deletion openmp/runtime/src/kmp_affinity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4053,6 +4053,107 @@ static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,

static int *procarr = NULL;
static int __kmp_aff_depth = 0;
// Lookup table indexed by OS proc id; each entry is the index of the matching
// hardware thread inside __kmp_topology. Allocated on first use by
// __kmp_affinity_get_topology_info() and released (and reset to NULL) in
// __kmp_affinity_uninitialize().
static int *__kmp_osid_to_hwthread_map = NULL;

// Compute the topology ids and core attributes that describe an affinity mask.
//
// mask  - the set of OS procs to summarize
// ids   - output; for each topology layer type, the unit id shared by every
//         proc in the mask, MULTIPLE_ID if the procs disagree, or UNKNOWN_ID
//         if the layer was never visited
// attrs - output; the core type / core efficiency shared by every proc in the
//         mask, or the corresponding "unknown" value if the procs disagree
//
// Does nothing (outputs untouched) when affinity is not capable.
// Relies on __kmp_osid_to_hwthread_map already being populated.
static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
                                                  kmp_affinity_ids_t &ids,
                                                  kmp_affinity_attrs_t &attrs) {
  if (!KMP_AFFINITY_CAPABLE())
    return;

  // Start from a clean slate: every id unknown, attributes unknown/invalid
  for (int slot = 0; slot < KMP_HW_LAST; ++slot)
    ids[slot] = kmp_hw_thread_t::UNKNOWN_ID;
  attrs = KMP_AFFINITY_ATTRS_UNKNOWN;

  const int num_levels = __kmp_topology->get_depth();
  int os_proc;
  KMP_CPU_SET_ITERATE(os_proc, mask) {
    // Translate the OS proc id into its hardware thread record
    int hw_index = __kmp_osid_to_hwthread_map[os_proc];
    const kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_index);

    // Merge this hardware thread's ids into the running summary
    for (int level = 0; level < num_levels; ++level) {
      kmp_hw_t type = __kmp_topology->get_type(level);
      int sub_id = hw_thread.sub_ids[level];
      int &recorded = ids[type];
      if (recorded == kmp_hw_thread_t::UNKNOWN_ID || recorded == sub_id) {
        recorded = sub_id;
        continue;
      }
      // The mask covers more than one unit at this level, so this level and
      // every level beneath it is flagged as spanning multiple units.
      for (int lower = level; lower < num_levels; ++lower)
        ids[__kmp_topology->get_type(lower)] = kmp_hw_thread_t::MULTIPLE_ID;
      break;
    }

    // Merge this hardware thread's core attributes into the running summary
    if (attrs.valid) {
      // Any disagreement with what was recorded degrades that attribute to
      // its "unknown" value.
      if (attrs.core_type != hw_thread.attrs.get_core_type())
        attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
      if (attrs.core_eff != hw_thread.attrs.get_core_eff())
        attrs.core_eff = kmp_hw_attr_t::UNKNOWN_CORE_EFF;
    } else {
      // First hardware thread seen: adopt its attributes as-is
      attrs.core_type = hw_thread.attrs.get_core_type();
      attrs.core_eff = hw_thread.attrs.get_core_eff();
      attrs.valid = 1;
    }
  }
}

// Refresh a thread's topology ids and attributes (th_topology_ids and
// th_topology_attrs) from its current affinity mask (th_affin_mask).
// No-op when affinity is not capable.
static void __kmp_affinity_get_thread_topology_info(kmp_info_t *th) {
  if (KMP_AFFINITY_CAPABLE()) {
    __kmp_affinity_get_mask_topology_info(th->th.th_affin_mask,
                                          th->th.th_topology_ids,
                                          th->th.th_topology_attrs);
  }
}

// Assign the topology information to each place in the place list
// A thread can then grab not only its affinity mask, but the topology
// information associated with that mask. e.g., Which socket is a thread on
//
// affinity - the affinity settings whose masks[] are summarized; on return
//            affinity.ids[i] and affinity.attrs[i] describe affinity.masks[i]
//            for every i in [0, affinity.num_masks).
//
// The ids/attrs arrays and the global OS-id-to-hardware-thread map are
// allocated here the first time they are needed (only if still NULL).
// Does nothing when affinity is not capable.
static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
  if (!KMP_AFFINITY_CAPABLE())
    return;
  if (affinity.type != affinity_none) {
    KMP_ASSERT(affinity.num_os_id_masks);
    KMP_ASSERT(affinity.os_id_masks);
  }
  KMP_ASSERT(affinity.num_masks);
  KMP_ASSERT(affinity.masks);
  KMP_ASSERT(__kmp_affin_fullMask);

  int max_cpu = __kmp_affin_fullMask->get_max_cpu();
  int num_hw_threads = __kmp_topology->get_num_hw_threads();

  // Allocate thread topology information
  if (!affinity.ids) {
    affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate(
        sizeof(kmp_affinity_ids_t) * affinity.num_masks);
  }
  if (!affinity.attrs) {
    affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate(
        sizeof(kmp_affinity_attrs_t) * affinity.num_masks);
  }
  if (!__kmp_osid_to_hwthread_map) {
    // Want the +1 because max_cpu should be valid index into map
    __kmp_osid_to_hwthread_map =
        (int *)__kmp_allocate(sizeof(int) * (max_cpu + 1));
  }

  // Create the OS proc to hardware thread map
  for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
    int os_id = __kmp_topology->at(hw_thread).os_id;
    // The map is sized from the full mask (indices 0..max_cpu), while this
    // loop walks the topology. Skip any OS id that does not fit in the map
    // instead of writing outside the allocation.
    if (os_id < 0 || os_id > max_cpu)
      continue;
    __kmp_osid_to_hwthread_map[os_id] = hw_thread;
  }

  // Summarize every place's mask into its ids/attrs entry
  for (unsigned i = 0; i < affinity.num_masks; ++i) {
    kmp_affinity_ids_t &ids = affinity.ids[i];
    kmp_affinity_attrs_t &attrs = affinity.attrs[i];
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.masks, i);
    __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
  }
}

// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
Expand All @@ -4063,6 +4164,7 @@ static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
__kmp_affinity_get_topology_info(affinity);
}

static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
Expand Down Expand Up @@ -4432,6 +4534,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
affinity.type = affinity_none;
__kmp_create_affinity_none_places(affinity);
affinity.flags.initialized = TRUE;
return;
}
Expand Down Expand Up @@ -4508,6 +4611,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
__kmp_affinity_get_topology_info(affinity);
affinity.flags.initialized = TRUE;
}

Expand Down Expand Up @@ -4538,6 +4642,10 @@ void __kmp_affinity_uninitialize(void) {
KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
if (affinity->proclist != NULL)
__kmp_free(affinity->proclist);
if (affinity->ids != NULL)
__kmp_free(affinity->ids);
if (affinity->attrs != NULL)
__kmp_free(affinity->attrs);
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
}
if (__kmp_affin_origMask != NULL) {
Expand All @@ -4552,6 +4660,10 @@ void __kmp_affinity_uninitialize(void) {
__kmp_free(procarr);
procarr = NULL;
}
if (__kmp_osid_to_hwthread_map) {
__kmp_free(__kmp_osid_to_hwthread_map);
__kmp_osid_to_hwthread_map = NULL;
}
#if KMP_USE_HWLOC
if (__kmp_hwloc_topology != NULL) {
hwloc_topology_destroy(__kmp_hwloc_topology);
Expand Down Expand Up @@ -4584,12 +4696,21 @@ static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
*mask = KMP_CPU_INDEX(affinity->masks, *place);
}

// This function initializes the per-thread data concerning affinity including
// the mask and topology information
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {

kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

// Set the thread topology information to default of unknown
for (int id = 0; id < KMP_HW_LAST; ++id)
th->th.th_topology_ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;

if (!KMP_AFFINITY_CAPABLE()) {
return;
}

kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
if (th->th.th_affin_mask == NULL) {
KMP_CPU_ALLOC(th->th.th_affin_mask);
} else {
Expand Down Expand Up @@ -4654,6 +4775,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
th->th.th_first_place = 0;
th->th.th_last_place = affinity->num_masks - 1;
}
// Copy topology information associated with the place
if (i >= 0) {
th->th.th_topology_ids = __kmp_affinity.ids[i];
th->th.th_topology_attrs = __kmp_affinity.attrs[i];
}

if (i == KMP_PLACE_ALL) {
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
Expand Down Expand Up @@ -4718,6 +4844,9 @@ void __kmp_affinity_set_place(int gtid) {
KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
KMP_CPU_COPY(th->th.th_affin_mask, mask);
th->th.th_current_place = th->th.th_new_place;
// Copy topology information associated with the place
th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];

if (__kmp_affinity.flags.verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
Expand Down Expand Up @@ -5037,6 +5166,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
tid, buf);
}
__kmp_affinity_get_thread_topology_info(th);
__kmp_set_system_affinity(mask, TRUE);
} else { // Non-uniform topology

Expand Down Expand Up @@ -5203,6 +5333,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
tid, buf);
}
__kmp_affinity_get_thread_topology_info(th);
__kmp_set_system_affinity(mask, TRUE);
}
}
Expand Down
5 changes: 5 additions & 0 deletions openmp/runtime/src/kmp_affinity.h
Original file line number Diff line number Diff line change
Expand Up @@ -681,9 +681,14 @@ struct kmp_hw_attr_t {
bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};

// Fail the build if kmp_hw_attr_t and kmp_affinity_attrs_t ever differ in
// size. Only checked when affinity is supported.
#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif

class kmp_hw_thread_t {
public:
static const int UNKNOWN_ID = -1;
static const int MULTIPLE_ID = -2;
static int compare_ids(const void *a, const void *b);
static int compare_compact(const void *a, const void *b);
int ids[KMP_HW_LAST];
Expand Down

0 comments on commit f4cce0f

Please sign in to comment.