Skip to content

Commit

Permalink
perf bench: Fix numa bench to fix usage of affinity for machines with…
Browse files Browse the repository at this point in the history
… #CPUs > 1K

The 'perf bench numa' testcase fails on systems with more than 1K CPUs.

Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp  1

Snippet of code:

  <<>>
  perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
  Aborted (core dumped)
  <<>>

bind_to_node() uses "sched_getaffinity" to save the original cpumask and
this call is returning EINVAL (invalid argument).

This happens because the default mask size in glibc is 1024.  To
overcome this 1024-CPU mask size limitation of cpu_set_t, change the
mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
cpumask and CPU_ALLOC_SIZE to obtain its size.

Apart from fixing this for "orig_mask", apply the same logic to "mask" as
well, which is passed to sched_setaffinity, so that the mask size is large
enough to represent the number of possible CPUs in the system.

sched_getaffinity is used in one more place in perf numa bench: the
"bind_to_cpu" function. Apply the same logic there as well. Though
currently no failure is reported from there, it is ideal to change
getaffinity to work on system configurations that have more CPUs
than the default mask size supported by glibc.

Also fix "sched_setaffinity" to use a mask size which is large enough to
represent the number of possible CPUs in the system.

Fix all places where "bind_cpumask", which is part of "struct
thread_data", is used so that bind_cpumask works in all configurations.

Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Signed-off-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nageswara R Sastry <rnsastry@linux.ibm.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lore.kernel.org/r/20220412164059.42654-3-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
athira-rajeev authored and acmel committed Apr 14, 2022
1 parent 8cb7a18 commit f58faed
Showing 1 changed file with 95 additions and 33 deletions.
128 changes: 95 additions & 33 deletions tools/perf/bench/numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

struct thread_data {
int curr_cpu;
cpu_set_t bind_cpumask;
cpu_set_t *bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
Expand Down Expand Up @@ -267,71 +267,115 @@ static bool node_has_cpus(int node)
return ret;
}

static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t *bind_to_cpu(int target_cpu)
{
cpu_set_t orig_mask, mask;
int ret;
int nrcpus = numa_num_possible_cpus();
cpu_set_t *orig_mask, *mask;
size_t size;

ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
orig_mask = CPU_ALLOC(nrcpus);
BUG_ON(!orig_mask);
size = CPU_ALLOC_SIZE(nrcpus);
CPU_ZERO_S(size, orig_mask);

if (sched_getaffinity(0, size, orig_mask))
goto err_out;

mask = CPU_ALLOC(nrcpus);
if (!mask)
goto err_out;

CPU_ZERO(&mask);
CPU_ZERO_S(size, mask);

if (target_cpu == -1) {
int cpu;

for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
} else {
BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
CPU_SET(target_cpu, &mask);
if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
goto err;

CPU_SET_S(target_cpu, size, mask);
}

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
if (sched_setaffinity(0, size, mask))
goto err;

return orig_mask;

err:
CPU_FREE(mask);
err_out:
CPU_FREE(orig_mask);

/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
}

static cpu_set_t bind_to_node(int target_node)
static cpu_set_t *bind_to_node(int target_node)
{
cpu_set_t orig_mask, mask;
int nrcpus = numa_num_possible_cpus();
size_t size;
cpu_set_t *orig_mask, *mask;
int cpu;
int ret;

ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
orig_mask = CPU_ALLOC(nrcpus);
BUG_ON(!orig_mask);
size = CPU_ALLOC_SIZE(nrcpus);
CPU_ZERO_S(size, orig_mask);

if (sched_getaffinity(0, size, orig_mask))
goto err_out;

mask = CPU_ALLOC(nrcpus);
if (!mask)
goto err_out;

CPU_ZERO(&mask);
CPU_ZERO_S(size, mask);

if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();

BUG_ON(!cpumask);
if (!cpumask)
goto err;

if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
}
}
numa_free_cpumask(cpumask);
}

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
if (sched_setaffinity(0, size, mask))
goto err;

return orig_mask;

err:
CPU_FREE(mask);
err_out:
CPU_FREE(orig_mask);

/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
}

static void bind_to_cpumask(cpu_set_t mask)
static void bind_to_cpumask(cpu_set_t *mask)
{
int ret;
size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
ret = sched_setaffinity(0, size, mask);
if (ret) {
CPU_FREE(mask);
BUG_ON(ret);
}
}

static void mempol_restore(void)
Expand Down Expand Up @@ -377,7 +421,7 @@ do { \
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
cpu_set_t orig_mask;
cpu_set_t *orig_mask = NULL;
ssize_t bytes;
u8 *buf;
int ret;
Expand Down Expand Up @@ -435,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
CPU_FREE(orig_mask);
mempol_restore();
}

Expand Down Expand Up @@ -595,6 +640,7 @@ static int parse_setup_cpu_list(void)
BUG_ON(bind_cpu_0 > bind_cpu_1);

for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int i;

for (i = 0; i < mul; i++) {
Expand All @@ -614,10 +660,15 @@ static int parse_setup_cpu_list(void)
tprintf("%2d", bind_cpu);
}

CPU_ZERO(&td->bind_cpumask);
td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
BUG_ON(!td->bind_cpumask);
CPU_ZERO_S(size, td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
CPU_SET(cpu, &td->bind_cpumask);
if (cpu < 0 || cpu >= g->p.nr_cpus) {
CPU_FREE(td->bind_cpumask);
BUG_ON(-1);
}
CPU_SET_S(cpu, size, td->bind_cpumask);
}
t++;
}
Expand Down Expand Up @@ -1245,7 +1296,7 @@ static void *worker_thread(void *__tdata)
* by migrating to CPU#0:
*/
if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
cpu_set_t orig_mask;
cpu_set_t *orig_mask;
int target_cpu;
int this_cpu;

Expand All @@ -1269,6 +1320,7 @@ static void *worker_thread(void *__tdata)
printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);

bind_to_cpumask(orig_mask);
CPU_FREE(orig_mask);
}

if (details >= 3) {
Expand Down Expand Up @@ -1402,21 +1454,31 @@ static void init_thread_data(void)

for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int cpu;

/* Allow all nodes by default: */
td->bind_node = NUMA_NO_NODE;

/* Allow all CPUs by default: */
CPU_ZERO(&td->bind_cpumask);
td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
BUG_ON(!td->bind_cpumask);
CPU_ZERO_S(cpuset_size, td->bind_cpumask);
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &td->bind_cpumask);
CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
}
}

static void deinit_thread_data(void)
{
ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
int t;

/* Free the bind_cpumask allocated for thread_data */
for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
CPU_FREE(td->bind_cpumask);
}

free_data(g->threads, size);
}
Expand Down

0 comments on commit f58faed

Please sign in to comment.