-
Notifications
You must be signed in to change notification settings - Fork 115
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf bench: Add futex-hash microbenchmark
Introduce futexes to perf-bench and add a program that stresses and measures the kernel's implementation of the hash table. This is a multi-threaded program that simply measures the amount of failed futex wait calls - we only want to deal with the hashing overhead, so a negative return of futex_wait_setup() is enough to do the trick. An example run: $ perf bench futex hash -t 32 Run summary [PID 10989]: 32 threads, each operating on 1024 [private] futexes for 10 secs. [thread 0] futexes: 0x19d9b10 ... 0x19dab0c [ 418713 ops/sec ] [thread 1] futexes: 0x19daca0 ... 0x19dbc9c [ 469913 ops/sec ] [thread 2] futexes: 0x19dbe30 ... 0x19dce2c [ 479744 ops/sec ] ... [thread 31] futexes: 0x19fbb80 ... 0x19fcb7c [ 464179 ops/sec ] Averaged 454310 operations/sec (+- 0.84%), total secs = 10 Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> Acked-by: Darren Hart <dvhart@linux.intel.com> Cc: Aswin Chandramouleeswaran <aswin@hp.com> Cc: Darren Hart <dvhart@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jason Low <jason.low2@hp.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Scott J Norton <scott.norton@hp.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Waiman Long <Waiman.Long@hp.com> Link: http://lkml.kernel.org/r/1387081917-9102-2-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
- Loading branch information
Showing
6 changed files
with
277 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
/* | ||
* Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> | ||
* | ||
* futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing. | ||
* | ||
* This program is particularly useful for measuring the kernel's futex hash | ||
* table/function implementation. In order for it to make sense, use with as | ||
* many threads and futexes as possible. | ||
*/ | ||
|
||
#include "../perf.h" | ||
#include "../util/util.h" | ||
#include "../util/stat.h" | ||
#include "../util/parse-options.h" | ||
#include "../util/header.h" | ||
#include "bench.h" | ||
#include "futex.h" | ||
|
||
#include <err.h> | ||
#include <stdlib.h> | ||
#include <sys/time.h> | ||
#include <pthread.h> | ||
|
||
static unsigned int nthreads = 0; | ||
static unsigned int nsecs = 10; | ||
/* amount of futexes per thread */ | ||
static unsigned int nfutexes = 1024; | ||
static bool fshared = false, done = false, silent = false; | ||
|
||
struct timeval start, end, runtime; | ||
static pthread_mutex_t thread_lock; | ||
static unsigned int threads_starting; | ||
static struct stats throughput_stats; | ||
static pthread_cond_t thread_parent, thread_worker; | ||
|
||
/* Per-thread bookkeeping for one benchmark worker. */
struct worker {
	int		tid;	/* index of the thread in the worker array */
	u_int32_t	*futex;	/* this thread's array of nfutexes futex words */
	pthread_t	thread;	/* handle used to join the thread */
	unsigned long	ops;	/* futex_wait() calls issued so far */
};
|
||
static const struct option options[] = { | ||
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), | ||
OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), | ||
OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), | ||
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), | ||
OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), | ||
OPT_END() | ||
}; | ||
|
||
/* NULL-terminated usage lines handed to the option parser. */
static const char * const bench_futex_hash_usage[] = {
	"perf bench futex hash <options>", NULL
};
|
||
static void *workerfn(void *arg) | ||
{ | ||
int ret; | ||
unsigned int i; | ||
struct worker *w = (struct worker *) arg; | ||
|
||
pthread_mutex_lock(&thread_lock); | ||
threads_starting--; | ||
if (!threads_starting) | ||
pthread_cond_signal(&thread_parent); | ||
pthread_cond_wait(&thread_worker, &thread_lock); | ||
pthread_mutex_unlock(&thread_lock); | ||
|
||
do { | ||
for (i = 0; i < nfutexes; i++, w->ops++) { | ||
/* | ||
* We want the futex calls to fail in order to stress | ||
* the hashing of uaddr and not measure other steps, | ||
* such as internal waitqueue handling, thus enlarging | ||
* the critical region protected by hb->lock. | ||
*/ | ||
ret = futex_wait(&w->futex[i], 1234, NULL, | ||
fshared ? 0 : FUTEX_PRIVATE_FLAG); | ||
if (!silent && | ||
(!ret || errno != EAGAIN || errno != EWOULDBLOCK)) | ||
warn("Non-expected futex return call"); | ||
} | ||
} while (!done); | ||
|
||
return NULL; | ||
} | ||
|
||
/*
 * toggle_done() - stop the benchmark and record how long it ran.
 *
 * Installed as the SIGINT action and also called directly once the
 * requested runtime has elapsed; none of the signal arguments are used.
 * Raises 'done', stamps 'end' and stores end - start in 'runtime'.
 */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	done = true;	/* inform all threads that we're done for the day */

	gettimeofday(&end, NULL);
	timersub(&end, &start, &runtime);
}
|
||
static void print_summary(void) | ||
{ | ||
unsigned long avg = avg_stats(&throughput_stats); | ||
double stddev = stddev_stats(&throughput_stats); | ||
|
||
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", | ||
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), | ||
(int) runtime.tv_sec); | ||
} | ||
|
||
int bench_futex_hash(int argc, const char **argv, | ||
const char *prefix __maybe_unused) | ||
{ | ||
int ret = 0; | ||
cpu_set_t cpu; | ||
struct sigaction act; | ||
unsigned int i, ncpus; | ||
pthread_attr_t thread_attr; | ||
struct worker *worker = NULL; | ||
|
||
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); | ||
if (argc) { | ||
usage_with_options(bench_futex_hash_usage, options); | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
ncpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
|
||
sigfillset(&act.sa_mask); | ||
act.sa_sigaction = toggle_done; | ||
sigaction(SIGINT, &act, NULL); | ||
|
||
if (!nthreads) /* default to the number of CPUs */ | ||
nthreads = ncpus; | ||
|
||
worker = calloc(nthreads, sizeof(*worker)); | ||
if (!worker) | ||
goto errmem; | ||
|
||
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", | ||
getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); | ||
|
||
init_stats(&throughput_stats); | ||
pthread_mutex_init(&thread_lock, NULL); | ||
pthread_cond_init(&thread_parent, NULL); | ||
pthread_cond_init(&thread_worker, NULL); | ||
|
||
threads_starting = nthreads; | ||
pthread_attr_init(&thread_attr); | ||
gettimeofday(&start, NULL); | ||
for (i = 0; i < nthreads; i++) { | ||
worker[i].tid = i; | ||
worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); | ||
if (!worker[i].futex) | ||
goto errmem; | ||
|
||
CPU_ZERO(&cpu); | ||
CPU_SET(i % ncpus, &cpu); | ||
|
||
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); | ||
if (ret) | ||
err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); | ||
|
||
ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, | ||
(void *)(struct worker *) &worker[i]); | ||
if (ret) | ||
err(EXIT_FAILURE, "pthread_create"); | ||
|
||
} | ||
pthread_attr_destroy(&thread_attr); | ||
|
||
pthread_mutex_lock(&thread_lock); | ||
while (threads_starting) | ||
pthread_cond_wait(&thread_parent, &thread_lock); | ||
pthread_cond_broadcast(&thread_worker); | ||
pthread_mutex_unlock(&thread_lock); | ||
|
||
sleep(nsecs); | ||
toggle_done(0, NULL, NULL); | ||
|
||
for (i = 0; i < nthreads; i++) { | ||
ret = pthread_join(worker[i].thread, NULL); | ||
if (ret) | ||
err(EXIT_FAILURE, "pthread_join"); | ||
} | ||
|
||
/* cleanup & report results */ | ||
pthread_cond_destroy(&thread_parent); | ||
pthread_cond_destroy(&thread_worker); | ||
pthread_mutex_destroy(&thread_lock); | ||
|
||
for (i = 0; i < nthreads; i++) { | ||
unsigned long t = worker[i].ops/runtime.tv_sec; | ||
update_stats(&throughput_stats, t); | ||
if (!silent) { | ||
if (nfutexes == 1) | ||
printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", | ||
worker[i].tid, &worker[i].futex[0], t); | ||
else | ||
printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n", | ||
worker[i].tid, &worker[i].futex[0], | ||
&worker[i].futex[nfutexes-1], t); | ||
} | ||
|
||
free(worker[i].futex); | ||
} | ||
|
||
print_summary(); | ||
|
||
free(worker); | ||
return ret; | ||
errmem: | ||
err(EXIT_FAILURE, "calloc"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Glibc independent futex library for testing kernel functionality. | ||
* Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com> | ||
* http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/ | ||
*/ | ||
|
||
#ifndef _FUTEX_H | ||
#define _FUTEX_H | ||
|
||
#include <unistd.h> | ||
#include <sys/syscall.h> | ||
#include <sys/types.h> | ||
#include <linux/futex.h> | ||
|
||
/**
 * futex() - SYS_futex syscall wrapper
 * @uaddr:	address of first futex
 * @op:		futex op code
 * @val:	typically expected value of uaddr, but varies by op
 * @timeout:	typically an absolute struct timespec (except where noted
 *		otherwise). Overloaded by some ops
 * @uaddr2:	address of second futex for some ops
 * @val3:	varies by op
 * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
 *
 * futex() is used by all the following futex op wrappers. It can also be
 * used for misuse and abuse testing. Generally, the specific op wrappers
 * should be used instead. It is a macro instead of a static inline function as
 * some of the types are overloaded (timeout is used for nr_requeue for
 * example).
 *
 * Every argument is parenthesized in the expansion so that an expression
 * passed as @op or @opflags (a conditional, for instance) is evaluated as a
 * whole before the two are OR'd together.
 *
 * These argument descriptions are the defaults for all
 * like-named arguments in the following wrappers except where noted below.
 */
#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
	syscall(SYS_futex, (uaddr), (op) | (opflags), (val), (timeout), (uaddr2), (val3))
|
||
/**
 * futex_wait() - block on uaddr with optional timeout
 * @uaddr:	futex word to wait on
 * @val:	value the futex word is expected to hold
 * @timeout:	relative timeout
 * @opflags:	flags OR'd into the FUTEX_WAIT op code
 *
 * Issues FUTEX_WAIT through futex() with no second futex (NULL) and a val3
 * of 0, and hands back the result of that call as an int.
 */
static inline int
futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
{
	const int rc = futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);

	return rc;
}
|
||
#endif /* _FUTEX_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters