diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2016-05-05 20:44:06 +0000 |
---|---|---|
committer | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2016-10-06 18:30:51 +0200 |
commit | 1702c483ff220f873f290f3c4760bf8d55c34b44 (patch) | |
tree | 2dbd1ddd93379c894e97108817087809764ae591 | |
parent | d3aed406b518503467816f3a80133d9afb7f4d37 (diff) | |
download | linux-futex-futex_v12_unique_ids.tar.gz |
perf/bench/futex-hash: Support NUMA (futex_v12_unique_ids)
This adds a new option to tell perf on which NUMA node the hash benchmark
should run. If set, then:
- The test is bound to the node
- Memory is allocated on the local NUMA node
- The threads are bound to the CPUs on the node
The NUMA node can be specified with the -n argument.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | tools/perf/bench/Build | 4 | ||||
-rw-r--r-- | tools/perf/bench/futex-hash.c | 118 |
2 files changed, 109 insertions, 13 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf119430479..9e6e518d7d623 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,3 +1,7 @@ +ifdef CONFIG_NUMA +CFLAGS_futex-hash.o += -DCONFIG_NUMA=1 +endif + perf-y += sched-messaging.o perf-y += sched-pipe.o perf-y += mem-functions.o diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 8024cd5febd22..56818095d7903 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -25,6 +25,12 @@ #include <err.h> #include <sys/time.h> +#ifdef CONFIG_NUMA +#include <numa.h> +#endif + +#define FUTEX_ATTACH 13 +#define FUTEX_ATTACHED 512 static unsigned int nthreads = 0; static unsigned int nsecs = 10; @@ -32,6 +38,7 @@ static unsigned int nsecs = 10; static unsigned int nfutexes = 1024; static bool fshared = false, done = false, silent = false; static int futex_flag = 0; +static int numa_node = -1; struct timeval start, end, runtime; static pthread_mutex_t thread_lock; @@ -42,9 +49,10 @@ static pthread_cond_t thread_parent, thread_worker; struct worker { int tid; u_int32_t *futex; + u_int32_t *futex_id; pthread_t thread; unsigned long ops; -}; +} __attribute__((aligned(128))); static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), @@ -52,9 +60,28 @@ static const struct option options[] = { OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), +#ifdef CONFIG_NUMA + OPT_INTEGER( 'n', "numa", &numa_node, "Specify the NUMA node"), +#endif OPT_END() }; +#ifndef CONFIG_NUMA +static int numa_run_on_node(int node __maybe_unused) { return 0; } +static int numa_node_of_cpu(int node __maybe_unused) { return 0; } +static void *numa_alloc_local(size_t size) { return malloc(size); } +static void 
numa_free(void *p, size_t size __maybe_unused) { return free(p); } +#endif + +static bool cpu_is_local(int cpu) +{ + if (numa_node < 0) + return true; + if (numa_node_of_cpu(cpu) == numa_node) + return true; + return false; +} + static const char * const bench_futex_hash_usage[] = { "perf bench futex hash <options>", NULL @@ -81,7 +108,8 @@ static void *workerfn(void *arg) * such as internal waitqueue handling, thus enlarging * the critical region protected by hb->lock. */ - ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); + ret = futex_wait((void *)(unsigned long)w->futex_id[i], + 1234, NULL, futex_flag); if (!silent && (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) warn("Non-expected futex return call"); @@ -111,6 +139,11 @@ static void print_summary(void) (int) runtime.tv_sec); } +static int futex_attach(u_int32_t *uaddr, unsigned int opflags) +{ + return futex(uaddr, FUTEX_ATTACH, 0, 0, NULL, 0, opflags); +} + int bench_futex_hash(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -120,6 +153,8 @@ int bench_futex_hash(int argc, const char **argv, unsigned int i, ncpus; pthread_attr_t thread_attr; struct worker *worker = NULL; + char *node_str = NULL; + unsigned int cpunum; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -133,18 +168,50 @@ int bench_futex_hash(int argc, const char **argv, act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - if (!nthreads) /* default to the number of CPUs */ - nthreads = ncpus; + if (!nthreads) { + /* default to the number of CPUs per NUMA node */ + if (numa_node < 0) { + nthreads = ncpus; + } else { + for (i = 0; i < ncpus; i++) { + if (cpu_is_local(i)) + nthreads++; + } + if (!nthreads) + err(EXIT_FAILURE, "No online CPUs for this node"); + } + } else { + int cpu_available = 0; - worker = calloc(nthreads, sizeof(*worker)); + for (i = 0; i < ncpus && !cpu_available; i++) { + if (cpu_is_local(i)) + cpu_available = 1; + } + if (!cpu_available) + 
err(EXIT_FAILURE, "No online CPUs for this node"); + } + + if (numa_node >= 0) { + ret = numa_run_on_node(numa_node); + if (ret < 0) + err(EXIT_FAILURE, "numa_run_on_node"); + ret = asprintf(&node_str, " on node %d", numa_node); + if (ret < 0) + err(EXIT_FAILURE, "numa_node, asprintf"); + } + + worker = numa_alloc_local(nthreads * sizeof(*worker)); if (!worker) goto errmem; if (!fshared) - futex_flag = FUTEX_PRIVATE_FLAG; + futex_flag = FUTEX_PRIVATE_FLAG | FUTEX_ATTACHED; - printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", - getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); + printf("Run summary [PID %d]: %d threads%s, each operating on %d [%s] futexes for %d secs.\n\n", + getpid(), nthreads, + node_str ? : "", + nfutexes, fshared ? "shared":"private", + nsecs); init_stats(&throughput_stats); pthread_mutex_init(&thread_lock, NULL); @@ -154,14 +221,39 @@ int bench_futex_hash(int argc, const char **argv, threads_starting = nthreads; pthread_attr_init(&thread_attr); gettimeofday(&start, NULL); - for (i = 0; i < nthreads; i++) { + for (cpunum = 0, i = 0; i < nthreads; i++, cpunum++) { + unsigned int f_init; + + do { + if (cpu_is_local(cpunum)) + break; + cpunum++; + if (cpunum > ncpus) + cpunum = 0; + } while (1); + worker[i].tid = i; - worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + worker[i].futex = numa_alloc_local(nfutexes * + sizeof(*worker[i].futex)); if (!worker[i].futex) goto errmem; + worker[i].futex_id = numa_alloc_local(nfutexes * + sizeof(*worker[i].futex_id)); + if (!worker[i].futex_id) + goto errmem; + + if (futex_flag & FUTEX_ATTACHED) { + for (f_init = 0; f_init < nfutexes; f_init++) { + ret = futex_attach(&worker[i].futex[f_init], futex_flag); + if (ret < 0) + err(EXIT_FAILURE, "Can't attached futex cpu%d futex%d", + i, f_init); + worker[i].futex_id[f_init] = ret; + } + } CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_SET(cpunum % ncpus, &cpu); ret = 
pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); if (ret) @@ -208,12 +300,12 @@ int bench_futex_hash(int argc, const char **argv, &worker[i].futex[nfutexes-1], t); } - free(worker[i].futex); + numa_free(worker[i].futex, nfutexes * sizeof(*worker[i].futex)); } print_summary(); - free(worker); + numa_free(worker, nthreads * sizeof(*worker)); return ret; errmem: err(EXIT_FAILURE, "calloc"); |