diff options
author | Andy Lutomirski <luto@mit.edu> | 2011-07-25 01:12:00 -0400 |
---|---|---|
committer | Andy Lutomirski <luto@mit.edu> | 2011-07-25 01:12:00 -0400 |
commit | 2158c72041b7aadf0952285bbb0e8b81beb2e0da (patch) | |
tree | 36f92ad15b285a754140001ab7cabd4bb96a481a | |
parent | c16e2637a871527ac0f1c3790f8cf96cb41dfcb0 (diff) | |
download | misc-tests-2158c72041b7aadf0952285bbb0e8b81beb2e0da.tar.gz |
Add context_switch_latency
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | context_switch_latency.c | 118 | ||||
-rw-r--r-- | test_vsyscall.cc | 72 |
4 files changed, 178 insertions, 20 deletions
@@ -4,4 +4,5 @@ timing_test time-warp-test evil-clock-test test_vsyscall +context_switch_latency *~ @@ -1,5 +1,5 @@ .PHONY: all -all: timing_test evil-clock-test test_vsyscall dump-vdso dump-vsyscall +all: timing_test evil-clock-test test_vsyscall dump-vdso dump-vsyscall context_switch_latency timing_test: timing_test.cc g++ -o $@ -lrt -ldl -O2 -Wall -g $^ @@ -8,10 +8,13 @@ evil-clock-test: evil-clock-test.cc g++ -o $@ -pthread -lrt -O2 -Wall -g $^ test_vsyscall: test_vsyscall.cc - g++ -o $@ -std=gnu++0x -lrt -ldl -O2 -Wall -g $^ + g++ -o $@ -std=gnu++0x -lrt -ldl -O2 -Wall -mavx -g $^ dump-vdso: dump-vdso.c gcc -o $@ -ldl -O2 $^ dump-vsyscall: dump-vsyscall.c gcc -o $@ -ldl -O2 $^ + +context_switch_latency: context_switch_latency.c + g++ -o $@ -pthread -lrt -O2 -Wall -g $^ diff --git a/context_switch_latency.c b/context_switch_latency.c new file mode 100644 index 0000000..522ba5b --- /dev/null +++ b/context_switch_latency.c @@ -0,0 +1,118 @@ +#include <time.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <sys/eventfd.h> + +int to_thread, from_thread; +volatile int state; // 0 = warmup. 1 = benchmark. 2 = exit. + +int use_xstate = 0; + +void maybe_use_xstate(void) +{ + if (use_xstate) + asm volatile ("pxor %%xmm0, %%xmm0" : : : "xmm0"); +} + +void *threadproc(void *x) +{ + while(1) + { + uint64_t buf; + + if (read(to_thread, &buf, 8) != 8) + abort(); + + int s = state; + if (s == 2) + return 0; + else if (1 || s == 0) + maybe_use_xstate(); + + buf = 1; + if (write(from_thread, &buf, 8) != 8) + abort(); + } +} + +void bounce() +{ + uint64_t buf = 1; + if (write(to_thread, &buf, 8) != 8) + abort(); + + if (read(from_thread, &buf, 8) != 8) + abort(); +} + +void killit() +{ + uint64_t buf = 1; + state = 2; + if (write(to_thread, &buf, 8) != 8) + abort(); +} + +int main(int argc, char **argv) +{ + struct timespec start, end; + pthread_t thread; + uint64_t i, iters = 100000; + uint64_t ns; + + if (argc != 2) { + printf("Usage: %s <0|1>\n\nSet the parameter to 1 to use xstate\n", + argv[0]); + return 1; + } else { + if (!strcmp(argv[1], "1")) + use_xstate = 1; + else if (strcmp(argv[1], "0")) + abort(); + } + + printf("use_xstate = %d\n", use_xstate); + + to_thread = eventfd(0, 0); + if (to_thread < 0) + abort(); + from_thread = eventfd(0, 0); + if (from_thread < 0) + abort(); + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + abort(); + + /* Warm up (and burn the xstate heuristic) */ + for(i = 0; i < 10000; i++) { + bounce(); + maybe_use_xstate(); + } + + /* Clear out the xstate hack. */ + state = 1; + bounce(); + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < iters; i++) { + bounce(); + maybe_use_xstate(); + } + + clock_gettime(CLOCK_MONOTONIC, &end); + + killit(); + + ns = 1000000000ULL * (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec); + + printf("%llu iters at %.1f ns/iter\n", + (unsigned long long)iters, (double)ns / iters); + + pthread_join(thread, 0); + + return 0; +} diff --git a/test_vsyscall.cc b/test_vsyscall.cc index cbb1fe7..0d29af3 100644 --- a/test_vsyscall.cc +++ b/test_vsyscall.cc @@ -14,6 +14,11 @@ #include <asm/ldt.h> #include <errno.h> +static int sys_arch_prctl(long a, long b) +{ + return syscall(__NR_arch_prctl, a, b); +} + static inline int modify_ldt(int mode, void *ptr, unsigned long size) { int ret = syscall(__NR_modify_ldt, mode, ptr, size); @@ -200,26 +205,57 @@ int test(int argc, char **argv) int bench(int argc, char **argv) { - struct timeval tv; - struct timezone tz; - benchmark(" syscall gettimeofday", [&]{sys_gtod(&tv, &tz);}); - benchmark(" vdso gettimeofday", [&]{vdso_gtod(&tv, &tz);}); - benchmark("vsyscall gettimeofday", [&]{vgtod(&tv, &tz);}); - - printf("\n"); - time_t t; - benchmark(" syscall time ", [&]{sys_time(&t);}); - if (vdso_time) - benchmark(" vdso time ", [&]{vdso_time(&t);}); - benchmark("vsyscall time ", [&]{vtime(&t);}); + benchmark("dummy syscall ", [&]{syscall(0xffffffff);}); + benchmark("dummy prctl ", [&]{sys_arch_prctl(0xffffffff, 0);}); + + benchmark("kernel_fpu_begin ", [&]{sys_arch_prctl(1000, 0);}); + benchmark("kernel_fpu_begin + ymm", [&]{ + sys_arch_prctl(1000, 0); + asm volatile("vzeroupper"); + }); - printf("\n"); - unsigned cpu, node; - benchmark(" vdso getcpu ", [&]{vdso_getcpu(&cpu, &node, 0);}); - benchmark("vsyscall getcpu ", [&]{vgetcpu(&cpu, &node, 0);}); + benchmark("stts/clts x1000 ", [&]{sys_arch_prctl(1001, 0);}); + + long one = 1; + + char *xstate; + if (posix_memalign((void**)&xstate, 64, 4096)) { + perror("posix_memalign"); + return 1; + } - printf("\n"); - benchmark("dummy syscall ", [&]{syscall(0xffffffff);}); + asm volatile ("vbroadcastsd %0, %%ymm0" : : "m" (one) : "xmm0"); + benchmark("xsave+clflush+restore ymm ", [&]{ + asm volatile("xsaveq %0\n\tclflush %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("xsave+restore ymm ", [&]{ + asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("xsave+2restore ymm ", [&]{ + asm volatile("xsaveq %0\n\txrstorq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("2xsave+restore ymm ", [&]{ + asm volatile("xsaveq %0\n\txsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("xsaveopt+restore ymm ", [&]{ + asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + + asm volatile ("vbroadcastsd %0, %%ymm0; vzeroupper" : : "m" (one) : "xmm0"); + benchmark("xsave+restore xmm ", [&]{ + asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("xsaveopt+restore xmm ", [&]{ + asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + + asm volatile ("vzeroall" : : "m" (one) : "xmm0"); + benchmark("xsave+restore zero ", [&]{ + asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); + benchmark("xsaveopt+restore zero ", [&]{ + asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff)); + }); return 0; } |