summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andy Lutomirski <luto@mit.edu> 2011-07-25 01:12:00 -0400
committer Andy Lutomirski <luto@mit.edu> 2011-07-25 01:12:00 -0400
commit 2158c72041b7aadf0952285bbb0e8b81beb2e0da (patch)
tree 36f92ad15b285a754140001ab7cabd4bb96a481a
parent c16e2637a871527ac0f1c3790f8cf96cb41dfcb0 (diff)
download misc-tests-2158c72041b7aadf0952285bbb0e8b81beb2e0da.tar.gz
Add context_switch_latency
-rw-r--r-- .gitignore 1
-rw-r--r-- Makefile 7
-rw-r--r-- context_switch_latency.c 118
-rw-r--r-- test_vsyscall.cc 72
4 files changed, 178 insertions, 20 deletions
diff --git a/.gitignore b/.gitignore
index 81d09d8..dd0fce7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ timing_test
time-warp-test
evil-clock-test
test_vsyscall
+context_switch_latency
*~
diff --git a/Makefile b/Makefile
index ba00577..98a1f21 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
.PHONY: all
-all: timing_test evil-clock-test test_vsyscall dump-vdso dump-vsyscall
+all: timing_test evil-clock-test test_vsyscall dump-vdso dump-vsyscall context_switch_latency
timing_test: timing_test.cc
g++ -o $@ -lrt -ldl -O2 -Wall -g $^
@@ -8,10 +8,13 @@ evil-clock-test: evil-clock-test.cc
g++ -o $@ -pthread -lrt -O2 -Wall -g $^
test_vsyscall: test_vsyscall.cc
- g++ -o $@ -std=gnu++0x -lrt -ldl -O2 -Wall -g $^
+ g++ -o $@ -std=gnu++0x -lrt -ldl -O2 -Wall -mavx -g $^
dump-vdso: dump-vdso.c
gcc -o $@ -ldl -O2 $^
dump-vsyscall: dump-vsyscall.c
gcc -o $@ -ldl -O2 $^
+
+context_switch_latency: context_switch_latency.c
+ g++ -o $@ -pthread -lrt -O2 -Wall -g $^
diff --git a/context_switch_latency.c b/context_switch_latency.c
new file mode 100644
index 0000000..522ba5b
--- /dev/null
+++ b/context_switch_latency.c
@@ -0,0 +1,118 @@
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/eventfd.h>
+
+int to_thread, from_thread;
+volatile int state; // 0 = warmup. 1 = benchmark. 2 = exit.
+
+int use_xstate = 0;
+
+void maybe_use_xstate(void)
+{
+ if (use_xstate)
+ asm volatile ("pxor %%xmm0, %%xmm0" : : : "xmm0");
+}
+
+void *threadproc(void *x)
+{
+ while(1)
+ {
+ uint64_t buf;
+
+ if (read(to_thread, &buf, 8) != 8)
+ abort();
+
+ int s = state;
+ if (s == 2)
+ return 0;
+ else if (1 || s == 0)
+ maybe_use_xstate();
+
+ buf = 1;
+ if (write(from_thread, &buf, 8) != 8)
+ abort();
+ }
+}
+
+void bounce()
+{
+ uint64_t buf = 1;
+ if (write(to_thread, &buf, 8) != 8)
+ abort();
+
+ if (read(from_thread, &buf, 8) != 8)
+ abort();
+}
+
+void killit()
+{
+ uint64_t buf = 1;
+ state = 2;
+ if (write(to_thread, &buf, 8) != 8)
+ abort();
+}
+
+int main(int argc, char **argv)
+{
+ struct timespec start, end;
+ pthread_t thread;
+ uint64_t i, iters = 100000;
+ uint64_t ns;
+
+ if (argc != 2) {
+ printf("Usage: %s <0|1>\n\nSet the parameter to 1 to use xstate\n",
+ argv[0]);
+ return 1;
+ } else {
+ if (!strcmp(argv[1], "1"))
+ use_xstate = 1;
+ else if (strcmp(argv[1], "0"))
+ abort();
+ }
+
+ printf("use_xstate = %d\n", use_xstate);
+
+ to_thread = eventfd(0, 0);
+ if (to_thread < 0)
+ abort();
+ from_thread = eventfd(0, 0);
+ if (from_thread < 0)
+ abort();
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ abort();
+
+ /* Warm up (and burn the xstate heuristic) */
+ for(i = 0; i < 10000; i++) {
+ bounce();
+ maybe_use_xstate();
+ }
+
+ /* Clear out the xstate hack. */
+ state = 1;
+ bounce();
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < iters; i++) {
+ bounce();
+ maybe_use_xstate();
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ killit();
+
+ ns = 1000000000ULL * (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec);
+
+ printf("%llu iters at %.1f ns/iter\n",
+ (unsigned long long)iters, (double)ns / iters);
+
+ pthread_join(thread, 0);
+
+ return 0;
+}
diff --git a/test_vsyscall.cc b/test_vsyscall.cc
index cbb1fe7..0d29af3 100644
--- a/test_vsyscall.cc
+++ b/test_vsyscall.cc
@@ -14,6 +14,11 @@
#include <asm/ldt.h>
#include <errno.h>
+static int sys_arch_prctl(long a, long b)
+{
+ return syscall(__NR_arch_prctl, a, b);
+}
+
static inline int modify_ldt(int mode, void *ptr, unsigned long size)
{
int ret = syscall(__NR_modify_ldt, mode, ptr, size);
@@ -200,26 +205,57 @@ int test(int argc, char **argv)
int bench(int argc, char **argv)
{
- struct timeval tv;
- struct timezone tz;
- benchmark(" syscall gettimeofday", [&]{sys_gtod(&tv, &tz);});
- benchmark(" vdso gettimeofday", [&]{vdso_gtod(&tv, &tz);});
- benchmark("vsyscall gettimeofday", [&]{vgtod(&tv, &tz);});
-
- printf("\n");
- time_t t;
- benchmark(" syscall time ", [&]{sys_time(&t);});
- if (vdso_time)
- benchmark(" vdso time ", [&]{vdso_time(&t);});
- benchmark("vsyscall time ", [&]{vtime(&t);});
+ benchmark("dummy syscall ", [&]{syscall(0xffffffff);});
+ benchmark("dummy prctl ", [&]{sys_arch_prctl(0xffffffff, 0);});
+
+ benchmark("kernel_fpu_begin ", [&]{sys_arch_prctl(1000, 0);});
+ benchmark("kernel_fpu_begin + ymm", [&]{
+ sys_arch_prctl(1000, 0);
+ asm volatile("vzeroupper");
+ });
- printf("\n");
- unsigned cpu, node;
- benchmark(" vdso getcpu ", [&]{vdso_getcpu(&cpu, &node, 0);});
- benchmark("vsyscall getcpu ", [&]{vgetcpu(&cpu, &node, 0);});
+ benchmark("stts/clts x1000 ", [&]{sys_arch_prctl(1001, 0);});
+
+ long one = 1;
+
+ char *xstate;
+ if (posix_memalign((void**)&xstate, 64, 4096)) {
+ perror("posix_memalign");
+ return 1;
+ }
- printf("\n");
- benchmark("dummy syscall ", [&]{syscall(0xffffffff);});
+ asm volatile ("vbroadcastsd %0, %%ymm0" : : "m" (one) : "xmm0");
+ benchmark("xsave+clflush+restore ymm ", [&]{
+ asm volatile("xsaveq %0\n\tclflush %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("xsave+restore ymm ", [&]{
+ asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("xsave+2restore ymm ", [&]{
+ asm volatile("xsaveq %0\n\txrstorq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("2xsave+restore ymm ", [&]{
+ asm volatile("xsaveq %0\n\txsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("xsaveopt+restore ymm ", [&]{
+ asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+
+ asm volatile ("vbroadcastsd %0, %%ymm0; vzeroupper" : : "m" (one) : "xmm0");
+ benchmark("xsave+restore xmm ", [&]{
+ asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("xsaveopt+restore xmm ", [&]{
+ asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+
+ asm volatile ("vzeroall" : : "m" (one) : "xmm0");
+ benchmark("xsave+restore zero ", [&]{
+ asm volatile("xsaveq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
+ benchmark("xsaveopt+restore zero ", [&]{
+ asm volatile("xsaveoptq %0\n\txrstorq %0" : "=m" (*xstate) : "a" (0xffffffff), "d" (0xffffffff));
+ });
return 0;
}