/*
 * Micro-benchmark driver.
 *
 * Runs one selected operation (a single instruction, a system call, a vDSO
 * call, ...) in a tight loop and prints the average elapsed time per
 * iteration, measured with clock_gettime(CLOCK_MONOTONIC).
 *
 * Usage: time <iters> <mode> [POSIX clock id]
 *   <iters> may end in 'k' (x1000) or 'M' (x1000000).
 *   <mode>  is one of the strings compared against in main().
 */
#define __STDC_FORMAT_MACROS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <inttypes.h>
#include <dlfcn.h>
#include <unistd.h>
#include <signal.h>
#include <err.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <atomic>

/* Signatures of the vDSO entry points looked up with dlsym() in main(). */
typedef int (*vgettime_t)(clockid_t, timespec *);
typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node, void *unused);
typedef int (*vgettime_specific_t)(timespec *);

/*
 * Print one line describing a POSIX clock: its numeric id, its symbolic
 * name and the resolution reported by clock_getres(), or a failure note
 * if the resolution cannot be queried.
 */
void describe_clock(const char *name, int id)
{
    struct timespec res;
    int ret = clock_getres(id, &res);

    if (ret < 0) {
        printf("  %d (%s) [failed to query resolution]\n", id, name);
    } else {
        printf("  %d (%s) resolution = %" PRIu64 ".%09u\n",
               id, name, (uint64_t)res.tv_sec, (unsigned)res.tv_nsec);
    }
}

/*
 * Install `handler` as the SA_SIGINFO-style handler for `sig`, OR-ing
 * `flags` into sa_flags. Exits via err() if sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
                       int flags)
{
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = handler;
    sa.sa_flags = SA_SIGINFO | flags;
    sigemptyset(&sa.sa_mask);
    if (sigaction(sig, &sa, 0))
        err(1, "sigaction");
}

/* Deliberately empty SIGUSR1 handler; used by the "raise" mode. */
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
}

/* Thread body that returns immediately; used by the "pthread_create" mode. */
static void *empty_thread_proc(void *)
{
    return NULL;
}

/*
 * Execute an IRET that returns to the instruction right after itself.
 *
 * The frame is pushed by hand: SS, the stack pointer, the flags, CS and the
 * address of local label 1. The add following the stack-pointer push bumps
 * the saved value by one slot so that it equals the stack pointer from
 * before the SS push.
 */
static void iret_to_self(void)
{
#ifndef __x86_64__
    register void *__sp asm("esp");

    asm volatile (
        "pushl %%ss\n\t"
        "pushl %%esp\n\t"
        "addl $4, (%%esp)\n\t"
        "pushfl\n\t"
        "pushl %%cs\n\t"
        "pushl $1f\n\t"
        "iret\n\t"
        "1:"
        : "+r" (__sp) : : "cc");
#else
    register void *__sp asm("rsp");
    unsigned long tmp;

    asm volatile (
        "movq %%ss, %0\n\t"
        "pushq %0\n\t"
        "pushq %%rsp\n\t"
        "addq $8, (%%rsp)\n\t"
        "pushfq\n\t"
        "movq %%cs, %0\n\t"
        "pushq %0\n\t"
        "pushq $1f\n\t"
        "iretq\n\t"
        "1:"
        : "=r" (tmp), "+r" (__sp) : : "cc");
#endif
}

/*
 * Convert the iteration-count argument to a number. A trailing 'k' or 'M'
 * multiplies the value by 1000 or 1000000. atol() stops at the first
 * non-digit, so the suffix does not have to be stripped before parsing.
 */
static size_t parse_loop_count(const char *arg)
{
    size_t len = strlen(arg);
    long mult = 1;

    if (len && arg[len - 1] == 'k')
        mult = 1000;
    else if (len && arg[len - 1] == 'M')
        mult = 1000000;

    return (size_t)atol(arg) * mult;
}

/* Print the reason the most recent dlsym() lookup failed. */
static void report_dlsym_failure(void)
{
    const char *msg = dlerror();

    printf("dlsym failed: %s\n", msg ? msg : "(no error reported)");
}

/*
 * Entry point: argv[1] is the iteration count, argv[2] the mode name and
 * the optional argv[3] a numeric clock id for the clock_gettime variants.
 * Returns 1 on a usage error or unknown mode, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc < 3) {
        printf("Usage: time <iters> <mode> [POSIX clock id]\n");
        printf("\nClocks are:\n");
        describe_clock("CLOCK_REALTIME", CLOCK_REALTIME);
        describe_clock("CLOCK_MONOTONIC", CLOCK_MONOTONIC);
        describe_clock("CLOCK_REALTIME_COARSE", CLOCK_REALTIME_COARSE);
        describe_clock("CLOCK_MONOTONIC_COARSE", CLOCK_MONOTONIC_COARSE);
        return 1;
    }

    /* RTLD_NOLOAD: only obtain a handle if the object is already mapped. */
    void *vdso = dlopen("linux-vdso.so.1",
                        RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
    if (!vdso)
        vdso = dlopen("linux-gate.so.1",
                      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
    if (!vdso)
        printf("dlopen failed\n");

    /*
     * A missing vDSO or symbol is only a warning here, because most modes
     * never touch these pointers; the modes that do check them before use.
     */
    vgettime_t vgettime = NULL;
    vgettime_specific_t vgettime_monotonic = NULL;
    vgetcpu_t vgetcpu = NULL;

    if (vdso) {
        vgettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
        if (!vgettime)
            report_dlsym_failure();

        /* No warning for this one, matching the original behaviour. */
        vgettime_monotonic = (vgettime_specific_t)
            dlsym(vdso, "__vdso_clock_gettime_monotonic");

        vgetcpu = (vgetcpu_t)dlsym(vdso, "__vdso_getcpu");
        if (!vgetcpu)
            report_dlsym_failure();
    }

    size_t loops = parse_loop_count(argv[1]);
    clockid_t c = argc > 3 ? atoi(argv[3]) : 0;
    const char *mode = argv[2];

    sethandler(SIGUSR1, sigusr1, 0);

    timespec start;
    clock_gettime(CLOCK_MONOTONIC, &start);

    timespec t;

    if (!strcmp(mode, "clock_gettime")) {
        for (size_t i = 0; i < loops; ++i)
            clock_gettime(c, &t);
    } else if (!strcmp(mode, "rdtsc")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("rdtsc" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "lfence_rdtsc")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("lfence;rdtsc" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "lfence_rdtsc_lfence")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("");
            asm volatile ("lfence;rdtsc;lfence" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "mfence_rdtsc_mfence")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("mfence;rdtsc;mfence" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "mfence")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("mfence" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "sfence")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, d;
            asm volatile ("sfence" : "=a" (a), "=d" (d));
        }
    } else if (!strcmp(mode, "lock_addl")) {
        std::atomic<int> x{0};
        for (size_t i = 0; i < loops; ++i)
            x += 2;
    } else if (!strcmp(mode, "lock_xchg")) {
        std::atomic<int> x{0};
        for (size_t i = 0; i < loops; ++i)
            x.exchange(2);
    } else if (!strcmp(mode, "cmpxchg_mismatch")) {
        std::atomic<int> x{0};
        for (size_t i = 0; i < loops; ++i) {
            /*
             * CMPXCHG loads the memory operand into EAX when the
             * comparison fails, so EAX must be declared read-write and
             * reloaded every iteration to keep the comparison failing.
             */
            int expected = 1;
            asm volatile ("cmpxchg %[newval], %[mem]"
                          : [mem] "+m" (x), "+a" (expected)
                          : [newval] "r" (2)
                          : "flags");
        }
    } else if (!strcmp(mode, "cmpxchg_match")) {
        /* Starts at 0 so that the first comparison below succeeds. */
        std::atomic<int> x{0};
        for (size_t i = 0; i < loops / 2; ++i) {
            int expected = 0;
            asm volatile ("cmpxchg %[newval], %[mem]"
                          : [mem] "+m" (x), "+a" (expected)
                          : [newval] "r" (2)
                          : "flags");
            expected = 2;
            asm volatile ("cmpxchg %[newval], %[mem]"
                          : [mem] "+m" (x), "+a" (expected)
                          : [newval] "r" (0)
                          : "flags");
        }
    } else if (!strcmp(mode, "rdtscp")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int a, cx, d;
            asm volatile ("rdtscp" : "=a" (a), "=c" (cx), "=d" (d));
        }
    } else if (!strcmp(mode, "lsl15")) {
        for (size_t i = 0; i < loops; ++i) {
            uint16_t index = (15 << 3) + 3;
            uint32_t limit;
            asm volatile ("lsl %[index], %[limit]"
                          : [limit] "=r" (limit)
                          : [index] "r" (index)
                          : "cc");
        }
    } else if (!strcmp(mode, "lsl100")) {
        for (size_t i = 0; i < loops; ++i) {
            uint16_t index = (100 << 3) + 3;
            uint32_t limit;
            asm volatile ("lsl %[index], %[limit]"
                          : [limit] "=r" (limit)
                          : [index] "r" (index)
                          : "cc");
        }
    } else if (!strcmp(mode, "mov_to_ds")) {
        for (size_t i = 0; i < loops; ++i)
            asm volatile ("mov %0, %%ds" : : "rm" (0));
    } else if (!strcmp(mode, "zero_gs")) {
        for (size_t i = 0; i < loops; ++i)
            asm volatile ("mov %0, %%gs" : : "rm" (0));
    } else if (!strcmp(mode, "nonzero_gs")) {
        unsigned short sel;
        asm ("mov %%ss, %0" : "=rm" (sel));
        for (size_t i = 0; i < loops; ++i)
            asm volatile ("mov %0, %%gs" : : "rm" (sel));
    } else if (!strcmp(mode, "rdgsbase")) {
        asm volatile ("mov %0, %%gs" : : "rm" (0));
        for (size_t i = 0; i < loops; ++i) {
            unsigned long base;
            /* RDGSBASE only accepts a register operand. */
            asm volatile ("rdgsbase %0" : "=r" (base));
        }
    } else if (!strcmp(mode, "wrgsbase")) {
        asm volatile ("mov %0, %%gs" : : "rm" (0));
        for (size_t i = 0; i < loops; ++i) {
            /* WRGSBASE only accepts a register operand. */
            asm volatile ("wrgsbase %0" : : "r" (1));
        }
    } else if (!strcmp(mode, "rdwrgsbase")) {
        asm volatile ("mov %0, %%gs" : : "rm" (0));
        for (size_t i = 0; i < loops; ++i) {
            unsigned long base;
            asm volatile ("rdgsbase %0" : "=r" (base));
            asm volatile ("wrgsbase %0" : : "r" (base));
        }
    } else if (!strcmp(mode, "xsave_legacy")) {
        struct state {
            unsigned char buf[65536] __attribute__ ((aligned (64)));
        } state;
        for (size_t i = 0; i < loops; ++i) {
            asm volatile ("xsave %0" : "+m" (state) : "a" (0x3), "d" (0));
        }
    } else if (!strcmp(mode, "xsave_all")) {
        struct state {
            unsigned char buf[65536] __attribute__ ((aligned (64)));
        } state;
        unsigned long eax, edx;

        /* XGETBV with ECX = 0; its EDX:EAX result is the XSAVE mask. */
        asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
        for (size_t i = 0; i < loops; ++i) {
            asm volatile ("xsave %0" : "+m" (state) : "a" (eax), "d" (edx));
        }
    } else if (!strcmp(mode, "xsave_bndcsr")) {
        struct state {
            unsigned char buf[65536] __attribute__ ((aligned (64)));
        } state;
        for (size_t i = 0; i < loops; ++i) {
            asm volatile ("xsave %0" : "+m" (state) : "a" (0x10), "d" (0));
        }
    } else if (!strcmp(mode, "xsavec_bndcsr")) {
        struct state {
            unsigned char buf[65536] __attribute__ ((aligned (64)));
        } state;
        for (size_t i = 0; i < loops; ++i) {
            asm volatile ("xsavec %0" : "+m" (state) : "a" (0x10), "d" (0));
        }
    } else if (!strcmp(mode, "sgdt")) {
        struct {
            unsigned short limit;
            unsigned long base;
        } __attribute__((packed)) val;
        for (size_t i = 0; i < loops; ++i)
            asm volatile ("sgdt %0" : "=m" (val));
    } else if (!strcmp(mode, "gettimeofday")) {
        struct timeval tv;
        for (size_t i = 0; i < loops; ++i)
            gettimeofday(&tv, 0);
    } else if (!strcmp(mode, "sys_clock_gettime")) {
        for (size_t i = 0; i < loops; ++i)
            syscall(__NR_clock_gettime, c, &t);
    } else if (!strcmp(mode, "vclock_gettime")) {
        if (!vgettime)
            errx(1, "__vdso_clock_gettime is not available");
        for (size_t i = 0; i < loops; ++i)
            vgettime(c, &t);
    } else if (!strcmp(mode, "vclock_gettime_monotonic")) {
        if (!vgettime_monotonic)
            errx(1, "__vdso_clock_gettime_monotonic is not available");
        for (size_t i = 0; i < loops; ++i)
            vgettime_monotonic(&t);
    } else if (!strcmp(mode, "vgetcpu")) {
        unsigned cpu;

        if (!vgetcpu)
            errx(1, "__vdso_getcpu is not available");
        for (size_t i = 0; i < loops; ++i)
            vgetcpu(&cpu, NULL, NULL);
    } else if (!strcmp(mode, "getpid")) {
        for (size_t i = 0; i < loops; ++i)
            syscall(SYS_getpid);
    } else if (!strcmp(mode, "sys_enosys")) {
        for (size_t i = 0; i < loops; ++i)
            syscall(0xffffffff, c, &t);
    } else if (!strcmp(mode, "rdpmc")) {
        // Unlikely to work.
        unsigned int eax, edx;
        unsigned int ecx = 0;
        for (size_t i = 0; i < loops; ++i)
            asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
    } else if (!strcmp(mode, "memcpy_2k")) {
        unsigned char src[2048] = {}, dst[2048];
        for (size_t i = 0; i < loops; ++i) {
            /* Empty asm with a "memory" clobber placed before each copy. */
            asm volatile ("" : "=m" (*src) : "m" (*dst) : "memory");
            memcpy(dst, src, 2048);
        }
#ifdef __x86_64__
    } else if (!strcmp(mode, "vsyscall_time")) {
        auto vsyscall_time = (long (*)(long *))0xffffffffff600400;
        for (size_t i = 0; i < loops; ++i)
            vsyscall_time(nullptr);
#endif
    } else if (!strcmp(mode, "raise")) {
        for (size_t i = 0; i < loops; ++i)
            raise(SIGUSR1);
#ifdef __x86_64__
    } else if (!strcmp(mode, "arch_prctl_42")) {
        for (size_t i = 0; i < loops; ++i)
            syscall(SYS_arch_prctl, 42, 0);
#endif
    } else if (!strcmp(mode, "pthread_create")) {
        pthread_t thread;
        for (size_t i = 0; i < loops; ++i) {
            if (pthread_create(&thread, NULL, empty_thread_proc, NULL))
                err(1, "pthread_create");
            pthread_join(thread, NULL);
        }
    } else if (!strcmp(mode, "iret_to_self")) {
        for (size_t i = 0; i < loops; ++i)
            iret_to_self();
    } else if (!strcmp(mode, "cpuid")) {
        for (size_t i = 0; i < loops; ++i) {
            unsigned int ax = 1, cx = 0;
            asm volatile ("cpuid"
                          : "+a" (ax), "+c" (cx) : : "ebx", "edx");
        }
    } else {
        printf("Unknown mode %s\n", mode);
        return 1;
    }

    timespec end;
    clock_gettime(CLOCK_MONOTONIC, &end);

    /*
     * The nanosecond difference may be negative; it is converted to
     * unsigned and the sum wraps back to the correct total.
     */
    unsigned long long duration = (end.tv_nsec - start.tv_nsec) +
        1000000000ULL * (end.tv_sec - start.tv_sec);

    /* double rather than float: float cannot represent large ns counts. */
    printf("%ld loops in %.5fs = %.2f nsec / loop\n",
           (long)loops,
           (double)duration * 1e-9,
           (double)duration / loops);

    if (duration == 0)
        printf("[WARN]\tThe apparent elapsed time was exactly 0.  You have precision issues.\n");

    return 0;
}