summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andy Lutomirski <luto@mit.edu> 2011-04-18 15:06:07 -0400
committer Andy Lutomirski <luto@mit.edu> 2011-04-18 15:06:07 -0400
commit ffe62acad5d1f2a8703a5726a6b662d32346dca9 (patch)
tree 790df9140c3106e44e7f9ec3488eed1150ad2884
download misc-tests-ffe62acad5d1f2a8703a5726a6b662d32346dca9.tar.gz
Initial commit
-rw-r--r-- .gitignore 5
-rw-r--r-- Makefile 8
-rw-r--r-- evil-clock-test.cc 861
-rw-r--r-- timing_test.cc 114
4 files changed, 988 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9cb133e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+dump-vdso
+timing_test
+time-warp-test
+evil-clock-test
+*~
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5c2179b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+.PHONY: all
+all: timing_test evil-clock-test
+
+# Libraries are listed after the sources: the linker resolves symbols left
+# to right, so a library named before the objects that need it can be
+# dropped (notably when --as-needed is the toolchain default).
+timing_test: timing_test.cc
+	g++ -o $@ -O2 -Wall -g $^ -lrt -ldl
+
+evil-clock-test: evil-clock-test.cc
+	g++ -o $@ -pthread -O0 -Wall -g $^ -lrt
diff --git a/evil-clock-test.cc b/evil-clock-test.cc
new file mode 100644
index 0000000..4f3dc73
--- /dev/null
+++ b/evil-clock-test.cc
@@ -0,0 +1,861 @@
+#define __STDC_LIMIT_MACROS
+#define __STDC_FORMAT_MACROS
+#include <stdio.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+#include <argp.h>
+
+// Compiler-only barrier: an empty asm statement with a "memory" clobber.
+#define barrier() asm volatile ("" : : : "memory")
+// Full fence: issues the x86 mfence instruction (with a "memory" clobber).
+#define mb() asm volatile ("mfence" : : : "memory")
+// Aligns the annotated object to 128 bytes.
+#define cacheline_aligned __attribute__((aligned(128)))
+// Accesses x through a volatile-qualified lvalue of the same type.
+#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&x)
+
+// Upper bound on threads per test; also bounds the -p cpu list.
+#define MAX_THREADS 4
+// CPU to pin each thread index to; -1 leaves that thread unpinned.
+int cpus[4] = {-1, -1, -1, -1};
+// A raw clock sample (TSC value or nanoseconds, depending on the clock).
+typedef uint64_t Time;
+
+// Incremented once per -v on the command line.
+int verbosity = 0;
+// Set as soon as any individual test option is given.
+bool tests_specified = false;
+// Which tests to run; set by the option parser, or all enabled by main()
+// when no test option was given.
+bool run_now_test, run_load_test, run_store_test, run_load3_test;
+// Clock sources selectable with --clock.
+enum ClockType { RDTSC_DEFAULT, RDTSC, RDTSCP, LFENCE_RDTSC, MFENCE_RDTSC, MONOTONIC };
+ClockType clocktype = RDTSC_DEFAULT;
+
+// Description string handed to argp.
+static char doc[] = "Evil clock test -- a program that tries to make the clock fail";
+
+// Command-line options; the key characters are dispatched in parse_opt().
+static struct argp_option options[] = {
+ {"clock", 'c', "clock-type", 0, "Clock to use"},
+ {"cpus", 'p', "cpu-list", 0, "CPUs to use (comma-separated)"},
+ {"verbose", 'v', 0, 0, "Increase verbosity"},
+ {"now-test", 'N', 0, 0, "Run now test"},
+ {"load-3-test", '3', 0, 0, "Run load-3 test (needs three CPUs)"},
+ {"store-order-test", 'S', 0, 0, "Run store order test"},
+ {"load-order-test", 'L', 0, 0, "Run load order test"},
+ {0}
+};
+
+// argp callback: applies one parsed option to the global configuration.
+//
+// key   - option key from the options table (or an ARGP_KEY_* value)
+// arg   - option argument, if the option takes one
+// state - argp parser state, used for error reporting
+//
+// Returns 0 when the key was handled and ARGP_ERR_UNKNOWN otherwise
+// (including for positional arguments, which this program does not take).
+static error_t parse_opt(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case 'v':
+		verbosity++;
+		break;
+
+	case ARGP_KEY_ARG:
+		return ARGP_ERR_UNKNOWN;
+
+	case 'p':
+	{
+		// strsep() advances the pointer it is given and leaves it
+		// NULL once the string is exhausted, so iterate with a
+		// separate cursor and free the pointer strdup() returned.
+		char *arg_copy = strdup(arg);
+		if (!arg_copy)
+			argp_failure(state, 1, 0, "out of memory");
+
+		char *cursor = arg_copy;
+		int i = 0;
+		while (const char *c = strsep(&cursor, ","))
+		{
+			if (i >= MAX_THREADS)
+				argp_error(state, "too many cpu entries");
+
+			// end == c catches empty entries such as "1,,2",
+			// which strtol() would otherwise report as 0.
+			char *end;
+			long val = strtol(c, &end, 10);
+			if (end == c || *end)
+				argp_error(state, "bogus cpu entry");
+
+			// Range-check the long before narrowing to int.
+			if (val < 0 || val >= CPU_SETSIZE)
+				argp_error(state, "bad cpu number");
+
+			cpus[i++] = (int)val;
+		}
+		free(arg_copy);
+	}
+	break;
+
+	case 'N':
+		tests_specified = true;
+		run_now_test = true;
+		break;
+
+	case 'L':
+		tests_specified = true;
+		run_load_test = true;
+		break;
+
+	case 'S':
+		tests_specified = true;
+		run_store_test = true;
+		break;
+
+	case '3':
+		tests_specified = true;
+		run_load3_test = true;
+		break;
+
+	case 'c':
+		if (!strcmp(arg, "rdtsc_default")) {
+			clocktype = RDTSC_DEFAULT;
+		} else if (!strcmp(arg, "rdtsc")) {
+			clocktype = RDTSC;
+		} else if (!strcmp(arg, "rdtscp")) {
+			clocktype = RDTSCP;
+		} else if (!strcmp(arg, "lfence_rdtsc")) {
+			clocktype = LFENCE_RDTSC;
+		} else if (!strcmp(arg, "mfence_rdtsc")) {
+			clocktype = MFENCE_RDTSC;
+		} else if (!strcmp(arg, "monotonic")) {
+			clocktype = MONOTONIC;
+		} else {
+			fprintf(stderr, "Unknown clock type. Choices are:\n"
+				" rdtsc_default: RDTSC (autodetected for your CPU)\n"
+				" rdtsc: RDTSC (no barrier)\n"
+				" rdtscp: RDTSCP\n"
+				" lfence_rdtsc: LFENCE;RDTSC\n"
+				" mfence_rdtsc: MFENCE;RDTSC\n"
+				" monotonic: clock_gettime(CLOCK_MONOTONIC)\n"
+				"\n");
+			argp_usage(state);
+		}
+		break;
+
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+static struct argp argp = { options, parse_opt, 0, doc };
+
+// Reads the TSC after an mfence and returns the full 64-bit counter
+// (the high half from rdx is shifted up and OR-ed into rax).
+static inline Time rdtsc_strict()
+{
+ // This version is ordered wrt previous stores.
+ Time ret;
+ asm volatile ("mfence\n\t"
+ "rdtsc\n\t"
+ "shl $0x20,%%rdx\n\t"
+ "or %%rdx,%%rax"
+ : "=a" (ret) : : "cc", "rdx", "memory");
+ return ret;
+}
+
+// Clock<T> is specialized once per ClockType value.  Each specialization
+// provides:
+//   read()        - sample the clock using that clock's own instruction
+//                   sequence
+//   read_strict() - sample the clock after a full fence
+//   is_strict     - nonzero when read() itself already starts with mfence;
+//                   run() uses it to decide whether a store-test failure is
+//                   reported as expected
+template<int clocktype>
+struct Clock;
+
+// Plain rdtsc with no fence in front of it.
+template<>
+struct Clock<(int)RDTSC>
+{
+ static inline Time read()
+ {
+ Time ret;
+ asm volatile ("rdtsc\n\t"
+ "shl $0x20,%%rdx\n\t"
+ "or %%rdx,%%rax"
+ : "=a" (ret) : : "cc", "rdx", "memory");
+ return ret;
+ }
+
+ static inline Time read_strict()
+ {
+ return rdtsc_strict();
+ }
+
+ enum { is_strict = 0 };
+};
+
+// rdtscp; rcx is listed as clobbered because the instruction writes it.
+template<>
+struct Clock<(int)RDTSCP>
+{
+ static inline Time read()
+ {
+ Time ret;
+ asm volatile ("rdtscp\n\t"
+ "shl $0x20,%%rdx\n\t"
+ "or %%rdx,%%rax"
+ : "=a" (ret) : : "cc", "rdx", "rcx", "memory");
+ return ret;
+ }
+
+ static inline Time read_strict()
+ {
+ return rdtsc_strict();
+ }
+
+ enum { is_strict = 0 };
+};
+
+// lfence followed by rdtsc.
+template<>
+struct Clock<(int)LFENCE_RDTSC>
+{
+ static inline Time read()
+ {
+ Time ret;
+ asm volatile ("lfence\n\t"
+ "rdtsc\n\t"
+ "shl $0x20,%%rdx\n\t"
+ "or %%rdx,%%rax"
+ : "=a" (ret) : : "cc", "rdx", "memory");
+ return ret;
+ }
+
+ static inline Time read_strict()
+ {
+ return rdtsc_strict();
+ }
+
+ enum { is_strict = 0 };
+};
+
+// mfence followed by rdtsc; the only specialization marked is_strict.
+template<>
+struct Clock<(int)MFENCE_RDTSC>
+{
+ static inline Time read()
+ {
+ Time ret;
+ asm volatile ("mfence\n\t"
+ "rdtsc\n\t"
+ "shl $0x20,%%rdx\n\t"
+ "or %%rdx,%%rax"
+ : "=a" (ret) : : "cc", "rdx", "memory");
+ return ret;
+ }
+
+ static inline Time read_strict()
+ {
+ return rdtsc_strict();
+ }
+
+ enum { is_strict = 1 };
+};
+
+// clock_gettime(CLOCK_MONOTONIC), returned as a nanosecond count.
+template<>
+struct Clock<(int)MONOTONIC>
+{
+ static inline Time read()
+ {
+ struct timespec t;
+ clock_gettime(CLOCK_MONOTONIC, &t);
+ return (uint64_t)t.tv_sec * 1000000000ULL + (uint64_t)t.tv_nsec;
+ }
+
+ // Issues mb() (mfence) before sampling.
+ static inline Time read_strict()
+ {
+ mb();
+ return read();
+ }
+
+ enum { is_strict = 0 };
+};
+
+// Accumulates "t1 must not exceed t2" checks between two streams of
+// timestamps and remembers the worst violation seen.
+class TestScorer
+{
+public:
+	TestScorer() : nsamples(0), worst_error(INT64_MIN), nfailures(0) {}
+
+	// Asserts that the first timestamp is <= the second.  The thread
+	// indices are part of the interface but are not used here.
+	void Compare(int thread1, Time t1, int thread2, Time t2)
+	{
+		int64_t error = (int64_t)(t1 - t2);  // Negative is good.
+		if (error > worst_error)
+			worst_error = error;
+
+		if (error > 0)
+			nfailures++;
+	}
+
+	// Verify that nonzero entries in times1 prior to nonzero entries
+	// in times2 have lower or equal values.
+	//
+	// Both arrays are walked in index order and zero entries are
+	// skipped.  At each index times2 is consumed before times1, so a
+	// times2 entry is only compared against times1 entries at strictly
+	// smaller indices, and each times1 maximum is compared at most once.
+	// A backwards step within either array is reported and forces
+	// worst_error to INT64_MAX.
+	void CompareArrays(int len,
+			   int thread1, Time *times1,
+			   int thread2, Time *times2)
+	{
+		Time t1max = 0, t2max = 0;
+		bool t1fresh = false;
+		for (int i = 0; i < len; i++)
+		{
+			// Consume one entry from times2
+			if (times2[i]) {
+				if (times2[i] < t2max) {
+					printf(" ERROR! Time2 went back by %" PRIu64 "\n",
+					       t2max - times2[i]);
+					worst_error = INT64_MAX;
+				} else {
+					t2max = times2[i];
+				}
+
+				// Check state
+				if (t1max && t2max && t1fresh) {
+					t1fresh = false;
+					nsamples++;
+					Compare(thread1, t1max, thread2, t2max);
+				}
+			}
+
+			// Consume one entry from times1
+			if (times1[i]) {
+				if (times1[i] < t1max) {
+					printf(" ERROR! Time1 went back by %" PRIu64 "\n",
+					       t1max - times1[i]);
+					worst_error = INT64_MAX;
+				} else {
+					t1max = times1[i];
+					t1fresh = true;
+				}
+			}
+		}
+	}
+
+	// Prints a one-line summary.  worst_error is only negated when
+	// nsamples != 0, i.e. after Compare() has replaced the INT64_MIN
+	// starting value.
+	void Print()
+	{
+		if (nsamples == 0)
+			printf(" No data!\n");
+		else if (worst_error <= 0)
+			printf(" Passed with margin %" PRIi64 " (%" PRIu64 " samples)\n",
+			       -worst_error, nsamples);
+		else
+			printf(" Failed %" PRIu64 "/%" PRIu64 " times with worst error %" PRIi64 "\n",
+			       nfailures, nsamples, worst_error);
+	}
+
+	uint64_t nsamples;	// Number of Compare() calls made by CompareArrays()
+	int64_t worst_error;	// Largest t1 - t2 seen; positive means failure
+
+private:
+	uint64_t nfailures;	// Number of comparisons with t1 > t2
+};
+
+// Base class for the multi-threaded tests.  It owns the worker threads and
+// a small spin-based handshake built on two counters:
+//   seq        - a sequence number that the derived tests' writer threads
+//                keep incrementing
+//   next_start - the seq value at which the current run begins
+// Workers spin in WaitForStartSignal() until a new next_start is published
+// and seq has reached it, and report completion through MarkDone().
+class SequenceTest
+{
+public:
+ // Pointer to a per-thread entry point; the argument is the thread index.
+ typedef void (SequenceTest::*ThreadProc)(int);
+ unsigned long cacheline_aligned seq;
+ // Set by Stop(); every thread loop polls it.
+ volatile bool cacheline_aligned end;
+
+ // Aggregate results, filled in by the derived test's threads.
+ uint64_t nsamples;
+ int64_t worst_error;
+
+ // Asks all threads to finish and joins them; aborts if a join fails.
+ void Stop()
+ {
+ end = true;
+ for(int i = 0; i < nthreads; i++)
+ {
+ void *retval;
+ if (pthread_join(threads[i], &retval) != 0)
+ abort();
+ }
+ }
+
+ // worst_error starts at INT64_MIN + 1 rather than INT64_MIN.
+ // NOTE(review): presumably so that negating it cannot overflow -- confirm.
+ SequenceTest() : seq(1), end(false), nsamples(0), worst_error(INT64_MIN + 1)
+ {
+ next_start = 0;
+ nthreads = 0;
+ memset(finished, 0, sizeof(finished));
+ memset(last_start, 0, sizeof(last_start));
+ }
+
+private:
+ // Handshake bookkeeping, in an anonymous 128-byte-aligned struct.
+ struct cacheline_aligned {
+ unsigned long next_start;
+ int nthreads;
+ unsigned long finished[MAX_THREADS];
+ unsigned long last_start[MAX_THREADS];
+ pthread_t threads[MAX_THREADS];
+ };
+
+ int cacheline_aligned padding;
+
+ // Heap-allocated argument bundle handed to each new thread.
+ struct ThreadProcInfo
+ {
+ SequenceTest *test;
+ int threadidx;
+ ThreadProc proc;
+ };
+ // pthread entry point: copies and frees the bundle, then calls the
+ // member function on the owning test.
+ static void *RealThreadProc(void *info)
+ {
+ ThreadProcInfo tpi = *(ThreadProcInfo*)info;
+ delete (ThreadProcInfo*)info;
+ (tpi.test->*tpi.proc)(tpi.threadidx);
+ return 0;
+ }
+
+protected:
+ // Starts one thread running proc with the next free thread index,
+ // pinned to cpus[index] when that entry is not -1.  Aborts when
+ // MAX_THREADS is exceeded and exits if the thread cannot be created.
+ void StartThread(ThreadProc proc)
+ {
+ if (nthreads >= MAX_THREADS)
+ abort();
+
+ ThreadProcInfo *info = new ThreadProcInfo;
+ info->test = this;
+ info->proc = proc;
+ info->threadidx = nthreads;
+
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ if (cpus[nthreads] != -1) {
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpus[nthreads], &cpuset);
+ pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset);
+ }
+
+ if (pthread_create(&threads[nthreads], &attr, RealThreadProc, info) != 0) {
+ printf("Failed to start thread\n");
+ exit(1);
+ }
+
+ pthread_attr_destroy(&attr);
+
+ nthreads++;
+ }
+
+ // Spins until a run this thread has not yet seen is published and seq
+ // has reached its starting value, or until end is set.  Records the
+ // start value in last_start[threadidx] and returns it.
+ unsigned long WaitForStartSignal(int threadidx)
+ {
+ unsigned long ret;
+
+ // Wait until the start trigger is set.
+ while(ACCESS_ONCE(next_start) == last_start[threadidx] && !end)
+ ;
+
+ // And wait for the start signal.
+ do {
+ ret = ACCESS_ONCE(next_start);
+ } while (ACCESS_ONCE(seq) < ret && !end);
+
+ last_start[threadidx] = ret;
+
+ barrier();
+ return ret;
+ }
+
+ // Publishes that this thread has finished the run it last started.
+ void MarkDone(int threadidx)
+ {
+ ACCESS_ONCE(finished[threadidx]) = last_start[threadidx];
+ }
+
+ // Publishes the current seq as the start of a new run and returns it.
+ unsigned long SendStartSignal()
+ {
+ ACCESS_ONCE(next_start) = ACCESS_ONCE(seq);
+ return next_start;
+ }
+
+ // True once the given thread has called MarkDone() for the current run.
+ bool thread_done(int threadidx)
+ {
+ return ACCESS_ONCE(finished[threadidx]) == next_start;
+ }
+};
+
+/* Now test */
+
+// "Now" test: one thread keeps publishing its latest clock reading, the
+// other loads the published value and then reads the clock itself.  The
+// published value should never be later than the reader's own reading.
+template<typename ClockType>
+class NowTest : public SequenceTest
+{
+public:
+	// Start the published timestamp at zero.  Without this the reader
+	// could load an indeterminate value before the writer's first store
+	// and record a bogus positive error.
+	NowTest() : now(0) {}
+
+	void Start()
+	{
+		StartThread((ThreadProc)&NowTest::WriterThread);
+		StartThread((ThreadProc)&NowTest::ReaderThread);
+	}
+
+private:
+	// Latest timestamp published by the writer.
+	volatile Time cacheline_aligned now;
+
+	// Publishes clock readings as fast as possible, eight per loop
+	// iteration between checks of the end flag.
+	void WriterThread(int threadidx)
+	{
+		while (!end) {
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			now = ClockType::read();
+			nsamples += 8; // Very approximate
+		}
+	}
+
+	// Loads the published time, then reads the clock locally, and
+	// tracks the largest (published - local) difference.  A positive
+	// difference means the other thread's earlier reading appears to
+	// be in this thread's future.
+	void ReaderThread(int threadidx)
+	{
+		while (!end)
+		{
+			Time other_now = now;
+			barrier();
+			Time my_now = ClockType::read();
+
+			int64_t error = (int64_t)(other_now - my_now);
+			if (error > worst_error)
+				worst_error = error;
+		}
+	}
+};
+
+/* Subsequent load test */
+
+// Three-thread load ordering test.  A writer thread keeps incrementing
+// seq while two observers sample both seq and the clock in opposite
+// orders, each recording its clock value at index (seq - start).
+template<typename ClockType>
+class Load3Test : public SequenceTest
+{
+public:
+ // Thread indices are assigned in start order: writer 0, LoadBeforeClock 1,
+ // LoadAfterClock 2.  WriterThread relies on this in thread_done(1)/(2).
+ void Start()
+ {
+ StartThread((ThreadProc)&Load3Test::WriterThread);
+ StartThread((ThreadProc)&Load3Test::LoadBeforeClock);
+ StartThread((ThreadProc)&Load3Test::LoadAfterClock);
+ }
+
+private:
+ enum { results_len = 1048576 };
+ // results_1 is filled by LoadBeforeClock, results_2 by LoadAfterClock.
+ // A zero entry means "no sample at this index".
+ unsigned long results_1[results_len], results_2[results_len];
+
+ // Drives the runs: clears the result arrays, signals a start, bumps seq
+ // until both observers are done, then scores the run.
+ void WriterThread(int threadidx)
+ {
+ unsigned long my_seq = 1;
+ while(!end)
+ {
+ /* Clear the initial state */
+ memset(results_1, 0, sizeof(results_1));
+ memset(results_2, 0, sizeof(results_2));
+
+ /* Start a new run */
+ SendStartSignal();
+
+ /* Run until finished */
+ while(!end && (!thread_done(1) || !thread_done(2)))
+ {
+ ACCESS_ONCE(seq) = ++my_seq;
+ }
+
+ if (end)
+ return;
+
+ // A results_2 sample (clock read before the seq load) at an
+ // earlier index must not exceed a results_1 sample (clock read
+ // after the seq load) at a later index.
+ TestScorer checker;
+ checker.CompareArrays(results_len, 2, results_2, 1, results_1);
+ if (verbosity >= 2)
+ checker.Print();
+
+ nsamples += checker.nsamples;
+ if (checker.worst_error > worst_error)
+ worst_error = checker.worst_error;
+ }
+ }
+
+ // Observer that loads seq first and reads the clock second.
+ void LoadBeforeClock(int threadidx)
+ {
+ while(true)
+ {
+ unsigned long start = WaitForStartSignal(threadidx);
+ if (end)
+ return;
+
+ /* Go! */
+ while(!end) {
+ unsigned long seqval = ACCESS_ONCE(seq);
+ unsigned long clock = ClockType::read();
+
+ // The run is over once seq has advanced past the array.
+ unsigned long idx = seqval - start;
+ if (idx >= results_len)
+ break;
+
+ results_1[idx] = clock;
+ }
+
+ MarkDone(threadidx);
+ }
+ }
+
+ // Observer that reads the clock first and loads seq second.
+ void LoadAfterClock(int threadidx)
+ {
+ while(true)
+ {
+ unsigned long start = WaitForStartSignal(threadidx);
+ if (end)
+ return;
+
+ /* Go! */
+ while(!end) {
+ unsigned long clock = ClockType::read();
+ unsigned long seqval = ACCESS_ONCE(seq);
+
+ // The run is over once seq has advanced past the array.
+ unsigned long idx = seqval - start;
+ if (idx >= results_len)
+ break;
+
+ results_2[idx] = clock;
+ }
+
+ MarkDone(threadidx);
+ }
+ }
+};
+
+/* Prior store test and load order test */
+
+// Two-thread ordering test.  The writer stores a new seq value and then
+// reads the clock; the reader reads the clock and then loads seq.  Both
+// record their clock value at index (seq - start), and a reader sample at
+// an earlier index must not exceed a writer sample at a later index.
+//
+// is_load selects how the writer reads the clock after its store:
+//   1 (load order test)  - ClockType::read_strict()
+//   0 (store order test) - ClockType::read()
+template<typename ClockType, int is_load>
+class LoadStoreTest : public SequenceTest
+{
+public:
+	// Thread indices follow start order: writer 0, reader 1.
+	// WriterThread relies on this in thread_done(1).
+	void Start()
+	{
+		StartThread((ThreadProc)&LoadStoreTest::WriterThread);
+		StartThread((ThreadProc)&LoadStoreTest::ReaderThread);
+	}
+
+private:
+	// Compile-time selection of the writer's clock read.  Each partial
+	// specialization reads through its own ClockType_ parameter.
+	template<typename ClockType_, int is_load_>
+	struct read_for_store;
+	template<typename ClockType_>
+	struct read_for_store<ClockType_, 0>
+	{
+		static inline Time read() { return ClockType_::read(); }
+	};
+	template<typename ClockType_>
+	struct read_for_store<ClockType_, 1>
+	{
+		static inline Time read() { return ClockType_::read_strict(); }
+	};
+
+	enum { results_len = 1048576 };
+	// results_1 is filled by the reader, results_2 by the writer.
+	// A zero entry means "no sample at this index".
+	unsigned long results_1[results_len], results_2[results_len];
+
+	void WriterThread(int threadidx)
+	{
+		unsigned long my_seq = 1;
+
+		while (!end)
+		{
+			/* Clear the initial state */
+			memset(results_1, 0, sizeof(results_1));
+			memset(results_2, 0, sizeof(results_2));
+
+			/* Start a new run */
+			unsigned long start = SendStartSignal();
+
+			/*
+			 * Run until finished.  my_seq is incremented before
+			 * the index is computed, so the loop must stop one
+			 * step early to keep idx <= results_len - 1.
+			 */
+			while (my_seq - start < results_len - 1)
+			{
+				unsigned long idx, time;
+				ACCESS_ONCE(seq) = ++my_seq;
+				time = read_for_store<ClockType, is_load>::read();
+
+				idx = my_seq - start;
+				results_2[idx] = time;
+			}
+
+			/*
+			 * Wait for other thread.  Advancing seq further is
+			 * what pushes the reader's index past results_len
+			 * so that it finishes its run.
+			 */
+			while (!thread_done(1) && !end)
+				ACCESS_ONCE(seq) = ++my_seq;
+
+			if (end)
+				return;
+
+			TestScorer checker;
+			checker.CompareArrays(results_len, 1, results_1, 2, results_2);
+			if (verbosity >= 2)
+				checker.Print();
+
+			nsamples += checker.nsamples;
+			if (checker.worst_error > worst_error)
+				worst_error = checker.worst_error;
+		}
+	}
+
+	// Reads the clock first and loads seq second, recording the clock
+	// value at the observed sequence index.
+	void ReaderThread(int threadidx)
+	{
+		while (!end)
+		{
+			unsigned long start = WaitForStartSignal(threadidx);
+			if (end)
+				return;
+
+			/* Go! */
+			while (!end) {
+				unsigned long clock = ClockType::read();
+				unsigned long seqval = ACCESS_ONCE(seq);
+
+				unsigned long idx = seqval - start;
+				if (idx >= results_len)
+					break;
+
+				results_1[idx] = clock;
+			}
+
+			MarkDone(threadidx);
+		}
+	}
+};
+
+/* End of tests */
+
+// Runs every enabled test against the given clock.  Each test is started,
+// left running for one second, stopped, and summarized on stdout.
+//
+// The PRI* format macros are separated from the adjacent string literals
+// by spaces: since C++11 a literal immediately followed by an identifier
+// is parsed as a user-defined literal suffix.
+template<typename ClockType>
+static void run()
+{
+	if (run_now_test) {
+		NowTest<ClockType> *t = new NowTest<ClockType>;
+		t->Start();
+		usleep(1000000);
+		t->Stop();
+
+		if (t->nsamples == 0)
+			printf("Now test got no data\n");
+		else if (t->worst_error > 0)
+			printf("Now test failed : worst error %" PRIi64 " with %" PRIu64 " samples\n",
+			       t->worst_error, t->nsamples);
+		else
+			printf("Now test passed : margin %" PRIi64 " with %" PRIu64 " samples\n",
+			       -t->worst_error, t->nsamples);
+
+		delete t;
+	}
+
+	if (run_load3_test) {
+		Load3Test<ClockType> *t = new Load3Test<ClockType>;
+		t->Start();
+		usleep(1000000);
+		t->Stop();
+
+		if (t->nsamples == 0)
+			printf("Load3 test got no data\n");
+		else if (t->worst_error > 0)
+			printf("Load3 test failed: worst error %" PRIi64 " with %" PRIu64 " samples\n",
+			       t->worst_error, t->nsamples);
+		else
+			printf("Load3 test passed: margin %" PRIi64 " with %" PRIu64 " samples\n",
+			       -t->worst_error, t->nsamples);
+
+		delete t;
+	}
+
+	if (run_load_test) {
+		LoadStoreTest<ClockType, 1> *t = new LoadStoreTest<ClockType, 1>;
+		t->Start();
+		usleep(1000000);
+		t->Stop();
+
+		if (t->nsamples == 0) {
+			printf("Load test got no data\n");
+		} else if (t->worst_error > 0) {
+			printf("Load test failed : worst error %" PRIi64 " with %" PRIu64 " samples\n",
+			       t->worst_error, t->nsamples);
+		} else {
+			printf("Load test passed : margin %" PRIi64 " with %" PRIu64 " samples\n",
+			       -t->worst_error, t->nsamples);
+		}
+
+		delete t;
+	}
+
+	if (run_store_test) {
+		LoadStoreTest<ClockType, 0> *t = new LoadStoreTest<ClockType, 0>;
+		t->Start();
+		usleep(1000000);
+		t->Stop();
+
+		if (t->nsamples == 0) {
+			printf("Store test got no data\n");
+		} else if (t->worst_error > 0) {
+			// A failure is flagged as expected unless the clock's
+			// read() is itself marked strict.
+			printf("Store test failed%s: worst error %" PRIi64 " with %" PRIu64 " samples\n",
+			       ClockType::is_strict ? "" : " as expected",
+			       t->worst_error, t->nsamples);
+		} else {
+			printf("Store test passed: margin %" PRIi64 " with %" PRIu64 " samples\n",
+			       -t->worst_error, t->nsamples);
+		}
+
+		delete t;
+	}
+}
+
+// Reads the first processor entry of /proc/cpuinfo, prints the vendor,
+// model, stepping and TSC-related flags, and resolves RDTSC_DEFAULT to a
+// concrete clock type based on the vendor string.  Exits if the file
+// cannot be opened or lacks a vendor_id or flags line.
+static void parse_cpuinfo()
+{
+	FILE *f = fopen("/proc/cpuinfo", "r");
+	if (!f) {
+		perror("/proc/cpuinfo");
+		exit(1);
+	}
+
+	char vendor[4096] = "", model_name[4096] = "", stepping[4096] = "", flags[4096] = "";
+
+	char buf[4096];
+	while (fgets(buf, sizeof(buf), f)) {
+		// fgets() keeps the newline, so the blank line that separates
+		// processor entries is "\n", not "".
+		if (buf[0] == '\n')
+			break; // Done with first cpu
+
+		char name[4096], val[4096];
+		if (sscanf(buf, "%[^\t:]\t: %[^\n]", name, val) != 2)
+			continue;
+
+		// snprintf() always NUL-terminates its own destination.
+		if (!strcmp(name, "vendor_id"))
+			snprintf(vendor, sizeof(vendor), "%s", val);
+		else if (!strcmp(name, "model name"))
+			snprintf(model_name, sizeof(model_name), "%s", val);
+		else if (!strcmp(name, "stepping"))
+			snprintf(stepping, sizeof(stepping), "%s", val);
+		else if (!strcmp(name, "flags"))
+			snprintf(flags, sizeof(flags), "%s", val);
+	}
+
+	fclose(f);
+
+	if (!*vendor || !*flags) {
+		fprintf(stderr, "Couldn't find required info in cpuinfo\n");
+		exit(1);
+	}
+
+	printf("CPU vendor : %s\n"
+	       "CPU model : %s\n"
+	       "CPU stepping : %s\n",
+	       vendor, model_name, stepping);
+
+	// Print every flag whose name contains "tsc".  strsep() splits the
+	// flags buffer in place, which is fine as it is not used afterwards.
+	char *flagsp = flags;
+	printf("TSC flags :");
+	while (const char *flag = strsep(&flagsp, " ")) {
+		if (strstr(flag, "tsc"))
+			printf(" %s", flag);
+	}
+	printf("\n");
+
+	if (clocktype == RDTSC_DEFAULT) {
+		if (!strcmp(vendor, "GenuineIntel")) {
+			printf("Using lfence_rdtsc because you have an Intel CPU\n");
+			clocktype = LFENCE_RDTSC;
+		} else {
+			printf("Using mfence_rdtsc because you do not have an Intel CPU\n");
+			clocktype = MFENCE_RDTSC;
+		}
+	}
+}
+
+// Entry point: parse options, default to running every test, report the
+// CPU, then dispatch to run<>() for the selected clock.
+int main(int argc, char **argv)
+{
+	argp_parse(&argp, argc, argv, 0, 0, 0);
+
+	// No test picked on the command line means "run them all".
+	if (!tests_specified) {
+		run_now_test = true;
+		run_load_test = true;
+		run_load3_test = true;
+		run_store_test = true;
+	}
+
+	// Also resolves RDTSC_DEFAULT to a concrete clock type.
+	parse_cpuinfo();
+
+	switch (clocktype) {
+	case RDTSC:
+		run<Clock<RDTSC> >();
+		break;
+	case RDTSCP:
+		run<Clock<RDTSCP> >();
+		break;
+	case LFENCE_RDTSC:
+		run<Clock<LFENCE_RDTSC> >();
+		break;
+	case MFENCE_RDTSC:
+		run<Clock<MFENCE_RDTSC> >();
+		break;
+	case MONOTONIC:
+		run<Clock<MONOTONIC> >();
+		break;
+	default:
+		// Any other value (including an unresolved default) is a bug.
+		abort();
+	}
+
+	return 0;
+}
diff --git a/timing_test.cc b/timing_test.cc
new file mode 100644
index 0000000..136a731
--- /dev/null
+++ b/timing_test.cc
@@ -0,0 +1,114 @@
+#define __STDC_FORMAT_MACROS
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+typedef int (*vgettime_t)(clockid_t, timespec *);
+
+// Prints one line describing a POSIX clock: its numeric id, its name and
+// the resolution reported by clock_getres(), or a failure note.
+void describe_clock(const char *name, int id)
+{
+	struct timespec res;
+
+	if (clock_getres(id, &res) < 0) {
+		printf(" %d (%s) [failed to query resolution]\n",
+		       id, name);
+		return;
+	}
+
+	const uint64_t secs = (uint64_t)res.tv_sec;
+	const unsigned nsecs = (unsigned)res.tv_nsec;
+	printf(" %d (%s) resolution = %" PRIu64 ".%09u\n",
+	       id, name, secs, nsecs);
+}
+
+// Micro-benchmark: runs <Miters> million iterations of the selected
+// time-reading primitive and reports the average cost per iteration.
+//
+// argv[1] - iteration count in millions
+// argv[2] - mode (which primitive to time)
+// argv[3] - optional POSIX clock id for the clock_gettime-style modes
+//
+// Returns 0 on success and 1 on a usage error or unavailable mode.
+int main(int argc, char **argv)
+{
+	if (argc < 3) {
+		printf("Usage: time <Miters> <mode> [POSIX clock id]\n");
+		printf("\nClocks are:\n");
+		describe_clock("CLOCK_REALTIME", CLOCK_REALTIME);
+		describe_clock("CLOCK_MONOTONIC", CLOCK_MONOTONIC);
+		describe_clock("CLOCK_REALTIME_COARSE", CLOCK_REALTIME_COARSE);
+		describe_clock("CLOCK_MONOTONIC_COARSE", CLOCK_MONOTONIC_COARSE);
+		return 1;
+	}
+
+	// Only look the symbol up in the vDSO itself.  dlsym() with a null
+	// handle would search the global scope instead and could hand back
+	// the C library's clock_gettime under the "vclock_gettime" label.
+	vgettime_t vgettime = 0;
+	void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("dlopen failed: %s\n", dlerror());
+	} else {
+		vgettime = (vgettime_t)dlsym(vdso, "clock_gettime");
+		if (!vgettime)
+			printf("dlsym failed: %s\n", dlerror());
+	}
+
+	// Reject junk and negative counts; a negative value would otherwise
+	// wrap to an enormous size_t.
+	char *count_end;
+	long miters = strtol(argv[1], &count_end, 10);
+	if (count_end == argv[1] || *count_end || miters < 0) {
+		printf("Bad iteration count %s\n", argv[1]);
+		return 1;
+	}
+
+	size_t loops = (size_t)miters * 1000000;
+	clockid_t c = argc > 3 ? atoi(argv[3]) : 0;
+	const char *mode = argv[2];
+
+	timespec start;
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	timespec t;
+	if (!strcmp(mode, "clock_gettime")) {
+		for (size_t i = 0; i < loops; ++i)
+			clock_gettime(c, &t);
+	} else if (!strcmp(mode, "rdtsc")) {
+		for (size_t i = 0; i < loops; ++i) {
+			unsigned int a, d;
+			asm volatile ("rdtsc" : "=a" (a), "=d" (d));
+		}
+	} else if (!strcmp(mode, "lfence_rdtsc")) {
+		for (size_t i = 0; i < loops; ++i) {
+			unsigned int a, d;
+			asm volatile ("lfence;rdtsc" : "=a" (a), "=d" (d));
+		}
+	} else if (!strcmp(mode, "lfence_rdtsc_lfence")) {
+		for (size_t i = 0; i < loops; ++i) {
+			unsigned int a, d;
+			asm volatile ("");
+			asm volatile ("lfence;rdtsc;lfence" : "=a" (a), "=d" (d));
+		}
+	} else if (!strcmp(mode, "mfence_rdtsc_mfence")) {
+		for (size_t i = 0; i < loops; ++i) {
+			unsigned int a, d;
+			asm volatile ("mfence;rdtsc;mfence" : "=a" (a), "=d" (d));
+		}
+	} else if (!strcmp(mode, "rdtscp")) {
+		for (size_t i = 0; i < loops; ++i) {
+			// rdtscp also writes ecx; named aux so it does not
+			// shadow the clock id above.
+			unsigned int a, aux, d;
+			asm volatile ("rdtscp" : "=a" (a), "=c" (aux), "=d" (d));
+		}
+	} else if (!strcmp(mode, "gettimeofday")) {
+		struct timeval tv;
+		for (size_t i = 0; i < loops; ++i)
+			gettimeofday(&tv, 0);
+	} else if (!strcmp(mode, "sys_clock_gettime")) {
+		for (size_t i = 0; i < loops; ++i)
+			syscall(__NR_clock_gettime, c, &t);
+	} else if (!strcmp(mode, "vclock_gettime")) {
+		if (!vgettime) {
+			printf("vclock_gettime is unavailable: no vDSO clock_gettime found\n");
+			return 1;
+		}
+		for (size_t i = 0; i < loops; ++i)
+			vgettime(c, &t);
+	} else if (!strcmp(mode, "rdpmc")) {
+		// Unlikely to work.
+		unsigned int eax, edx;
+		unsigned int ecx = 0;
+		for (size_t i = 0; i < loops; ++i)
+			asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
+	} else {
+		printf("Unknown mode %s\n", mode);
+		return 1;
+	}
+
+	timespec end;
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	// The nanosecond difference may be negative; it is converted to
+	// unsigned and the modular sum with the seconds term is still exact.
+	unsigned long long duration = (end.tv_nsec - start.tv_nsec) + 1000000000ULL * (end.tv_sec - start.tv_sec);
+	// %zu matches size_t; double keeps nanosecond precision for long runs.
+	printf("%zu loops in %.5fs = %.2f nsec / loop\n",
+	       loops, double(duration) * 1e-9,
+	       double(duration) / loops);
+	return 0;
+}