4 files changed, 130 insertions, 19 deletions
diff --git a/Makefile b/Makefile
index a143545..31e1792 100644
--- a/Makefile
+++ b/Makefile
@@ -125,7 +125,8 @@ install: all
 	cp $(TARGETS) "$(DESTDIR)$(bindir)"
 	if test -n "$(PYLIB)" ; then \
 		install -D -m 755 src/hwlatdetect/hwlatdetect.py $(DESTDIR)$(PYLIB)/hwlatdetect.py ; \
-		ln -sf $(PYLIB)/hwlatdetect.py "$(DESTDIR)$(bindir)/hwlatdetect" ; \
+		rm -f "$(DESTDIR)$(bindir)/hwlatdetect" ; \
+		ln -s $(PYLIB)/hwlatdetect.py "$(DESTDIR)$(bindir)/hwlatdetect" ; \
 	fi
 	install -D -m 644 src/backfire/backfire.c "$(DESTDIR)$(srcdir)/backfire/backfire.c"
 	install -m 644 src/backfire/Makefile "$(DESTDIR)$(srcdir)/backfire/Makefile"
diff --git a/README.markdown b/README.markdown
new file mode 100644
index 0000000..3ab6ddf
--- /dev/null
+++ b/README.markdown
@@ -0,0 +1,87 @@
+# RT-Tests
+
+This repository contains some programs that test various rt-linux features.
+
+## Usage
+
+### Compile
+
+    sudo apt-get install build-essential libnuma-dev
+    make
+
+### Run tests
+
+To run one test thread per CPU or per CPU core, each thread on a separate
+processor, type
+
+    sudo ./cyclictest -a -t -n -p99
+
+On a non-realtime system, you may see something like
+
+    T: 0 ( 3431) P:99 I:1000 C: 100000 Min:      5 Act:   10 Avg:   14 Max:   39242
+    T: 1 ( 3432) P:98 I:1500 C:  66934 Min:      4 Act:   10 Avg:   17 Max:   39661
+
+The rightmost column contains the most important result, i.e. the worst-case
+latency of 39.242 milliseconds. On a realtime-enabled system, the result may
+look like
+
+    T: 0 ( 3407) P:99 I:1000 C: 100000 Min:      7 Act:   10 Avg:   10 Max:      18
+    T: 1 ( 3408) P:98 I:1500 C:  67043 Min:      7 Act:    8 Avg:   10 Max:      22
+
+and, thus, indicate an apparent short-term worst-case latency of 18
+microseconds.
+
+Running cyclictest only over a short period of time and without creating
+appropriate real-time stress conditions is rather meaningless, since the
+execution of an asynchronous event from idle state is normally always quite
+fast, and every - even non-RT system - can do that. The challenge is to minimize
+ the latency when reacting to an asynchronuous event, irrespective of what code
+path is executed at the time when the external event arrives.
+Therefore, specific stress conditions must be present while cyclictest is
+running to reliably determine the worst-case latency of a given system.
+
+### Latency fighting
+
+If - as in the above example - a low worst-case latency is measured, and this is
+the case even under a system load that is equivalent to the load expected under
+production conditions, everything is alright.
+Of course, the measurement must last suffciently long, preferably 24 hours or
+more to run several hundred million test threads. If possible, the `-i` command
+line option (thread interval) should be used to increase the number of test
+threads over time.
+As a role of thumb, the thread interval should be set to a value twice as long
+as the expected worst-case latency. If at the end of such a test period the
+worst-cae latency still did not exceed the value that is assumed critical for a
+given system, the particular kernel in combination with the hardware in use can
+then probably be regarded as real-time capable.
+
+What, however, if the latency is higher than acceptable? Then, the famous
+"*latency fighting*" begins. For this purpose, the cyclictest tool provides the
+`-b` option that causes a function tracing to be written to
+`/sys/kernel/debug/tracing/trace`, if a specified latency threshold was
+exceeded, for example:
+
+    ./cyclictest -a -t -n -p99 -f -b100
+
+This causes the program to abort execution, if the latency value exceeds 100
+microseconds; the culprit can then be found in the trace output at
+`/sys/kernel/debug/tracing/trace`.
+The kernel function that was executed just before a latency of more than 100
+microseconds was detected is marked with an exclamation mark such as
+
+    qemu-30047 2D.h3 742805us : __activate_task+0x42/0x68 <cyclicte-426> (199 1)
+    qemu-30047 2D.h3 742806us : __trace_start_sched_wakeup+0x40/0x161 <cyclicte-426> (0 -1)
+    qemu-30047 2DNh3 742806us!: try_to_wake_up+0x422/0x460 <cyclicte-426> (199 -5)
+    qemu-30047 2DN.1 742939us : __sched_text_start+0xf3/0xdcd (c064e442 0)
+
+The first column indicates the calling process responsible for triggering the
+latency.
+
+If the trace output is not obvious, it can be submitted to the OSADL Latency
+Fight Support Service at
+[latency-fighters@osadl.org](mailto:latency-fighters@osadl.org).
+In addition to the output of `cat /sys/kernel/debug/tracing/trace`, the output
+of `lspci` and the `.config` file that was used to build the kernel in question
+must be submitted. We are sure you understand that OSADL members will be served
+first, but we promise to do our best to help everybody to successfully fight
+against kernel and driver latencies.
diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c
index 731b4bd..dadced8 100644
--- a/src/cyclictest/cyclictest.c
+++ b/src/cyclictest/cyclictest.c
@@ -1,7 +1,7 @@
 /*
  * High resolution timer test software
  *
- * (C) 2008-2011 Clark Williams <williams@redhat.com>
+ * (C) 2008-2012 Clark Williams <williams@redhat.com>
  * (C) 2005-2007 Thomas Gleixner <tglx@linutronix.de>
  *
  * This program is free software; you can redistribute it and/or
@@ -170,6 +170,7 @@ static int duration = 0;
 static int use_nsecs = 0;
 static int refresh_on_max;
 static int force_sched_other;
+static int priospread = 0;
 
 static pthread_cond_t refresh_on_max_cond = PTHREAD_COND_INITIALIZER;
 static pthread_mutex_t refresh_on_max_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -692,35 +693,46 @@ void *timerthread(void *param)
 
 		case MODE_CLOCK_NANOSLEEP:
 			if (par->timermode == TIMER_ABSTIME) {
-				ret = clock_nanosleep(par->clock, TIMER_ABSTIME,
-						&next, NULL);
+				if ((ret = clock_nanosleep(par->clock, TIMER_ABSTIME, &next, NULL))) {
+					if (ret != EINTR)
+						warn("clock_nanosleep failed. errno: %d\n", errno);
+					goto out;
+				}
 			} else {
 				clock_gettime(par->clock, &now);
-				ret = clock_nanosleep(par->clock, TIMER_RELTIME,
-						&interval, NULL);
+				if ((ret = clock_nanosleep(par->clock, TIMER_RELTIME, &interval, NULL))) {
+					if (ret != EINTR)
+						warn("clock_gettime failed. errno: %d\n", errno);
+					goto out;
+				}
 				next.tv_sec = now.tv_sec + interval.tv_sec;
 				next.tv_nsec = now.tv_nsec + interval.tv_nsec;
 				tsnorm(&next);
 			}
-
-			/* Avoid negative calcdiff result if clock_nanosleep() 
-			 * gets interrupted.
-			 */
-			if (ret == EINTR)
-				goto out;
-
 			break;
 
 		case MODE_SYS_NANOSLEEP:
-			clock_gettime(par->clock, &now);
-			nanosleep(&interval, NULL);
+			if ((ret = clock_gettime(par->clock, &now))) {
+				if (ret != EINTR)
+					warn("clock_gettime failed: errno %d\n", errno);
+				goto out;
+			}
+			if (nanosleep(&interval, NULL)) {
+				if (errno != EINTR)
+					warn("nanosleep failed. errno: %d\n", errno);
+				goto out;
+			}
 			next.tv_sec = now.tv_sec + interval.tv_sec;
 			next.tv_nsec = now.tv_nsec + interval.tv_nsec;
 			tsnorm(&next);
 			break;
 		}
 
-		clock_gettime(par->clock, &now);
+		if ((ret = clock_gettime(par->clock, &now))) {
+			if (ret != EINTR)
+				warn("clock_getttime failed. errno: %d\n", errno);
+			goto out;
+		}
 
 		if (use_nsecs)
 			diff = calcdiff_ns(now, next);
@@ -845,6 +857,7 @@ static void display_help(int error)
 	       "-p PRIO  --prio=PRIO       priority of highest prio thread\n"
 	       "-P       --preemptoff      Preempt off tracing (used with -b)\n"
 	       "-q       --quiet           print only a summary on exit\n"
+	       "-Q       --priospread       spread priority levels starting at specified value\n"
 	       "-r       --relative        use relative timer instead of absolute\n"
 	       "-s       --system          use sys_nanosleep and sys_setitimer\n"
 	       "-t       --threads         one thread per available processor\n"
@@ -984,9 +997,10 @@ static void process_options (int argc, char *argv[])
 			{"smp", no_argument, NULL, 'S'},
 			{"numa", no_argument, NULL, 'U'},
 			{"latency", required_argument, NULL, 'e'},
+			{"priospread", no_argument, NULL, 'Q'},
 			{NULL, 0, NULL, 0}
 		};
-		int c = getopt_long(argc, argv, "a::b:Bc:Cd:Efh:H:i:Il:MnNo:O:p:PmqrsSt::uUvD:wWT:y:e:",
+		int c = getopt_long(argc, argv, "a::b:Bc:Cd:Efh:H:i:Il:MnNo:O:p:PmqQrsSt::uUvD:wWT:y:e:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1045,6 +1059,7 @@ static void process_options (int argc, char *argv[])
 			}
 			break;
 		case 'q': quiet = 1; break;
+		case 'Q': priospread = 1; break;
 		case 'r': timermode = TIMER_RELTIME; break;
 		case 's': use_system = MODE_SYS_OFFSET; break;
 		case 't':
@@ -1140,6 +1155,12 @@ static void process_options (int argc, char *argv[])
 	if (priority < 0 || priority > 99)
 		error = 1;
 
+	if (priospread && priority == 0) {
+		fprintf(stderr, "defaulting realtime priority to %d\n", 
+			num_threads+1);
+		priority = num_threads+1;
+	}
+
 	if (priority && (policy != SCHED_FIFO && policy != SCHED_RR)) {
 		fprintf(stderr, "policy and priority don't match: setting policy to SCHED_FIFO\n");
 		policy = SCHED_FIFO;
@@ -1456,7 +1477,7 @@ int main(int argc, char **argv)
 			par->policy = SCHED_OTHER;
 			force_sched_other = 1;
 		}
-		if (priority && !histogram && !smp && !numa)
+		if (priospread)
 			priority--;
 		par->clock = clocksources[clocksel];
 		par->mode = mode;
diff --git a/src/hwlatdetect/hwlatdetect.py b/src/hwlatdetect/hwlatdetect.py
index 66e66a2..ca47c1c 100755
--- a/src/hwlatdetect/hwlatdetect.py
+++ b/src/hwlatdetect/hwlatdetect.py
@@ -519,11 +519,13 @@ if __name__ == '__main__':
     info("Samples exceeding threshold: %d" % exceeding)
 
     if reportfile:
+        count = 0
         f = open(reportfile, "w")
         for s in detect.samples:
+            count += 1
             f.write("%s\n" % s)
         f.close()
-        info("sample data written to %s" % reportfile)
+        info("sample data (%d samples) written to %s" % (count, reportfile))
     else:
         for s in detect.samples:
             print "%s" % s