diff -urN linux-2.4.13-preempt/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-2.4.13-preempt/Documentation/Configure.help	Thu Oct 11 20:15:47 2001
+++ linux/Documentation/Configure.help	Thu Oct 11 20:16:05 2001
@@ -166,6 +166,16 @@
   system where throughput is more important than interactive response,
   such as a server system.  Say N if you are unsure.
 
+Collect preemption latency times
+CONFIG_PREEMPT_TIMES
+  This option instructs the preemption code to measure the in-kernel
+  latencies caused by the various locking primitives.  The 20 worst
+  latencies recorded are accessible via /proc/latencytimes and are
+  reset on each read.
+
+  Say Y here if you are testing kernel preemption and trying to determine
+  where high-latency problems lie.  Say N if you are unsure.
+
 Kernel math emulation
 CONFIG_MATH_EMULATION
   Linux can emulate a math coprocessor (used for floating point
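For reference, the report format this option exposes (produced by
latencytimes_read_proc() in kernel/preem_latency.c below): the first
read freezes logging and prints a header, each further read emits the
worst remaining entry, and reading the file to the end resets the
table and re-enables logging.  A read might look like this (values
are illustrative, not measured):

	Worst 20 latency times of 3549 measured in this period.
	  usec      cause     mask   start line/file      address   end line/file
	   227  spin_lock        0   547/sched.c         c0112e40   691/sched.c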
diff -urN linux-2.4.13-preempt/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.13-preempt/arch/i386/config.in	Thu Oct 11 20:15:34 2001
+++ linux/arch/i386/config.in	Thu Oct 11 20:16:05 2001
@@ -170,6 +170,9 @@
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
 bool 'Preemptible Kernel' CONFIG_PREEMPT
+if [ "$CONFIG_PREEMPT" = "y" ]; then
+   bool 'Collect preemption latency times' CONFIG_PREEMPT_TIMES
+fi
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
    dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
diff -urN linux-2.4.13-preempt/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
--- linux-2.4.13-preempt/arch/i386/kernel/irq.c	Thu Oct 11 20:15:35 2001
+++ linux/arch/i386/kernel/irq.c	Thu Oct 11 20:16:05 2001
@@ -559,6 +559,11 @@
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
+#ifdef CONFIG_PREEMPT_TIMES
+extern void latency_cause(int, int);
+#else
+#define latency_cause(a, b)
+#endif
 asmlinkage unsigned int do_IRQ(struct pt_regs regs)
 {	
 	/* 
@@ -577,9 +582,12 @@
 	struct irqaction * action;
 	unsigned int status;
 
+	preempt_lock_start(-99);
+
 	kstat.irqs[cpu][irq]++;
 	spin_lock(&desc->lock);
 	desc->handler->ack(irq);
+	latency_cause(-99, ~irq);
 	/*
 	   REPLAY is when Linux resends an IRQ that was dropped earlier
 	   WAITING is used by probe to mark irqs that are being tested
@@ -636,6 +644,8 @@
 	desc->handler->end(irq);
 	spin_unlock(&desc->lock);
 
+	preempt_lock_stop();
+
 	if (softirq_pending(cpu))
 		do_softirq();
 	return 1;
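A note on the cause encoding above: the -99 handed to
preempt_lock_start() is a sentinel for "cause not yet known", and
latency_cause(-99, ~irq) then swaps in the one's complement of the IRQ
number.  The report code in kernel/preem_latency.c reverses this; a
sketch of that decode:

	if (cause == -99)
		name = "unknown";			/* never resolved */
	else if (cause < 0)
		name = irq_desc[~cause].action->name;	/* ~~irq == irq */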
diff -urN linux-2.4.13-preempt/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h
--- linux-2.4.13-preempt/include/asm-i386/smplock.h	Thu Oct 11 20:15:00 2001
+++ linux/include/asm-i386/smplock.h	Thu Oct 11 20:16:05 2001
@@ -9,6 +9,16 @@
 #include <asm/current.h>
 
 extern spinlock_t kernel_flag;
+#ifdef CONFIG_PREEMPT_TIMES
+#define lock_bkl() do { \
+	atomic_inc(atomic_ptr_preempt_count());  \
+	preempt_lock_start(3); \
+	_raw_spin_lock(&kernel_flag); \
+} while (0)
+#else
+#define lock_bkl() spin_lock(&kernel_flag)
+#endif
+#define unlock_bkl() spin_unlock(&kernel_flag)
 
 #ifdef CONFIG_SMP
 #define kernel_locked()		spin_is_locked(&kernel_flag)
@@ -40,7 +50,18 @@
 		spin_lock(&kernel_flag); \
 } while (0)
 
-
+#ifdef CONFIG_PREEMPT_TIMES
+#define lock_kernel() do { \
+	if (current->lock_depth == -1) \
+		lock_bkl(); \
+	++current->lock_depth; \
+} while (0)
+
+#define unlock_kernel() do { \
+	if (--current->lock_depth < 0) \
+		unlock_bkl(); \
+} while (0)
+#else
 /*
  * Getting the big kernel lock.
  *
@@ -87,3 +108,4 @@
 		 "=m" (current->lock_depth));
 #endif
 }
+#endif /* CONFIG_PREEMPT_TIMES */
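A note on the instrumented lock_bkl() above: it raises the preemption
count by hand and only then calls preempt_lock_start(3), so the period
is attributed to cause 3 ("BKL" in cause_name[]), and it takes
kernel_flag with _raw_spin_lock() so the spin_lock() wrapper cannot
start a second, nested measurement under the generic "spin_lock"
cause.  unlock_bkl() stays a plain spin_unlock(), whose
preempt_enable() both ends the timing and drops the count taken
manually in lock_bkl().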
diff -urN linux-2.4.13-preempt/include/linux/spinlock.h linux/include/linux/spinlock.h
--- linux-2.4.13-preempt/include/linux/spinlock.h	Thu Oct 11 20:14:59 2001
+++ linux/include/linux/spinlock.h	Thu Oct 11 20:16:05 2001
@@ -136,6 +136,25 @@
   
 #ifdef CONFIG_PREEMPT
 
+#ifdef CONFIG_PREEMPT_TIMES
+extern void latency_start(const char *fname, unsigned lineno, int cause);
+extern void latency_check(const char *fname, unsigned lineno, int cause);
+extern void latency_end(const char *fname, unsigned lineno);
+#define preempt_lock_start(c) \
+do { \
+	if (preempt_is_disable() == 1) \
+		latency_start(__BASE_FILE__, __LINE__, c); \
+} while (0)
+#define preempt_lock_stop() \
+do { \
+	if (preempt_is_disable() == 1) \
+		latency_end(__BASE_FILE__, __LINE__); \
+} while (0)
+#else
+#define preempt_lock_start(c)	do {} while (0)
+#define preempt_lock_stop()	do {} while (0)
+#endif /* CONFIG_PREEMPT_TIMES */
+
 #define switch_lock_count() current->preempt_count
 #define preempt_is_disable() (switch_lock_count().counter)
 #define atomic_ptr_preempt_count() (&switch_lock_count())
@@ -143,13 +162,16 @@
 
 #define preempt_disable() do {			\
 	atomic_inc(atomic_ptr_preempt_count());	\
+	preempt_lock_start(1);			\
 } while (0)
 
 #define preempt_enable_no_resched() do {	\
+	preempt_lock_stop();			\
 	atomic_dec(atomic_ptr_preempt_count());	\
 } while (0)
 
 #define preempt_enable() do {			\
+	preempt_lock_stop();			\
 	if (atomic_dec_and_test(atomic_ptr_preempt_count()) && \
 			current->need_resched)	\
 		preempt_schedule();		\
@@ -192,6 +214,9 @@
 #define write_lock(lock)	_raw_write_lock(lock)
 #define write_unlock(lock)	_raw_write_unlock(lock)
 #define write_trylock(lock)	_raw_write_trylock(lock)
+
+#define preempt_lock_start(c)	do {} while (0)
+#define preempt_lock_stop()	do {} while (0)
 #endif /* !CONFIG_PREEMPT */
 
 /* "lock on reference count zero" */
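With these hooks only the outermost disable/enable pair is measured:
preempt_lock_start() acts when the count has just risen to exactly 1,
and preempt_lock_stop() acts while it is still exactly 1.  A minimal
illustrative sequence:

	preempt_disable();	/* count 0 -> 1: latency_start() runs */
	preempt_disable();	/* count 1 -> 2: nested, no new start */
	preempt_enable();	/* count still 2 at the check: no end */
	preempt_enable();	/* count 1 at the check: latency_end() */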
diff -urN linux-2.4.13-preempt/kernel/Makefile linux/kernel/Makefile
--- linux-2.4.13-preempt/kernel/Makefile	Thu Oct 11 20:14:57 2001
+++ linux/kernel/Makefile	Thu Oct 11 20:16:05 2001
@@ -19,6 +19,7 @@
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += ksyms.o
 obj-$(CONFIG_PM) += pm.o
+obj-$(CONFIG_PREEMPT_TIMES) += preem_latency.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff -urN linux-2.4.13-preempt/kernel/ksyms.c linux/kernel/ksyms.c
--- linux-2.4.13-preempt/kernel/ksyms.c	Thu Oct 11 20:14:57 2001
+++ linux/kernel/ksyms.c	Thu Oct 11 20:16:05 2001
@@ -435,6 +435,11 @@
 #ifdef CONFIG_PREEMPT
 EXPORT_SYMBOL(preempt_schedule);
 #endif
+#ifdef CONFIG_PREEMPT_TIMES
+EXPORT_SYMBOL(latency_start);
+EXPORT_SYMBOL(latency_end);
+EXPORT_SYMBOL(latency_check);
+#endif
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(jiffies);
 EXPORT_SYMBOL(xtime);
diff -urN linux-2.4.13-preempt/kernel/preem_latency.c linux/kernel/preem_latency.c
--- linux-2.4.13-preempt/kernel/preem_latency.c	Wed Dec 31 19:00:00 1969
+++ linux/kernel/preem_latency.c	Thu Oct 11 20:16:05 2001
@@ -0,0 +1,386 @@
+/*
+ * Preemption Latency Measurement Tool
+ * Copyright (C) 2001 MontaVista Software Inc.
+ *
+ * Measures the duration of locks held in the kernel.
+ * Useful for pinpointing long-held locks, during
+ * which the kernel cannot be preempted.
+ *
+ * 20010923 Robert M. Love
+ * 	- remove causes "lowlat" and "check"
+ * 	- update for revised preemption patch
+ * 	- remove defines for preempt_lock_check and window
+ * 
+ * 20010919 Robert M. Love
+ * 	whitespace cleanup, remove unneeded code, remove
+ * 	ifdefs around latency_cause, etc
+ * 
+ * 20010918 Robert M. Love
+ * 	update for 2.4.10-pre11 and 2.4.9-ac13
+ */
+
+#include <asm/system.h>
+#include <asm/current.h>
+#include <linux/sched.h>
+#include <asm/msr.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+/*
+ * defined in asm/msr.h as
+ * __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
+ */
+#define readclock(low) rdtscl(low)
+
+#define NUM_LOG_ENTRY 20
+#define NUM_COUNT 2
+#define COMMAND_LENGTH 16
+#define NUM_INTERRUPT 16
+
+const char * syscallStartFileName = "system_call";
+const char * entryCheckFileName = "ret_with_reschedule";
+const char * errorCodeStart = "error_code start";
+const char * deviceNotAvailableStart = "device_not_available start";
+
+extern unsigned long cpu_khz;  /* rdtsc ticks per millisecond (CPU clock in kHz) */
+
+extern void latency_check(const char *, unsigned, int);
+extern void latency_end(const char *, unsigned);
+
+int theCause, mask;
+unsigned theSyscallNumber;
+
+#define SfS 0 /* waiting for a preempt-off start call */
+#define SfC 1 /* waiting for a preempt-off end call */
+#define SfM 2 /* waiting for either of the above */
+
+struct pointId {
+	const char * FileName;
+	unsigned Address;    
+	unsigned FileLine;
+	unsigned ProcId;
+	char ProcName[COMMAND_LENGTH];
+};
+
+/* worst recorded latencies */
+struct log_entry {
+	unsigned latency;
+	int cause;
+	int mask;
+	unsigned intrCount[NUM_INTERRUPT+1];
+	struct pointId start;
+	struct pointId end;
+};
+
+struct LatencyData {
+	int breakCount;		/* count interrupt and iret */
+	const char * testName;	/* the test name */
+	unsigned logFlag;	/* 0 - no logging; 1 - logging */
+	unsigned panicFlag;	/* set to 1 if something is really wrong */
+	unsigned syncFlag;	/* synchronization between start and end */
+	unsigned numIntrs;	/* number of latency periods recorded */
+	unsigned numCount;	/* == NUM_COUNT */
+	unsigned numEntry;	/* == NUM_LOG_ENTRY */
+	int least;		/* lowest latency we care about */
+	unsigned count[NUM_COUNT];
+	struct log_entry  entry[NUM_LOG_ENTRY]; /* worst recorded latencies */
+};
+
+struct LatencyData latencyData = {
+	breakCount: 0,
+	testName:   "preemption latency (start/check/break points)",
+	logFlag:    0,
+	panicFlag:  0,
+	syncFlag:   0,
+	numIntrs:   0,
+	numCount:   NUM_COUNT,
+	numEntry:   NUM_LOG_ENTRY,
+};
+
+static char *cause_name[] = {
+	"Unknown",
+	"spin_lock",
+	"reacqBKL",
+	"BKL"
+};
+
+#define CAUSE_MAX (sizeof(cause_name)/sizeof(cause_name[0]) - 1)
+
+static struct pointId latencyStart;
+static unsigned latencyStartCount;
+unsigned numIntr = NUM_INTERRUPT;
+unsigned intrCount[NUM_INTERRUPT+1];
+
+#define CLEAR(x) do { \
+			int zz; \
+			for (zz = 0; zz <= NUM_INTERRUPT; zz++) \
+				x[zz] = 0; \
+		} while (0)
+
+#define COPY(x, y) do { \
+			int zz; \
+			for (zz = 0; zz <= NUM_INTERRUPT; zz++) \
+				x[zz] = y[zz]; \
+		} while (0)
+
+/* strategy : 
+ * 
+ * Four points are identified
+ *   S : the starting point right after system call, user -> kernel
+ *   S1: alternative starting points 
+ *      . faults (including device_not_available which does go through 
+ *              error_code : label)
+ *      . interrupts (all go through do_IRQ)
+ *   C : need_resched is checked and schedule() is invoked if flag is set
+ *   B : before RESTORE_ALL, if control is returning to user space
+ *
+ * For S points :
+ *      it MUST follow a break point
+ *      it initializes timing values for the starting point
+ *
+ * For S1 points :
+ *      if it follows a breakpoint, treat it as an S point
+ *      otherwise, ignore it.
+ *
+ * For C points :
+ *      it must follow a C point or an S point
+ *      calculate the interval and reset the starting point
+ *
+ * For B points :
+ *      It must follow a C point or an S point
+ *      calculate the interval and invalidate the starting point
+ *
+ */
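+/*
+ * (Mapping the points above onto this file: S and S1 periods begin in
+ * latency_start(), C points go through latency_check(), and B points
+ * end in latency_end().  syncFlag tracks what may legally come next:
+ * SfS after an end, SfC after a start, SfM after a check, from which
+ * either a start or an end may follow.)
+ */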
+/* used to debug the execution between start() and start1() */
+static unsigned startFlag = 0;
+
+asmlinkage void latency_start(const char *fname,unsigned lineno, int cause)
+{
+	/* if we are not logging or we have an error, do nothing */
+	if ((latencyData.logFlag == 0) || (latencyData.panicFlag != 0))
+		return;
+
+	if (latencyData.syncFlag != SfC) {
+		readclock(latencyStartCount);
+		startFlag = 1;
+		mask = 0;
+		theCause = cause;
+		latencyData.syncFlag = SfC;
+		latencyStart.Address = (int)__builtin_return_address(0);
+		latencyStart.FileName = fname;
+		latencyStart.FileLine = lineno;
+		latencyStart.ProcId = current->pid;
+		memcpy(latencyStart.ProcName, current->comm, COMMAND_LENGTH);
+		CLEAR(intrCount);
+	}
+}
+
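+/*
+ * Update the cause recorded for the current period.  Callers pass the
+ * cause they expect to replace: do_IRQ() uses (-99, ~irq) so that an
+ * interrupt overrides the generic -99 "unknown" cause with the
+ * complemented IRQ number, while do_softirq() uses -100 as "was",
+ * which is handled specially and ORs "tobe" (the pending softirq
+ * bits) into "mask" rather than touching the cause.
+ */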
+void latency_cause(int was, int tobe)
+{
+	if (was == -100) {
+		mask |= tobe;
+		return;
+	}
+
+	if (theCause == was)
+		theCause = tobe;
+}
+
+void latency_logentry(unsigned diff, const char *fname,
+		unsigned lineno, unsigned Address, int mask)
+{
+	unsigned lowest = 0xffffffff;
+	unsigned lowestIndex = 0;
+	unsigned sameIndex = 0xffffffff;
+	int i;
+
+	if (diff < latencyData.least)
+		return;
+
+	/* check if we need to log this event */
+	for (i = 0; i < NUM_LOG_ENTRY; i++) {
+		if (lowest > latencyData.entry[i].latency) {
+			lowest = latencyData.entry[i].latency;
+			lowestIndex = i;
+		}
+		/* If the call addresses match, it is the same path */
+		if ((Address == latencyData.entry[i].end.Address) &&
+		 (latencyStart.Address == latencyData.entry[i].start.Address)){
+			sameIndex = i;
+			break;
+		}
+	}
+
+	if (sameIndex == 0xffffffff) {
+		i = lowestIndex;
+		/*
+		 * Ideally "least" would be the smallest latency in the
+		 * table, but to keep this path fast we settle for what
+		 * may have been the lowest: this new entry may replace
+		 * the found lowest with a higher value, in which case we
+		 * no longer hold the true lowest.  We get closer as time
+		 * passes, and since the point of "least" is only to speed
+		 * things up, the inexactness is acceptable.
+		 */
+		latencyData.least = latencyData.entry[i].latency;
+	} else {
+		i = sameIndex;
+	}
+
+	if (diff > latencyData.entry[i].latency) {
+		latencyData.entry[i].latency = diff;
+		latencyData.entry[i].cause = theCause; 
+		latencyData.entry[i].mask = mask; 
+
+		latencyData.entry[i].end.FileName = fname;
+		latencyData.entry[i].end.FileLine = lineno;
+		latencyData.entry[i].end.Address = Address;
+		latencyData.entry[i].end.ProcId = current->pid;
+		memcpy(latencyData.entry[i].end.ProcName,
+			current->comm, COMMAND_LENGTH);
+
+		latencyData.entry[i].start.FileName = latencyStart.FileName;
+		latencyData.entry[i].start.FileLine = latencyStart.FileLine;
+		latencyData.entry[i].start.Address = latencyStart.Address; 
+		latencyData.entry[i].start.ProcId = latencyStart.ProcId;
+		memcpy(latencyData.entry[i].start.ProcName, 
+			latencyStart.ProcName, COMMAND_LENGTH);
+	}
+
+	latencyData.numIntrs++;
+}
+
+/* Called at end of preemption time */
+asmlinkage void latency_end(const char *fname, unsigned lineno)
+{
+	unsigned endCount;
+
+	/* if we are not logging or we have an error, do nothing */
+	if ((latencyData.logFlag == 0) || (latencyData.panicFlag != 0)) {
+		return;
+	}
+
+	/* read the clock at the end of the period */
+	readclock(endCount);
+
+	if (latencyData.syncFlag == SfS) {
+		latencyData.count[SfS] ++;
+		return;
+	}
+
+	latency_logentry(endCount - latencyStartCount, fname, lineno,
+		(int)__builtin_return_address(0), mask);
+
+	latencyData.syncFlag = SfS;
+	return;
+}
+
+/* latency_check is for the end of one period and the start of another */
+asmlinkage void latency_check(const char *fname, unsigned lineno, int cause)
+{
+	unsigned endCount;
+
+	/* if we are not logging or we have an error, do nothing */
+	if ((latencyData.logFlag == 0) || (latencyData.panicFlag != 0)) {
+		return;
+	}
+	/* read the clock at the end of the period */
+	readclock(endCount);
+
+	if (latencyData.syncFlag != SfS) {
+		latency_logentry(endCount - latencyStartCount, fname, lineno,
+			(int)__builtin_return_address(0), mask);
+	}
+
+	/* re-set the starting point */
+	latencyData.syncFlag = SfM;
+	mask = 0;
+	theCause = cause;
+	latencyStart.Address = (int) __builtin_return_address(0);
+	latencyStart.FileName = fname;
+	latencyStart.FileLine = lineno;
+	latencyStart.ProcId = current->pid;
+	memcpy(latencyStart.ProcName, current->comm, COMMAND_LENGTH);
+	CLEAR(intrCount);
+	readclock(latencyStartCount);
+}
+
+static int g_read_completed = 0;
+
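+/*
+ * Reading /proc/latencytimes: the first call (offset 0) stops logging
+ * and prints the header; each later call prints, then clears, the
+ * largest remaining entry, so entries come out sorted worst-first;
+ * after NUM_LOG_ENTRY entries the state is reset and logging resumes.
+ */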
+static int latencytimes_read_proc(char *page_buffer, char **my_first_byte,
+	off_t virtual_start, int length, int *eof, void *data)
+{
+	int my_buffer_offset = 0;
+	char * const my_base = page_buffer;
+	int i,j,max;
+
+	if (virtual_start == 0) {
+		/* just been opened: stop logging and print the header */
+		latencyData.logFlag = 0;
+		g_read_completed = 0;
+		my_buffer_offset += sprintf(my_base + my_buffer_offset, 
+		 "Worst %d latency times of %d measured in this period.\n"
+		 "  usec      cause     mask   start line/file      address   end line/file\n", NUM_LOG_ENTRY,latencyData.numIntrs);
+
+	} else if (g_read_completed == NUM_LOG_ENTRY) {
+		*eof = 1;
+		latencyData.breakCount = 0;
+		latencyData.syncFlag = SfS;
+		latencyData.numIntrs = 0;
+		latencyData.least = 0;
+		for (i = 0; i < latencyData.numCount; 
+				latencyData.count[i++] = 0);
+		latencyData.logFlag = 1;  /* start logging */
+		return 0;
+	}
+	g_read_completed++;
+	for (j = max = i = 0;j < NUM_LOG_ENTRY; j++){
+	for (j = max = i = 0; j < NUM_LOG_ENTRY; j++) {
+		if (latencyData.entry[j].latency > max) {
+			i = j;
+		}
+	}
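+	/*
+	 * Convert rdtsc ticks to microseconds: cpu_khz is ticks per
+	 * millisecond, so usec = ticks * 1000 / cpu_khz, computed here
+	 * as (ticks * 10) / (cpu_khz / 100) to keep the intermediate
+	 * product well inside 32 bits even for long latencies.
+	 */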
+	my_buffer_offset += 
+		sprintf(my_base + my_buffer_offset,
+			"%6d %10s %8x %5d/%-15s %8x %5d/%s\n",
+			(int)((latencyData.entry[i].latency * 10) / 
+				(cpu_khz / 100)),
+			(latencyData.entry[i].cause == -99) ? "unknown" :
+			(latencyData.entry[i].cause < 0) ?
+			irq_desc[~latencyData.entry[i].cause].action->name :
+			(latencyData.entry[i].cause > CAUSE_MAX) ? "Unknown" :
+			cause_name[latencyData.entry[i].cause],
+			latencyData.entry[i].mask,
+			latencyData.entry[i].start.FileLine,
+			latencyData.entry[i].start.FileName == (char *)0 ?
+			"entry.S" : 
+			latencyData.entry[i].start.FileName,
+			latencyData.entry[i].start.Address,
+			/*latencyData.entry[i].start.ProcId,
+			  latencyData.entry[i].start.ProcName,*/
+			latencyData.entry[i].end.FileLine,
+			latencyData.entry[i].end.FileName == (char *)0 ?
+			"entry.S" : 
+			latencyData.entry[i].end.FileName /*,
+			latencyData.entry[i].end.ProcId,
+			latencyData.entry[i].end.ProcName*/);
+
+	/* clearing these two effectively clears the entry */
+
+	latencyData.entry[i].latency = 0;
+	latencyData.entry[i].end.Address = 0;
+	*my_first_byte = page_buffer;
+	return my_buffer_offset;
+}
+
+int __init latencytimes_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	create_proc_read_entry("latencytimes", 0, 0, latencytimes_read_proc, 0);
+#endif
+	return 0;
+}
+
+__initcall(latencytimes_init);
diff -urN linux-2.4.13-preempt/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.13-preempt/kernel/sched.c	Thu Oct 11 20:14:57 2001
+++ linux/kernel/sched.c	Thu Oct 11 20:16:05 2001
@@ -1110,10 +1110,12 @@
 		/* it would be ideal not to lock tasks to their cpu here,
 		 * but only around the data that needs such locking */
 		lock_to_this_cpu();
+		preempt_lock_start(2);
 		atomic_add(PREEMPT_ACTIVE + 1, &switch_lock_count());
 		schedule();
 		restore_cpus_allowed();
 		atomic_sub(PREEMPT_ACTIVE + 1, &switch_lock_count());
+		preempt_lock_stop();
 	}
 }
 #endif /* CONFIG_PREEMPT */
diff -urN linux-2.4.13-preempt/kernel/softirq.c linux/kernel/softirq.c
--- linux-2.4.13-preempt/kernel/softirq.c	Thu Oct 11 20:14:57 2001
+++ linux/kernel/softirq.c	Thu Oct 11 20:16:05 2001
@@ -44,6 +44,12 @@
 
 static struct softirq_action softirq_vec[32] __cacheline_aligned;
 
+#ifdef CONFIG_PREEMPT_TIMES
+extern void latency_cause(int, int);
+#else
+#define latency_cause(a, b)
+#endif
+
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
  * but we also don't want to introduce a worst case 1/HZ latency
@@ -67,6 +73,7 @@
 
 	if (in_interrupt())
 		return;
+	preempt_lock_start(-99);
 
 	local_irq_save(flags);
 
@@ -84,6 +91,9 @@
 		local_irq_enable();
 
 		h = softirq_vec;
+
+		latency_cause(-100, pending);
+
 
 		do {
 			if (pending & 1)
@@ -106,6 +116,7 @@
 	}
 
 	local_irq_restore(flags);
+	preempt_lock_stop();
 }
 
 /*