[PATCH] remove the BKL by turning it into a semaphore

This is the current remove-BKL patch. I test-booted it on x86 and x64, trying every conceivable combination of SMP, PREEMPT and PREEMPT_BKL. All other architectures should compile as well. (Most of the testing was done with the zaphod patch undone, but it applies cleanly on vanilla -mm3 as well and should work fine.) This is the debugging-enabled variant of the patch, which has two main debugging features: - debug potentially illegal smp_processor_id() use. Has caught a number of real bugs - e.g. look at the printk.c fix in the patch. - make it possible to enable/disable the BKL via a .config. If this goes upstream we don't want this of course, but for now it gives people a chance to find out whether any particular problem was caused by this patch. This patch has one important fix over the previous BKL patch: on PREEMPT kernels, if we preempted BKL-using code then the code still auto-dropped the BKL by mistake. This caused a number of breakages for testers, which went away once this bug was fixed. Also, the debugging mechanism has been improved a lot relative to the previous BKL patch. Would be nice to test-drive this in -mm. There will likely be some more smp_processor_id() false positives but they are 1) harmless 2) easy to fix up. We could also find more real smp_processor_id()-related breakages. The most noteworthy fact is that no BKL-using code was found yet that relied on smp_processor_id(), which is promising from a compatibility POV. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Ingo Molnar <mingo@elte.hu> 2005-01-07 21:59:57 -0800
committer: Linus Torvalds <torvalds@evo.osdl.org> 2005-01-07 21:59:57 -0800
commit: fb8f6499abc6a847109d9602b797aa6afd2d5a3d (patch)
tree: 9b23f9dde8826bb5df266ce9be81c1d51c6e804a /lib
parent: 8a1a48b7cd80de98d4d07ee1e78311a88c738335 (diff)
download: history-fb8f6499abc6a847109d9602b797aa6afd2d5a3d.tar.gz
2 files changed, 150 insertions, 2 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d9da4ec792b452..850bb10ec91ef6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -48,6 +48,16 @@ config DEBUG_SLAB
 	  allocation as well as poisoning memory on free to catch use of freed
 	  memory. This can make kmalloc/kfree-intensive workloads much slower.
 
+config DEBUG_PREEMPT
+	bool "Debug preemptible kernel"
+	depends on PREEMPT && X86
+	default y
+	help
+	  If you say Y here then the kernel will use a debug variant of the
+	  commonly used smp_processor_id() function and will print warnings
+	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
+	  will detect preemption count underflows.
+
 config DEBUG_SPINLOCK
 	bool "Spinlock debugging"
 	depends on DEBUG_KERNEL && (ALPHA || ARM || X86 || IA64 || M32R || MIPS || PARISC || PPC32 || (SUPERH && !SUPERH64) || SPARC32 || SPARC64 || USERMODE || X86_64)
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 48dc05a13963da..8819c12a74ee3c 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -7,6 +7,141 @@
  */
 #include <linux/smp_lock.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
+
+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \
+		defined(CONFIG_DEBUG_PREEMPT)
+
+/*
+ * Debugging check.
+ */
+unsigned int smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = __smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+
+	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid);
+	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+
+EXPORT_SYMBOL(smp_processor_id);
+
+#endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */
+
+#ifdef CONFIG_PREEMPT_BKL
+/*
+ * The 'big kernel semaphore'
+ *
+ * This mutex is taken and released recursively by lock_kernel()
+ * and unlock_kernel().  It is transparently dropped and reacquired
+ * over schedule().  It is used to protect legacy code that hasn't
+ * been migrated to a proper locking design yet.
+ *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
+ * Don't use in new code.
+ */
+DECLARE_MUTEX(kernel_sem);
+
+/*
+ * Re-acquire the kernel semaphore.
+ *
+ * This function is called with preemption off.
+ *
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
+ */
+int __lockfunc __reacquire_kernel_lock(void)
+{
+	struct task_struct *task = current;
+	int saved_lock_depth = task->lock_depth;
+
+	BUG_ON(saved_lock_depth < 0);
+
+	task->lock_depth = -1;
+	preempt_enable_no_resched();
+
+	down(&kernel_sem);
+
+	preempt_disable();
+	task->lock_depth = saved_lock_depth;
+
+	return 0;
+}
+
+void __lockfunc __release_kernel_lock(void)
+{
+	up(&kernel_sem);
+}
+
+/*
+ * Getting the big kernel semaphore.
+ */
+void __lockfunc lock_kernel(void)
+{
+	struct task_struct *task = current;
+	int depth = task->lock_depth + 1;
+
+	if (likely(!depth))
+		/*
+		 * No recursion worries - we set up lock_depth _after_ down()
+		 */
+		down(&kernel_sem);
+
+	task->lock_depth = depth;
+}
+
+void __lockfunc unlock_kernel(void)
+{
+	struct task_struct *task = current;
+
+	BUG_ON(task->lock_depth < 0);
+
+	if (likely(--task->lock_depth < 0))
+		up(&kernel_sem);
+}
+
+#else
 
 /*
  * The 'big kernel lock'
@@ -34,7 +169,7 @@ static spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
  * (This works on UP too - _raw_spin_trylock will never
  * return false in that case)
  */
-int __lockfunc get_kernel_lock(void)
+int __lockfunc __reacquire_kernel_lock(void)
 {
 	while (!_raw_spin_trylock(&kernel_flag)) {
 		if (test_thread_flag(TIF_NEED_RESCHED))
@@ -45,7 +180,7 @@ int __lockfunc get_kernel_lock(void)
 	return 0;
 }
 
-void __lockfunc put_kernel_lock(void)
+void __lockfunc __release_kernel_lock(void)
 {
 	_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
@@ -122,5 +257,8 @@ void __lockfunc unlock_kernel(void)
 		__unlock_kernel();
 }
 
+#endif
+
 EXPORT_SYMBOL(lock_kernel);
 EXPORT_SYMBOL(unlock_kernel);
+
author	Ingo Molnar <mingo@elte.hu>	2005-01-07 21:59:57 -0800
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-01-07 21:59:57 -0800
commit	fb8f6499abc6a847109d9602b797aa6afd2d5a3d (patch)
tree	9b23f9dde8826bb5df266ce9be81c1d51c6e804a /lib
parent	8a1a48b7cd80de98d4d07ee1e78311a88c738335 (diff)
download	history-fb8f6499abc6a847109d9602b797aa6afd2d5a3d.tar.gz