diff options
author | Ingo Molnar <mingo@elte.hu> | 2005-01-07 21:59:57 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-01-07 21:59:57 -0800 |
commit | fb8f6499abc6a847109d9602b797aa6afd2d5a3d (patch) | |
tree | 9b23f9dde8826bb5df266ce9be81c1d51c6e804a /lib | |
parent | 8a1a48b7cd80de98d4d07ee1e78311a88c738335 (diff) | |
download | history-fb8f6499abc6a847109d9602b797aa6afd2d5a3d.tar.gz |
[PATCH] remove the BKL by turning it into a semaphore
This is the current remove-BKL patch. I test-booted it on x86 and x64, trying
every conceivable combination of SMP, PREEMPT and PREEMPT_BKL. All other
architectures should compile as well. (most of the testing was done with the
zaphod patch undone but it applies cleanly on vanilla -mm3 as well and should
work fine.)
This is the debugging-enabled variant of the patch, which has two main
debugging features:
- debug potentially illegal smp_processor_id() use. Has caught a number
of real bugs - e.g. look at the printk.c fix in the patch.
- make it possible to enable/disable the BKL via a .config. If this
goes upstream we don't want this of course, but for now it gives
people a chance to find out whether any particular problem was caused
by this patch.
This patch has one important fix over the previous BKL patch: on PREEMPT
kernels if we preempted BKL-using code then the code still auto-dropped the
BKL by mistake. This caused a number of breakages for testers, which
went away once this bug was fixed.
Also, the debugging mechanism has been improved a lot relative to the previous
BKL patch.
Would be nice to test-drive this in -mm. There will likely be some more
smp_processor_id() false positives but they are 1) harmless 2) easy to fix up.
We could find more real smp_processor_id()-related breakages as well.
The most noteworthy fact is that no BKL-using code was found yet that relied
on smp_processor_id(), which is promising from a compatibility POV.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 10 | ||||
-rw-r--r-- | lib/kernel_lock.c | 142 |
2 files changed, 150 insertions, 2 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d9da4ec792b452..850bb10ec91ef6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -48,6 +48,16 @@ config DEBUG_SLAB allocation as well as poisoning memory on free to catch use of freed memory. This can make kmalloc/kfree-intensive workloads much slower. +config DEBUG_PREEMPT + bool "Debug preemptible kernel" + depends on PREEMPT && X86 + default y + help + If you say Y here then the kernel will use a debug variant of the + commonly used smp_processor_id() function and will print warnings + if kernel code uses it in a preemption-unsafe way. Also, the kernel + will detect preemption count underflows. + config DEBUG_SPINLOCK bool "Spinlock debugging" depends on DEBUG_KERNEL && (ALPHA || ARM || X86 || IA64 || M32R || MIPS || PARISC || PPC32 || (SUPERH && !SUPERH64) || SPARC32 || SPARC64 || USERMODE || X86_64) diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 48dc05a13963da..8819c12a74ee3c 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -7,6 +7,141 @@ */ #include <linux/smp_lock.h> #include <linux/module.h> +#include <linux/kallsyms.h> + +#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \ + defined(CONFIG_DEBUG_PREEMPT) + +/* + * Debugging check. 
+ */ +unsigned int smp_processor_id(void) +{ + unsigned long preempt_count = preempt_count(); + int this_cpu = __smp_processor_id(); + cpumask_t this_mask; + + if (likely(preempt_count)) + goto out; + + if (irqs_disabled()) + goto out; + + /* + * Kernel threads bound to a single CPU can safely use + * smp_processor_id(): + */ + this_mask = cpumask_of_cpu(this_cpu); + + if (cpus_equal(current->cpus_allowed, this_mask)) + goto out; + + /* + * It is valid to assume CPU-locality during early bootup: + */ + if (system_state != SYSTEM_RUNNING) + goto out; + + /* + * Avoid recursion: + */ + preempt_disable(); + + if (!printk_ratelimit()) + goto out_enable; + + printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); + dump_stack(); + +out_enable: + preempt_enable_no_resched(); +out: + return this_cpu; +} + +EXPORT_SYMBOL(smp_processor_id); + +#endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */ + +#ifdef CONFIG_PREEMPT_BKL +/* + * The 'big kernel semaphore' + * + * This mutex is taken and released recursively by lock_kernel() + * and unlock_kernel(). It is transparently dropped and reaquired + * over schedule(). It is used to protect legacy code that hasn't + * been migrated to a proper locking design yet. + * + * Note: code locked by this semaphore will only be serialized against + * other code using the same locking facility. The code guarantees that + * the task remains on the same CPU. + * + * Don't use in new code. + */ +DECLARE_MUTEX(kernel_sem); + +/* + * Re-acquire the kernel semaphore. + * + * This function is called with preemption off. + * + * We are executing in schedule() so the code must be extremely careful + * about recursion, both due to the down() and due to the enabling of + * preemption. schedule() will re-check the preemption flag after + * reacquiring the semaphore. 
+ */ +int __lockfunc __reacquire_kernel_lock(void) +{ + struct task_struct *task = current; + int saved_lock_depth = task->lock_depth; + + BUG_ON(saved_lock_depth < 0); + + task->lock_depth = -1; + preempt_enable_no_resched(); + + down(&kernel_sem); + + preempt_disable(); + task->lock_depth = saved_lock_depth; + + return 0; +} + +void __lockfunc __release_kernel_lock(void) +{ + up(&kernel_sem); +} + +/* + * Getting the big kernel semaphore. + */ +void __lockfunc lock_kernel(void) +{ + struct task_struct *task = current; + int depth = task->lock_depth + 1; + + if (likely(!depth)) + /* + * No recursion worries - we set up lock_depth _after_ + */ + down(&kernel_sem); + + task->lock_depth = depth; +} + +void __lockfunc unlock_kernel(void) +{ + struct task_struct *task = current; + + BUG_ON(task->lock_depth < 0); + + if (likely(--task->lock_depth < 0)) + up(&kernel_sem); +} + +#else /* * The 'big kernel lock' @@ -34,7 +169,7 @@ static spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; * (This works on UP too - _raw_spin_trylock will never * return false in that case) */ -int __lockfunc get_kernel_lock(void) +int __lockfunc __reacquire_kernel_lock(void) { while (!_raw_spin_trylock(&kernel_flag)) { if (test_thread_flag(TIF_NEED_RESCHED)) @@ -45,7 +180,7 @@ int __lockfunc get_kernel_lock(void) return 0; } -void __lockfunc put_kernel_lock(void) +void __lockfunc __release_kernel_lock(void) { _raw_spin_unlock(&kernel_flag); preempt_enable_no_resched(); @@ -122,5 +257,8 @@ void __lockfunc unlock_kernel(void) __unlock_kernel(); } +#endif + EXPORT_SYMBOL(lock_kernel); EXPORT_SYMBOL(unlock_kernel); + |