Subject: mm: raw_pagefault_disable
From: Peter Zijlstra
Date: Fri Aug 05 17:16:58 CEST 2011

Adding migrate_disable() to pagefault_disable() to preserve the
per-cpu thing for kmap_atomic might not have been the best of
choices. But short of adding preempt_disable/migrate_disable foo
all over the kmap code it still seems the best way.

It does however yield the below borkage as well as wreck !-rt
builds, since !-rt does rely on pagefault_disable() not preempting.
So fix all that up by adding raw_pagefault_disable().

 [] warn_slowpath_common+0x85/0x9d
 [] warn_slowpath_fmt+0x46/0x48
 [] ? _raw_spin_lock+0x6c/0x73
 [] ? watchdog_overflow_callback+0x9b/0xd0
 [] watchdog_overflow_callback+0xb7/0xd0
 [] __perf_event_overflow+0x11c/0x1fe
 [] ? perf_event_update_userpage+0x149/0x151
 [] ? perf_event_task_disable+0x7c/0x7c
 [] perf_event_overflow+0x14/0x16
 [] x86_pmu_handle_irq+0xcb/0x108
 [] perf_event_nmi_handler+0x46/0x91
 [] notifier_call_chain+0x79/0xa6
 [] __atomic_notifier_call_chain+0x66/0x98
 [] ? notifier_call_chain+0xa6/0xa6
 [] atomic_notifier_call_chain+0x14/0x16
 [] notify_die+0x2e/0x30
 [] do_nmi+0x7e/0x22b
 [] nmi+0x1a/0x2c
 [] ? sub_preempt_count+0x4b/0xaa
 <>
 [] delay_tsc+0xac/0xd1
 [] __delay+0xf/0x11
 [] do_raw_spin_lock+0xd2/0x13c
 [] _raw_spin_lock_irqsave+0x6b/0x85
 [] ? task_rq_lock+0x35/0x8d
 [] task_rq_lock+0x35/0x8d
 [] migrate_disable+0x65/0x12c
 [] pagefault_disable+0xe/0x1f
 [] dump_trace+0x21f/0x2e2
 [] show_trace_log_lvl+0x54/0x5d
 [] show_trace+0x15/0x17
 [] dump_stack+0x77/0x80
 [] spin_bug+0x9c/0xa3
 [] ? task_rq_lock+0x50/0x8d
 [] do_raw_spin_lock+0x47/0x13c
 [] _raw_spin_lock+0x60/0x73
 [] ? task_rq_lock+0x50/0x8d
 [] task_rq_lock+0x50/0x8d
 [] migrate_disable+0x65/0x12c
 [] pagefault_disable+0xe/0x1f
 [] dump_trace+0x21f/0x2e2
 [] save_stack_trace+0x2f/0x4c
 [] save_trace+0x3f/0xaf
 [] mark_lock+0x228/0x530
 [] __lock_acquire+0x662/0x1812
 [] ? native_sched_clock+0x37/0x6d
 [] ? trace_hardirqs_off_caller+0x1f/0x99
 [] ? sched_rt_period_timer+0xbd/0x218
 [] lock_acquire+0x145/0x18a
 [] ? sched_rt_period_timer+0xbd/0x218
 [] _raw_spin_lock+0x40/0x73
 [] ? sched_rt_period_timer+0xbd/0x218
 [] sched_rt_period_timer+0xbd/0x218
 [] __run_hrtimer+0x1e4/0x347
 [] ? can_migrate_task.clone.82+0x14a/0x14a
 [] hrtimer_interrupt+0xee/0x1d6
 [] ? add_preempt_count+0xae/0xb2
 [] smp_apic_timer_interrupt+0x85/0x98
 [] apic_timer_interrupt+0x13/0x20

Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/n/tip-31keae8mkjiv8esq4rl76cib@git.kernel.org
---
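Not part of the patch proper, just a usage sketch: with probe_kernel_address()
switched to the raw_ variants below, a caller like the following (the helper
peek_kernel_word() is hypothetical, illustration only) can probe a possibly
unmapped kernel address from the NMI/backtrace context in the splat above
without ever reaching migrate_disable() and the runqueue lock.

#include <linux/uaccess.h>

/* Hypothetical helper, illustration only -- not part of this patch. */
static long peek_kernel_word(unsigned long addr, unsigned long *val)
{
	/*
	 * A fault goes straight to the exception fixup table; only the
	 * preempt count is touched, so this is safe even from the
	 * dump_trace()/NMI path shown above.  Returns non-zero if
	 * @addr is not readable.
	 */
	return probe_kernel_address((unsigned long *)addr, *val);
}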
 include/linux/uaccess.h |   30 ++++++++++++++++++++++++++++--
 mm/memory.c             |    2 ++
 2 files changed, 30 insertions(+), 2 deletions(-)

Index: linux-stable/include/linux/uaccess.h
===================================================================
--- linux-stable.orig/include/linux/uaccess.h
+++ linux-stable/include/linux/uaccess.h
@@ -8,8 +8,34 @@
  * These routines enable/disable the pagefault handler in that
  * it will not take any MM locks and go straight to the fixup table.
  */
+static inline void raw_pagefault_disable(void)
+{
+	inc_preempt_count();
+	barrier();
+}
+
+static inline void raw_pagefault_enable(void)
+{
+	barrier();
+	dec_preempt_count();
+	barrier();
+	preempt_check_resched();
+}
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+static inline void pagefault_disable(void)
+{
+	raw_pagefault_disable();
+}
+
+static inline void pagefault_enable(void)
+{
+	raw_pagefault_enable();
+}
+#else
 extern void pagefault_disable(void);
 extern void pagefault_enable(void);
+#endif
 
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
@@ -50,9 +76,9 @@ static inline unsigned long __copy_from_
 		mm_segment_t old_fs = get_fs();			\
 								\
 		set_fs(KERNEL_DS);				\
-		pagefault_disable();				\
+		raw_pagefault_disable();			\
 		ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval));		\
-		pagefault_enable();				\
+		raw_pagefault_enable();				\
 		set_fs(old_fs);					\
 		ret;						\
 	})

Index: linux-stable/mm/memory.c
===================================================================
--- linux-stable.orig/mm/memory.c
+++ linux-stable/mm/memory.c
@@ -3484,6 +3484,7 @@ unlock:
 	return 0;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
 void pagefault_disable(void)
 {
 	inc_preempt_count();
@@ -3512,6 +3513,7 @@ void pagefault_enable(void)
 	preempt_check_resched();
 }
 EXPORT_SYMBOL_GPL(pagefault_enable);
+#endif
 
 /*
  * By the time we get here, we already hold the mm semaphore