From: Zwane Mwaikambo Introduce cpu_idle_wait() on architectures requiring modification of pm_idle from modules; this will ensure that all processors have updated their cached values of pm_idle upon exit. This patch is to address the bug report at http://bugme.osdl.org/show_bug.cgi?id=1716 and replaces the current code fix, which is in violation of normal RCU usage, as pointed out by Stephen, Dipankar and Paul. Signed-off-by: Zwane Mwaikambo Signed-off-by: Andrew Morton --- 25-akpm/arch/i386/kernel/apm.c | 2 +- 25-akpm/arch/i386/kernel/process.c | 30 +++++++++++++++++++++++------- 25-akpm/arch/ia64/kernel/process.c | 30 +++++++++++++++++++++++------- 25-akpm/arch/x86_64/kernel/process.c | 31 ++++++++++++++++++++++++------- 25-akpm/drivers/acpi/processor.c | 2 +- 25-akpm/include/asm-i386/system.h | 1 + 25-akpm/include/asm-ia64/system.h | 1 + 25-akpm/include/asm-x86_64/system.h | 2 ++ 8 files changed, 76 insertions(+), 23 deletions(-) diff -puN arch/i386/kernel/apm.c~remove-rcu-abuse-in-cpu_idle arch/i386/kernel/apm.c --- 25/arch/i386/kernel/apm.c~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.151927368 -0800 +++ 25-akpm/arch/i386/kernel/apm.c 2004-12-11 22:29:47.167924936 -0800 @@ -2369,7 +2369,7 @@ static void __exit apm_exit(void) * (pm_idle), Wait for all processors to update cached/local * copies of pm_idle before proceeding. */ - synchronize_kernel(); + cpu_idle_wait(); } if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) && (apm_info.connection_version > 0x0100)) { diff -puN arch/i386/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/i386/kernel/process.c --- 25/arch/i386/kernel/process.c~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.153927064 -0800 +++ 25-akpm/arch/i386/kernel/process.c 2004-12-11 22:29:47.168924784 -0800 @@ -72,6 +72,7 @@ unsigned long thread_saved_pc(struct tas * Powermanagement idle function, if any.. 
*/ void (*pm_idle)(void); +static cpumask_t cpu_idle_map; void disable_hlt(void) { @@ -144,16 +145,16 @@ static void poll_idle (void) */ void cpu_idle (void) { + int cpu = smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { void (*idle)(void); - /* - * Mark this as an RCU critical section so that - * synchronize_kernel() in the unload path waits - * for our completion. - */ - rcu_read_lock(); + + if (cpu_isset(cpu, cpu_idle_map)) + cpu_clear(cpu, cpu_idle_map); + rmb(); idle = pm_idle; if (!idle) @@ -161,12 +162,27 @@ void cpu_idle (void) irq_stat[smp_processor_id()].idle_timestamp = jiffies; idle(); - rcu_read_unlock(); } schedule(); } } +void cpu_idle_wait(void) +{ + int cpu; + cpumask_t map; + + for_each_online_cpu(cpu) + cpu_set(cpu, cpu_idle_map); + + wmb(); + do { + schedule_timeout(HZ); + cpus_and(map, cpu_idle_map, cpu_online_map); + } while (!cpus_empty(map)); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + /* * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, * which can obviate IPI to trigger checking of need_resched. 
diff -puN arch/ia64/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/ia64/kernel/process.c --- 25/arch/ia64/kernel/process.c~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.154926912 -0800 +++ 25-akpm/arch/ia64/kernel/process.c 2004-12-11 22:29:47.169924632 -0800 @@ -46,6 +46,7 @@ #include "sigframe.h" void (*ia64_mark_idle)(int); +static cpumask_t cpu_idle_map; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); @@ -225,10 +226,28 @@ static inline void play_dead(void) } #endif /* CONFIG_HOTPLUG_CPU */ + +void cpu_idle_wait(void) +{ + int cpu; + cpumask_t map; + + for_each_online_cpu(cpu) + cpu_set(cpu, cpu_idle_map); + + wmb(); + do { + schedule_timeout(HZ); + cpus_and(map, cpu_idle_map, cpu_online_map); + } while (!cpus_empty(map)); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + void __attribute__((noreturn)) cpu_idle (void *unused) { void (*mark_idle)(int) = ia64_mark_idle; + int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -241,17 +260,14 @@ cpu_idle (void *unused) if (mark_idle) (*mark_idle)(1); - /* - * Mark this as an RCU critical section so that - * synchronize_kernel() in the unload path waits - * for our completion. - */ - rcu_read_lock(); + + if (cpu_isset(cpu, cpu_idle_map)) + cpu_clear(cpu, cpu_idle_map); + rmb(); idle = pm_idle; if (!idle) idle = default_idle; (*idle)(); - rcu_read_unlock(); } if (mark_idle) diff -puN arch/x86_64/kernel/process.c~remove-rcu-abuse-in-cpu_idle arch/x86_64/kernel/process.c --- 25/arch/x86_64/kernel/process.c~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.156926608 -0800 +++ 25-akpm/arch/x86_64/kernel/process.c 2004-12-11 22:29:47.170924480 -0800 @@ -61,6 +61,7 @@ EXPORT_SYMBOL(boot_option_idle_override) * Powermanagement idle function, if any.. 
*/ void (*pm_idle)(void); +static cpumask_t cpu_idle_map; void disable_hlt(void) { @@ -123,6 +124,23 @@ static void poll_idle (void) } } + +void cpu_idle_wait(void) +{ + int cpu; + cpumask_t map; + + for_each_online_cpu(cpu) + cpu_set(cpu, cpu_idle_map); + + wmb(); + do { + schedule_timeout(HZ); + cpus_and(map, cpu_idle_map, cpu_online_map); + } while (!cpus_empty(map)); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a @@ -131,21 +149,20 @@ static void poll_idle (void) */ void cpu_idle (void) { + int cpu = smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { void (*idle)(void); - /* - * Mark this as an RCU critical section so that - * synchronize_kernel() in the unload path waits - * for our completion. - */ - rcu_read_lock(); + + if (cpu_isset(cpu, cpu_idle_map)) + cpu_clear(cpu, cpu_idle_map); + rmb(); idle = pm_idle; if (!idle) idle = default_idle; idle(); - rcu_read_unlock(); } schedule(); } diff -puN drivers/acpi/processor.c~remove-rcu-abuse-in-cpu_idle drivers/acpi/processor.c --- 25/drivers/acpi/processor.c~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.158926304 -0800 +++ 25-akpm/drivers/acpi/processor.c 2004-12-11 22:29:47.172924176 -0800 @@ -2578,7 +2578,7 @@ acpi_processor_remove ( * (pm_idle), Wait for all processors to update cached/local * copies of pm_idle before proceeding. 
*/ - synchronize_kernel(); + cpu_idle_wait(); } status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY, diff -puN include/asm-i386/system.h~remove-rcu-abuse-in-cpu_idle include/asm-i386/system.h --- 25/include/asm-i386/system.h~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.159926152 -0800 +++ 25-akpm/include/asm-i386/system.h 2004-12-11 22:29:47.173924024 -0800 @@ -543,5 +543,6 @@ void disable_hlt(void); void enable_hlt(void); extern int es7000_plat; +void cpu_idle_wait(void); #endif diff -puN include/asm-ia64/system.h~remove-rcu-abuse-in-cpu_idle include/asm-ia64/system.h --- 25/include/asm-ia64/system.h~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.160926000 -0800 +++ 25-akpm/include/asm-ia64/system.h 2004-12-11 22:29:47.173924024 -0800 @@ -284,6 +284,7 @@ do { \ #define ia64_platform_is(x) (strcmp(x, platform_name) == 0) +void cpu_idle_wait(void); #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff -puN include/asm-x86_64/system.h~remove-rcu-abuse-in-cpu_idle include/asm-x86_64/system.h --- 25/include/asm-x86_64/system.h~remove-rcu-abuse-in-cpu_idle 2004-12-11 22:29:47.162925696 -0800 +++ 25-akpm/include/asm-x86_64/system.h 2004-12-11 22:29:47.174923872 -0800 @@ -326,6 +326,8 @@ static inline unsigned long __cmpxchg(vo /* For spinlocks etc */ #define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) +void cpu_idle_wait(void); + /* * disable hlt during certain critical i/o operations */ _