From b17cda7addbd9839befc7af655083a245307656b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 15 Mar 2010 10:10:23 +0100 Subject: [PATCH] sched: _cpu_down(): Don't play with current->cpus_allowed commit bb8eef78087fd2f10bcfca68f794640f7d37592c in tip. _cpu_down() changes the current task's affinity and then recovers it at the end. The problems are well known: we can't restore old_allowed if it was bound to the now-dead-cpu, and we can race with the userspace which can change cpu-affinity during unplug. _cpu_down() should not play with current->cpus_allowed at all. Instead, take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable() removes the dying cpu from cpu_online_mask. [ upstream commit: 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 ] Signed-off-by: Oleg Nesterov Acked-by: Rafael J. Wysocki Signed-off-by: Peter Zijlstra LKML-Reference: <20100315091023.GA9148@redhat.com> Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Paul Gortmaker --- include/linux/sched.h | 1 + kernel/cpu.c | 18 ++++++------------ kernel/sched.c | 2 +- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2167a13..6d936b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1970,6 +1970,7 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #ifdef CONFIG_HOTPLUG_CPU +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} diff --git a/kernel/cpu.c b/kernel/cpu.c index 25bba73..914aedc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -164,6 +164,7 @@ static inline void check_for_tasks(int cpu) } struct take_cpu_down_param { + struct task_struct *caller; unsigned long mod; void *hcpu; }; @@ -172,6 +173,7 @@ struct take_cpu_down_param { static int __ref take_cpu_down(void *_param) { struct take_cpu_down_param *param = _param; + unsigned int cpu = (unsigned long)param->hcpu; int err; /* Ensure this CPU doesn't handle any more interrupts. */ @@ -182,6 +184,8 @@ static int __ref take_cpu_down(void *_param) raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, param->hcpu); + if (task_cpu(param->caller) == cpu) + move_task_off_dead_cpu(cpu, param->caller); /* Force idle task to run as soon as we yield: it should immediately notice cpu is offline and die quickly. */ sched_idle_next(); @@ -192,10 +196,10 @@ static int __ref take_cpu_down(void *_param) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; - cpumask_var_t old_allowed; void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; struct take_cpu_down_param tcd_param = { + .caller = current, .mod = mod, .hcpu = hcpu, }; @@ -206,9 +210,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) if (!cpu_online(cpu)) return -EINVAL; - if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) - return -ENOMEM; - cpu_hotplug_begin(); set_cpu_active(cpu, false); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, @@ -225,10 +226,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) goto out_release; } - /* Ensure that we are not runnable on dying cpu */ - cpumask_copy(old_allowed, ¤t->cpus_allowed); - set_cpus_allowed_ptr(current, cpu_active_mask); - err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { set_cpu_active(cpu, true); @@ -237,7 +234,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) hcpu) == NOTIFY_BAD) BUG(); - goto out_allowed; + goto out_release; } BUG_ON(cpu_online(cpu)); @@ -255,8 +252,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) check_for_tasks(cpu); -out_allowed: - set_cpus_allowed_ptr(current, old_allowed); out_release: cpu_hotplug_done(); if (!err) { @@ -264,7 +259,6 @@ out_release: hcpu) == NOTIFY_BAD) BUG(); } - free_cpumask_var(old_allowed); return err; } diff --git a/kernel/sched.c b/kernel/sched.c index ee8b5f5..455c38c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5878,7 +5878,7 @@ static int migration_thread(void *data) /* * Figure out where task on dead CPU should go, use force if necessary. */ -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { struct rq *rq = cpu_rq(dead_cpu); int needs_cpu, uninitialized_var(dest_cpu); -- 1.7.0.4