diff options
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r-- | kernel/cpu.c | 240 |
1 files changed, 196 insertions, 44 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 503f248f28291e..c48208b921601d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -60,16 +60,10 @@ static int cpu_hotplug_disabled; static struct { struct task_struct *active_writer; - /* wait queue to wake up the active_writer */ wait_queue_head_t wq; -#ifdef CONFIG_PREEMPT_RT_FULL - /* Makes the lock keep the task's state */ - spinlock_t lock; -#else /* verifies that no writer will get active while readers are active */ struct mutex lock; -#endif /* * Also blocks the new readers during * an ongoing cpu hotplug operation. @@ -81,27 +75,13 @@ static struct { #endif } cpu_hotplug = { .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), -#ifdef CONFIG_PREEMPT_RT_FULL - .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock), -#else .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -#endif + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), #ifdef CONFIG_DEBUG_LOCK_ALLOC .dep_map = {.name = "cpu_hotplug.lock" }, #endif }; -#ifdef CONFIG_PREEMPT_RT_FULL -# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock) -# define hotplug_trylock() rt_spin_trylock(&cpu_hotplug.lock) -# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock) -#else -# define hotplug_lock() mutex_lock(&cpu_hotplug.lock) -# define hotplug_trylock() mutex_trylock(&cpu_hotplug.lock) -# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock) -#endif - /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) #define cpuhp_lock_acquire_tryread() \ @@ -109,12 +89,42 @@ static struct { #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) +/** + * hotplug_pcp - per cpu hotplug descriptor + * @unplug: set when pin_current_cpu() needs to sync tasks + * @sync_tsk: the task that waits for tasks to finish pinned sections + * @refcount: counter of tasks in pinned sections 
+ * @grab_lock: set when the tasks entering pinned sections should wait + * @synced: notifier for @sync_tsk to tell cpu_down it's finished + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) + * @mutex_init: zero if the mutex hasn't been initialized yet. + * + * Although @unplug and @sync_tsk may point to the same task, the @unplug + * is used as a flag and still exists after @sync_tsk has exited and + * @sync_tsk set to NULL. + */ struct hotplug_pcp { struct task_struct *unplug; + struct task_struct *sync_tsk; int refcount; + int grab_lock; struct completion synced; +#ifdef CONFIG_PREEMPT_RT_FULL + spinlock_t lock; +#else + struct mutex mutex; +#endif + int mutex_init; }; +#ifdef CONFIG_PREEMPT_RT_FULL +# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock) +# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock) +#else +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) +#endif + static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); /** @@ -128,18 +138,39 @@ static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); void pin_current_cpu(void) { struct hotplug_pcp *hp; + int force = 0; retry: hp = this_cpu_ptr(&hotplug_pcp); - if (!hp->unplug || hp->refcount || preempt_count() > 1 || + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || hp->unplug == current) { hp->refcount++; return; } - preempt_enable(); - hotplug_lock(); - hotplug_unlock(); + if (hp->grab_lock) { + preempt_enable(); + hotplug_lock(hp); + hotplug_unlock(hp); + } else { + preempt_enable(); + /* + * Try to push this task off of this CPU. + */ + if (!migrate_me()) { + preempt_disable(); + hp = this_cpu_ptr(&hotplug_pcp); + if (!hp->grab_lock) { + /* + * Just let it continue it's already pinned + * or about to sleep. 
+ */ + force = 1; + goto retry; + } + preempt_enable(); + } + } preempt_disable(); goto retry; } @@ -160,26 +191,84 @@ void unpin_current_cpu(void) wake_up_process(hp->unplug); } -/* - * FIXME: Is this really correct under all circumstances ? - */ +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + while (hp->refcount) { + schedule_preempt_disabled(); + set_current_state(TASK_UNINTERRUPTIBLE); + } +} + static int sync_unplug_thread(void *data) { struct hotplug_pcp *hp = data; preempt_disable(); hp->unplug = current; + wait_for_pinned_cpus(hp); + + /* + * This thread will synchronize the cpu_down() with threads + * that have pinned the CPU. When the pinned CPU count reaches + * zero, we inform the cpu_down code to continue to the next step. + */ set_current_state(TASK_UNINTERRUPTIBLE); - while (hp->refcount) { - schedule_preempt_disabled(); + preempt_enable(); + complete(&hp->synced); + + /* + * If all succeeds, the next step will need tasks to wait till + * the CPU is offline before continuing. To do this, the grab_lock + * is set and tasks going into pin_current_cpu() will block on the + * mutex. But we still need to wait for those that are already in + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() + * will kick this thread out. + */ + while (!hp->grab_lock && !kthread_should_stop()) { + schedule(); + set_current_state(TASK_UNINTERRUPTIBLE); + } + + /* Make sure grab_lock is seen before we see a stale completion */ + smp_mb(); + + /* + * Now just before cpu_down() enters stop machine, we need to make + * sure all tasks that are in pinned CPU sections are out, and new + * tasks will now grab the lock, keeping them from entering pinned + * CPU sections. 
+ */ + if (!kthread_should_stop()) { + preempt_disable(); + wait_for_pinned_cpus(hp); + preempt_enable(); + complete(&hp->synced); + } + + set_current_state(TASK_UNINTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); set_current_state(TASK_UNINTERRUPTIBLE); } set_current_state(TASK_RUNNING); - preempt_enable(); - complete(&hp->synced); + + /* + * Force this thread off this CPU as it's going down and + * we don't want any more work on this CPU. + */ + current->flags &= ~PF_NO_SETAFFINITY; + do_set_cpus_allowed(current, cpu_present_mask); + migrate_me(); return 0; } +static void __cpu_unplug_sync(struct hotplug_pcp *hp) +{ + wake_up_process(hp->sync_tsk); + wait_for_completion(&hp->synced); +} + /* * Start the sync_unplug_thread on the target cpu and wait for it to * complete. @@ -187,23 +276,83 @@ static int sync_unplug_thread(void *data) static int cpu_unplug_begin(unsigned int cpu) { struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); - struct task_struct *tsk; + int err; + + /* Protected by cpu_hotplug.lock */ + if (!hp->mutex_init) { +#ifdef CONFIG_PREEMPT_RT_FULL + spin_lock_init(&hp->lock); +#else + mutex_init(&hp->mutex); +#endif + hp->mutex_init = 1; + } + + /* Inform the scheduler to migrate tasks off this CPU */ + tell_sched_cpu_down_begin(cpu); init_completion(&hp->synced); - tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); - if (IS_ERR(tsk)) - return (PTR_ERR(tsk)); - kthread_bind(tsk, cpu); - wake_up_process(tsk); - wait_for_completion(&hp->synced); + + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); + if (IS_ERR(hp->sync_tsk)) { + err = PTR_ERR(hp->sync_tsk); + hp->sync_tsk = NULL; + return err; + } + kthread_bind(hp->sync_tsk, cpu); + + /* + * Wait for tasks to get out of the pinned sections, + * it's still OK if new tasks enter. Some CPU notifiers will + * wait for tasks that are going to enter these sections and + * we must not have them block. 
+ */ + __cpu_unplug_sync(hp); + return 0; } +static void cpu_unplug_sync(unsigned int cpu) +{ + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); + + init_completion(&hp->synced); + /* The completion needs to be initialized before setting grab_lock */ + smp_wmb(); + + /* Grab the mutex before setting grab_lock */ + hotplug_lock(hp); + hp->grab_lock = 1; + + /* + * The CPU notifiers have been completed. + * Wait for tasks to get out of pinned CPU sections and have new + * tasks block until the CPU is completely down. + */ + __cpu_unplug_sync(hp); + + /* All done with the sync thread */ + kthread_stop(hp->sync_tsk); + hp->sync_tsk = NULL; +} + static void cpu_unplug_done(unsigned int cpu) { struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); hp->unplug = NULL; + /* Let all tasks know cpu unplug is finished before cleaning up */ + smp_wmb(); + + if (hp->sync_tsk) + kthread_stop(hp->sync_tsk); + + if (hp->grab_lock) { + hotplug_unlock(hp); + /* protected by cpu_hotplug.lock */ + hp->grab_lock = 0; + } + tell_sched_cpu_down_done(cpu); } void get_online_cpus(void) @@ -212,9 +361,9 @@ void get_online_cpus(void) if (cpu_hotplug.active_writer == current) return; cpuhp_lock_acquire_read(); - hotplug_lock(); + mutex_lock(&cpu_hotplug.lock); atomic_inc(&cpu_hotplug.refcount); - hotplug_unlock(); + mutex_unlock(&cpu_hotplug.lock); } EXPORT_SYMBOL_GPL(get_online_cpus); @@ -267,11 +416,11 @@ void cpu_hotplug_begin(void) cpuhp_lock_acquire(); for (;;) { - hotplug_lock(); + mutex_lock(&cpu_hotplug.lock); prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); if (likely(!atomic_read(&cpu_hotplug.refcount))) break; - hotplug_unlock(); + mutex_unlock(&cpu_hotplug.lock); schedule(); } finish_wait(&cpu_hotplug.wq, &wait); @@ -280,7 +429,7 @@ void cpu_hotplug_begin(void) void cpu_hotplug_done(void) { cpu_hotplug.active_writer = NULL; - hotplug_unlock(); + mutex_unlock(&cpu_hotplug.lock); cpuhp_lock_release(); } @@ -516,6 +665,9 @@ static int _cpu_down(unsigned int cpu, int 
tasks_frozen) smpboot_park_threads(cpu); + /* Notifiers are done. Don't let any more tasks pin this CPU. */ + cpu_unplug_sync(cpu); + /* * Prevent irq alloc/free while the dying cpu reorganizes the * interrupt affinities. |