author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-08-18 12:29:58 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-08-18 12:29:58 +0200
commit     5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c (patch)
tree       37d44422d6e455a1b97666ce53b768babfcb3ef0
parent     f6122c410207c72c5bd3f9a6e5a722b86b251568 (diff)
download   4.12-rt-patches-5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c.tar.gz
[ANNOUNCE] v4.11.12-rt10
Dear RT folks!

I'm pleased to announce the v4.11.12-rt10 patch set.

Changes since v4.11.12-rt9:

  - A tweak to the scheduler to let it know that a task is in a
    migration-disabled region, so there are fewer candidate tasks to
    migrate. Idea and patch by Daniel Bristot de Oliveira.

  - A fix for the CPU idle code on arm64 was merged in v4.11.9-rt6 and
    is now updated to the version queued for mainline.

  - hrtimers which fired during a bad window during a CPU shutdown
    would be postponed forever and could corrupt the deferred list.
    Reported by Mike Galbraith.

  - The new RWLOCK code had a flaw in the write-lock path where a task
    could lose its task state. Reported and fixed by Mike Galbraith.

Known issues
  - There was a report regarding a deadlock within the rtmutex code.

The delta patch against v4.11.12-rt9 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.12-rt9-rt10.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.12-rt10

The RT patch against v4.11.12 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.12-rt10.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.12-rt10.tar.xz

Sebastian

diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -28,8 +28,15 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
 {
         int ret;
 
+        /*
+         * __atomic_notifier_call_chain has a RCU read critical section, which
+         * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
+         * RCU know this.
+         */
+        rcu_irq_enter_irqson();
         ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
                 nr_to_call, nr_calls);
+        rcu_irq_exit_irqson();
 
         return notifier_to_errno(ret);
 }
diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
--- a/kernel/locking/rwlock-rt.c
+++ b/kernel/locking/rwlock-rt.c
@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
         /* Force readers into slow path */
         atomic_sub(READER_BIAS, &lock->readers);
 
+        raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+        raw_spin_lock(&self->pi_lock);
+        self->saved_state = self->state;
+        __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+        raw_spin_unlock(&self->pi_lock);
+
         for (;;) {
-                raw_spin_lock_irqsave(&m->wait_lock, flags);
-
-                raw_spin_lock(&self->pi_lock);
-                self->saved_state = self->state;
-                __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
-                raw_spin_unlock(&self->pi_lock);
-
                 /* Have all readers left the critical region? */
                 if (!atomic_read(&lock->readers)) {
                         atomic_set(&lock->readers, WRITER_BIAS);
@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 
                 if (atomic_read(&lock->readers) != 0)
                         schedule();
+
+                raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+                raw_spin_lock(&self->pi_lock);
+                __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+                raw_spin_unlock(&self->pi_lock);
         }
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7499,6 +7499,47 @@ const u32 sched_prio_to_wmult[40] = {
 
 #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
 
+static inline void
+update_nr_migratory(struct task_struct *p, long delta)
+{
+        if (unlikely((p->sched_class == &rt_sched_class ||
+                      p->sched_class == &dl_sched_class) &&
+                      p->nr_cpus_allowed > 1)) {
+                if (p->sched_class == &rt_sched_class)
+                        task_rq(p)->rt.rt_nr_migratory += delta;
+                else
+                        task_rq(p)->dl.dl_nr_migratory += delta;
+        }
+}
+
+static inline void
+migrate_disable_update_cpus_allowed(struct task_struct *p)
+{
+        struct rq *rq;
+        struct rq_flags rf;
+
+        p->cpus_ptr = cpumask_of(smp_processor_id());
+
+        rq = task_rq_lock(p, &rf);
+        update_nr_migratory(p, -1);
+        p->nr_cpus_allowed = 1;
+        task_rq_unlock(rq, p, &rf);
+}
+
+static inline void
+migrate_enable_update_cpus_allowed(struct task_struct *p)
+{
+        struct rq *rq;
+        struct rq_flags rf;
+
+        p->cpus_ptr = &p->cpus_mask;
+
+        rq = task_rq_lock(p, &rf);
+        p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+        update_nr_migratory(p, 1);
+        task_rq_unlock(rq, p, &rf);
+}
+
 void migrate_disable(void)
 {
         struct task_struct *p = current;
@@ -7524,10 +7565,9 @@ void migrate_disable(void)
 
         preempt_disable();
         preempt_lazy_disable();
         pin_current_cpu();
-        p->migrate_disable = 1;
-        p->cpus_ptr = cpumask_of(smp_processor_id());
-        p->nr_cpus_allowed = 1;
+        migrate_disable_update_cpus_allowed(p);
+        p->migrate_disable = 1;
 
         preempt_enable();
 }
@@ -7559,9 +7599,8 @@ void migrate_enable(void)
 
         preempt_disable();
 
-        p->cpus_ptr = &p->cpus_mask;
-        p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
         p->migrate_disable = 0;
+        migrate_enable_update_cpus_allowed(p);
 
         if (p->migrate_disable_update) {
                 struct rq *rq;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 
 #define P(x) \
         SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+        SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
         SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-        P(rt_nr_running);
+        PU(rt_nr_running);
+#ifdef CONFIG_SMP
+        PU(rt_nr_migratory);
+#endif
         P(rt_throttled);
         PN(rt_time);
         PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
 
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
         struct dl_bw *dl_bw;
 
         SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-        SEQ_printf(m, " .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+        SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+        PU(dl_nr_running);
 #ifdef CONFIG_SMP
+        PU(dl_nr_migratory);
         dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
         dl_bw = &dl_rq->dl_bw;
 #endif
         SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
         SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1802,6 +1802,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
                  */
                 enqueue_hrtimer(timer, new_base);
         }
+#ifdef CONFIG_PREEMPT_RT_BASE
+        list_splice_tail(&old_base->expired, &new_base->expired);
+        if (!list_empty(&new_base->expired))
+                raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+#endif
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt9
+-rt10

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--  patches/completion-use-simple-wait-queues.patch                      |   4
-rw-r--r--  patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch         |  64
-rw-r--r--  patches/hotplug-light-get-online-cpus.patch                          |   8
-rw-r--r--  patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch      |  32
-rw-r--r--  patches/localversion.patch                                           |   2
-rw-r--r--  patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch   |  51
-rw-r--r--  patches/preempt-lazy-support.patch                                   |   8
-rw-r--r--  patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch   | 154
-rw-r--r--  patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch   |  83
-rw-r--r--  patches/series                                                       |   4
10 files changed, 369 insertions(+), 41 deletions(-)
diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch
index 80e8f53b3086af..3ce5296d8d20bd 100644
--- a/patches/completion-use-simple-wait-queues.patch
+++ b/patches/completion-use-simple-wait-queues.patch
@@ -276,7 +276,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
EXPORT_SYMBOL(completion_done);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -7487,7 +7487,10 @@ void migrate_disable(void)
+@@ -7528,7 +7528,10 @@ void migrate_disable(void)
return;
}
#ifdef CONFIG_SCHED_DEBUG
@@ -288,7 +288,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
if (p->migrate_disable) {
-@@ -7518,7 +7521,10 @@ void migrate_enable(void)
+@@ -7558,7 +7561,10 @@ void migrate_enable(void)
}
#ifdef CONFIG_SCHED_DEBUG
diff --git a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
index f24636b8b1f30e..14e5556fc3290c 100644
--- a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
+++ b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
@@ -1,14 +1,14 @@
From: Alex Shi <alex.shi@linaro.org>
-Date: Thu, 6 Jul 2017 16:47:46 +0800
-Subject: [PATCH] cpu_pm: replace raw_notifier to atomic_notifier
+Date: Fri, 28 Jul 2017 15:09:25 +0800
+Subject: PM / CPU: replace raw_notifier with atomic_notifier
-This patch replace a rwlock and raw notifier by atomic notifier which
-protected by spin_lock and rcu.
+This patch replaces an rwlock and raw notifier by an atomic notifier
+protected by a spin_lock and RCU.
-The first to reason to have this replace is due to a 'scheduling while
- atomic' bug of RT kernel on arm/arm64 platform. On arm/arm64, rwlock
-cpu_pm_notifier_lock in cpu_pm cause a potential schedule after irq
-disable in idle call chain:
+The main reason for this change is due to a 'scheduling while atomic'
+bug with RT kernels on ARM/ARM64. On ARM/ARM64, the rwlock
+cpu_pm_notifier_lock in cpu_pm_enter/exit() causes a potential
+schedule after IRQ disable in the idle call chain:
cpu_startup_entry
cpu_idle_loop
@@ -38,28 +38,24 @@ The kernel panic is here:
Daniel Lezcano said this notification is needed on arm/arm64 platforms.
Sebastian suggested using atomic_notifier instead of rwlock, which is not
-only removing the sleeping in idle, but also getting better latency
-improvement.
+only removing the sleeping in idle, but also improving latency.
-This patch passed Fengguang's 0day testing.
+Tony Lindgren found a miss use that rcu_read_lock used after rcu_idle_enter
+Paul McKenney suggested trying RCU_NONIDLE.
Signed-off-by: Alex Shi <alex.shi@linaro.org>
-Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Anders Roxell <anders.roxell@linaro.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
-Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Tested-by: Tony Lindgren <tony@atomide.com>
+Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+[ rjw: Subject & changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
- kernel/cpu_pm.c | 43 ++++++-------------------------------------
- 1 file changed, 6 insertions(+), 37 deletions(-)
+ kernel/cpu_pm.c | 50 +++++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 37 deletions(-)
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
-@@ -22,14 +22,13 @@
+@@ -22,15 +22,21 @@
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
@@ -72,11 +68,19 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
int ret;
- ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
++ /*
++ * __atomic_notifier_call_chain has a RCU read critical section, which
++ * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
++ * RCU know this.
++ */
++ rcu_irq_enter_irqson();
+ ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
nr_to_call, nr_calls);
++ rcu_irq_exit_irqson();
return notifier_to_errno(ret);
-@@ -47,14 +46,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
+ }
+@@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
*/
int cpu_pm_register_notifier(struct notifier_block *nb)
{
@@ -92,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
-@@ -69,14 +61,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
+@@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
*/
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
@@ -108,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
-@@ -100,7 +85,6 @@ int cpu_pm_enter(void)
+@@ -100,7 +92,6 @@ int cpu_pm_enter(void)
int nr_calls;
int ret = 0;
@@ -116,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
if (ret)
/*
-@@ -108,7 +92,6 @@ int cpu_pm_enter(void)
+@@ -108,7 +99,6 @@ int cpu_pm_enter(void)
* PM entry who are notified earlier to prepare for it.
*/
cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -124,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
return ret;
}
-@@ -128,13 +111,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
+@@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
*/
int cpu_pm_exit(void)
{
@@ -139,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
EXPORT_SYMBOL_GPL(cpu_pm_exit);
-@@ -159,7 +136,6 @@ int cpu_cluster_pm_enter(void)
+@@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void)
int nr_calls;
int ret = 0;
@@ -147,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
if (ret)
/*
-@@ -167,7 +143,6 @@ int cpu_cluster_pm_enter(void)
+@@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void)
* PM entry who are notified earlier to prepare for it.
*/
cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -155,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
return ret;
}
-@@ -190,13 +165,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+@@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
*/
int cpu_cluster_pm_exit(void)
{
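
For readers unfamiliar with the nr_to_call/nr_calls convention used by cpu_pm_notify() above: a failing callback aborts the chain, and cpu_pm_enter() then re-notifies only the callbacks that already ran (nr_calls - 1, with CPU_PM_ENTER_FAILED) so they can undo their preparation. The following stand-alone C program is only a rough user-space sketch of that convention; the names and types are simplified stand-ins, not the kernel notifier API.

#include <stdio.h>

enum cpu_pm_event { CPU_PM_ENTER, CPU_PM_ENTER_FAILED };

typedef int (*notifier_fn)(enum cpu_pm_event event);

static int dev_a(enum cpu_pm_event e) { printf("dev_a: event %d\n", e); return 0; }
/* dev_b refuses to enter low power, forcing a rollback */
static int dev_b(enum cpu_pm_event e) { printf("dev_b: event %d\n", e); return e == CPU_PM_ENTER ? -1 : 0; }

static notifier_fn chain[] = { dev_a, dev_b };

/* call up to nr_to_call entries (-1 = all); report how many ran in *nr_calls */
static int notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
{
        int ret = 0, i, n = sizeof(chain) / sizeof(chain[0]);

        for (i = 0; i < n && nr_to_call != 0; i++, nr_to_call--) {
                if (nr_calls)
                        (*nr_calls)++;
                ret = chain[i](event);
                if (ret)
                        break;
        }
        return ret;
}

int main(void)
{
        int nr_calls = 0;

        if (notify(CPU_PM_ENTER, -1, &nr_calls))
                /* roll back: only callbacks that saw ENTER get ENTER_FAILED */
                notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
        return 0;
}

Running it shows dev_a receiving both CPU_PM_ENTER and the follow-up CPU_PM_ENTER_FAILED, while dev_b only sees the failed attempt it caused.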
diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch
index 5ddb9c5c2b06eb..48a5697ada2d47 100644
--- a/patches/hotplug-light-get-online-cpus.patch
+++ b/patches/hotplug-light-get-online-cpus.patch
@@ -64,15 +64,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void cpus_read_lock(void)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -7469,6 +7469,7 @@ void migrate_disable(void)
+@@ -7510,6 +7510,7 @@ void migrate_disable(void)
}
preempt_disable();
+ pin_current_cpu();
- p->migrate_disable = 1;
- p->cpus_ptr = cpumask_of(smp_processor_id());
-@@ -7533,12 +7534,15 @@ void migrate_enable(void)
+ migrate_disable_update_cpus_allowed(p);
+ p->migrate_disable = 1;
+@@ -7572,12 +7573,15 @@ void migrate_enable(void)
arg.task = p;
arg.dest_cpu = dest_cpu;
diff --git a/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
new file mode 100644
index 00000000000000..67e8c0e37ef343
--- /dev/null
+++ b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
@@ -0,0 +1,32 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 18 Aug 2017 10:09:09 +0200
+Subject: [PATCH] kernel/hrtimer: migrate deferred timer on CPU down
+
+hrtimers, which were deferred to the softirq context, and expire between
+softirq shutdown and hrtimer migration are dangling around. If the CPU
+goes back up the list head will be initialized and this corrupts the
+timer's list. It will remain unnoticed until a hrtimer_cancel().
+This moves those timers so they will expire.
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: Mike Galbraith <efault@gmx.de>
+Tested-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/time/hrtimer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -1748,6 +1748,11 @@ static void migrate_hrtimer_list(struct
+ */
+ enqueue_hrtimer(timer, new_base);
+ }
++#ifdef CONFIG_PREEMPT_RT_BASE
++ list_splice_tail(&old_base->expired, &new_base->expired);
++ if (!list_empty(&new_base->expired))
++ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++#endif
+ }
+
+ int hrtimers_dead_cpu(unsigned int scpu)
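
The hrtimer fix above splices timers still sitting on the dead CPU's expired list onto the new base before the old base can be reused. Below is a toy user-space model of why that splice matters; the list type, list_splice_tail() and the scenario are simplified stand-ins borrowed from the kernel naming, not the real implementation.

#include <stdio.h>
#include <stddef.h>

struct node { struct node *next; const char *name; };
struct list { struct node *head, *tail; };

static void list_init(struct list *l) { l->head = l->tail = NULL; }

static void list_add_tail(struct list *l, struct node *n)
{
        n->next = NULL;
        if (l->tail)
                l->tail->next = n;
        else
                l->head = n;
        l->tail = n;
}

/* move every entry of @old to the end of @new, leaving @old empty */
static void list_splice_tail(struct list *old, struct list *new)
{
        if (!old->head)
                return;
        if (new->tail)
                new->tail->next = old->head;
        else
                new->head = old->head;
        new->tail = old->tail;
        list_init(old);
}

int main(void)
{
        struct list dead_cpu_expired, online_cpu_expired;
        struct node t = { .name = "deferred timer" };

        list_init(&dead_cpu_expired);
        list_init(&online_cpu_expired);
        list_add_tail(&dead_cpu_expired, &t);   /* expired after softirq shutdown */

        /* without this splice, re-initialising the dead CPU's list orphans 't' */
        list_splice_tail(&dead_cpu_expired, &online_cpu_expired);

        for (struct node *n = online_cpu_expired.head; n; n = n->next)
                printf("migrated: %s\n", n->name);
        return 0;
}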
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 02952cda4bfa23..e16fb07c0a7d6f 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt9
++-rt10
diff --git a/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
new file mode 100644
index 00000000000000..b5f9fc1bb79c10
--- /dev/null
+++ b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
@@ -0,0 +1,51 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 18 Aug 2017 10:56:14 +0200
+Subject: [PATCH] locking, rwlock-rt: do not save state multiple times in
+ __write_rt_lock()
+
+Save state prior to entering the acquisition loop, otherwise we may
+initially see readers, but upon releasing ->wait_lock see none, loop
+back around, and having not slept, save TASK_UNINTERRUPTIBLE.
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rwlock-rt.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/rwlock-rt.c
++++ b/kernel/locking/rwlock-rt.c
+@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_r
+ /* Force readers into slow path */
+ atomic_sub(READER_BIAS, &lock->readers);
+
+- for (;;) {
+- raw_spin_lock_irqsave(&m->wait_lock, flags);
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+- raw_spin_lock(&self->pi_lock);
+- self->saved_state = self->state;
+- __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+- raw_spin_unlock(&self->pi_lock);
++ raw_spin_lock(&self->pi_lock);
++ self->saved_state = self->state;
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
+
++ for (;;) {
+ /* Have all readers left the critical region? */
+ if (!atomic_read(&lock->readers)) {
+ atomic_set(&lock->readers, WRITER_BIAS);
+@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_r
+
+ if (atomic_read(&lock->readers) != 0)
+ schedule();
++
++ raw_spin_lock_irqsave(&m->wait_lock, flags);
++
++ raw_spin_lock(&self->pi_lock);
++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++ raw_spin_unlock(&self->pi_lock);
+ }
+ }
+
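
Mike's fix boils down to capturing the task's state once, before the retry loop: a second pass through the loop would otherwise overwrite the saved value with TASK_UNINTERRUPTIBLE. The stand-alone C model below illustrates only that effect (hypothetical user-space code, no locking or scheduling); the "buggy" variant restores the wrong state, the "fixed" one restores the original.

#include <stdio.h>

enum state { TASK_RUNNING, TASK_INTERRUPTIBLE, TASK_UNINTERRUPTIBLE };

struct task { enum state state, saved_state; };

/* Buggy variant: saves state on every loop iteration. */
static void writer_lock_buggy(struct task *self, int *readers)
{
        while (1) {
                self->saved_state = self->state;   /* 2nd pass saves UNINTERRUPTIBLE */
                self->state = TASK_UNINTERRUPTIBLE;
                if (*readers == 0)
                        break;
                /* schedule() would run here; pretend the reader left meanwhile */
                (*readers)--;
        }
        self->state = self->saved_state;           /* original state is lost */
}

/* Fixed variant: saves state once, before the loop. */
static void writer_lock_fixed(struct task *self, int *readers)
{
        self->saved_state = self->state;
        self->state = TASK_UNINTERRUPTIBLE;
        while (*readers != 0)
                (*readers)--;                      /* wait for readers to drain */
        self->state = self->saved_state;           /* original state restored */
}

int main(void)
{
        struct task t = { .state = TASK_INTERRUPTIBLE };
        int readers = 1;

        writer_lock_buggy(&t, &readers);
        printf("buggy: restored state %d (expected %d)\n", t.state, TASK_INTERRUPTIBLE);

        t.state = TASK_INTERRUPTIBLE;
        readers = 1;
        writer_lock_fixed(&t, &readers);
        printf("fixed: restored state %d (expected %d)\n", t.state, TASK_INTERRUPTIBLE);
        return 0;
}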
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index b59fd2ade25886..225838f8b5e8cb 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -362,15 +362,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* The idle tasks have their own, simple scheduling class:
*/
-@@ -7443,6 +7519,7 @@ void migrate_disable(void)
+@@ -7484,6 +7560,7 @@ void migrate_disable(void)
}
preempt_disable();
+ preempt_lazy_disable();
pin_current_cpu();
- p->migrate_disable = 1;
-@@ -7512,6 +7589,7 @@ void migrate_enable(void)
+ migrate_disable_update_cpus_allowed(p);
+@@ -7551,6 +7628,7 @@ void migrate_enable(void)
arg.dest_cpu = dest_cpu;
unpin_current_cpu();
@@ -378,7 +378,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_enable();
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
-@@ -7520,6 +7598,7 @@ void migrate_enable(void)
+@@ -7559,6 +7637,7 @@ void migrate_enable(void)
}
}
unpin_current_cpu();
diff --git a/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
new file mode 100644
index 00000000000000..a18dbc5b97fc49
--- /dev/null
+++ b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
@@ -0,0 +1,154 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:15 +0200
+Subject: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration
+
+There is a problem in the migrate_disable()/enable() implementation
+regarding the number of migratory tasks in the rt/dl RQs. The problem
+is the following:
+
+When a task is attached to the rt runqueue, it is checked if it either
+can run in more than one CPU, or if it is with migration disable. If
+either check is true, the rt_rq->rt_nr_migratory counter is not
+increased. The counter increases otherwise.
+
+When the task is detached, the same check is done. If either check is
+true, the rt_rq->rt_nr_migratory counter is not decreased. The counter
+decreases otherwise. The same check is done in the dl scheduler.
+
+One important thing is that, migrate disable/enable does not touch this
+counter for tasks attached to the rt rq. So suppose the following chain
+of events.
+
+Assumptions:
+Task A is the only runnable task in A Task B runs on the CPU B
+Task A runs on CFS (non-rt) Task B has RT priority
+Thus, rt_nr_migratory is 0 B is running
+Task A can run on all CPUS.
+
+Timeline:
+ CPU A/TASK A CPU B/TASK B
+A takes the rt mutex X .
+A disables migration .
+ . B tries to take the rt mutex X
+ . As it is held by A {
+ . A inherits the rt priority of B
+ . A is dequeued from CFS RQ of CPU A
+ . A is enqueued in the RT RQ of CPU A
+ . As migration is disabled
+ . rt_nr_migratory in A is not increased
+ .
+A enables migration
+A releases the rt mutex X {
+ A returns to its original priority
+ A ask to be dequeued from RT RQ {
+ As migration is now enabled and it can run on all CPUS {
+ rt_nr_migratory should be decreased
+ As rt_nr_migratory is 0, rt_nr_migratory under flows
+ }
+}
+
+This variable is important because it notifies if there are more than one
+runnable & migratory task in the runqueue. If there are more than one
+tasks, the rt_rq is set as overloaded, and then tries to migrate some
+tasks. This rule is important to keep the scheduler working conserving,
+that is, in a system with M CPUs, the M highest priority tasks should be
+running.
+
+As rt_nr_migratory is unsigned, it will become > 0, notifying that the
+RQ is overloaded, activating pushing mechanism without need.
+
+This patch fixes this problem by decreasing/increasing the
+rt/dl_nr_migratory in the migrate disable/enable operations.
+
+Reported-by: Pei Zhang <pezhang@redhat.com>
+Reported-by: Luiz Capitulino <lcapitulino@redhat.com>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 44 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7449,6 +7449,47 @@ const u32 sched_prio_to_wmult[40] = {
+
+ #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+
++static inline void
++update_nr_migratory(struct task_struct *p, long delta)
++{
++ if (unlikely((p->sched_class == &rt_sched_class ||
++ p->sched_class == &dl_sched_class) &&
++ p->nr_cpus_allowed > 1)) {
++ if (p->sched_class == &rt_sched_class)
++ task_rq(p)->rt.rt_nr_migratory += delta;
++ else
++ task_rq(p)->dl.dl_nr_migratory += delta;
++ }
++}
++
++static inline void
++migrate_disable_update_cpus_allowed(struct task_struct *p)
++{
++ struct rq *rq;
++ struct rq_flags rf;
++
++ p->cpus_ptr = cpumask_of(smp_processor_id());
++
++ rq = task_rq_lock(p, &rf);
++ update_nr_migratory(p, -1);
++ p->nr_cpus_allowed = 1;
++ task_rq_unlock(rq, p, &rf);
++}
++
++static inline void
++migrate_enable_update_cpus_allowed(struct task_struct *p)
++{
++ struct rq *rq;
++ struct rq_flags rf;
++
++ p->cpus_ptr = &p->cpus_mask;
++
++ rq = task_rq_lock(p, &rf);
++ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
++ update_nr_migratory(p, 1);
++ task_rq_unlock(rq, p, &rf);
++}
++
+ void migrate_disable(void)
+ {
+ struct task_struct *p = current;
+@@ -7469,10 +7510,9 @@ void migrate_disable(void)
+ }
+
+ preempt_disable();
+- p->migrate_disable = 1;
+
+- p->cpus_ptr = cpumask_of(smp_processor_id());
+- p->nr_cpus_allowed = 1;
++ migrate_disable_update_cpus_allowed(p);
++ p->migrate_disable = 1;
+
+ preempt_enable();
+ }
+@@ -7501,9 +7541,8 @@ void migrate_enable(void)
+
+ preempt_disable();
+
+- p->cpus_ptr = &p->cpus_mask;
+- p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ p->migrate_disable = 0;
++ migrate_enable_update_cpus_allowed(p);
+
+ if (p->migrate_disable_update) {
+ struct rq *rq;
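
The key observation in Daniel's changelog is that rt_nr_migratory is unsigned, so a decrement that was never matched by an increment wraps around and makes the runqueue look overloaded. A minimal user-space demonstration of that wrap (plain C; the variable name is reused purely for clarity, this is not kernel code):

#include <stdio.h>

int main(void)
{
        unsigned long rt_nr_migratory = 0;   /* task was enqueued with migration disabled */

        /* dequeue path runs with migration re-enabled and decrements anyway */
        rt_nr_migratory--;

        printf("rt_nr_migratory = %lu\n", rt_nr_migratory);  /* wraps to ULONG_MAX */
        if (rt_nr_migratory > 1)
                printf("runqueue looks overloaded -> pointless push attempts\n");
        return 0;
}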
diff --git a/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
new file mode 100644
index 00000000000000..0ca8e397154247
--- /dev/null
+++ b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
@@ -0,0 +1,83 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:14 +0200
+Subject: sched/debug: Inform the number of rt/dl task that can migrate
+
+Add the value of the rt_rq.rt_nr_migratory and dl_rq.dl_nr_migratory
+to the sched_debug output, for instance:
+
+rt_rq[0]:
+ .rt_nr_running : 2
+ .rt_nr_migratory : 1 <--- Like this
+ .rt_throttled : 0
+ .rt_time : 828.645877
+ .rt_runtime : 1000.000000
+
+This is useful to debug problems related to the dl/rt schedulers.
+
+This also fixes the format of some variables, that were unsigned, rather
+than signed.
+
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/debug.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int
+
+ #define P(x) \
+ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
++#define PU(x) \
++ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
+ #define PN(x) \
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
+
+- P(rt_nr_running);
++ PU(rt_nr_running);
++#ifdef CONFIG_SMP
++ PU(rt_nr_migratory);
++#endif
+ P(rt_throttled);
+ PN(rt_time);
+ PN(rt_runtime);
+
+ #undef PN
++#undef PU
+ #undef P
+ }
+
+@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int
+ struct dl_bw *dl_bw;
+
+ SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
+- SEQ_printf(m, " .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
++
++#define PU(x) \
++ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
++
++ PU(dl_nr_running);
+ #ifdef CONFIG_SMP
++ PU(dl_nr_migratory);
+ dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+ #else
+ dl_bw = &dl_rq->dl_bw;
+ #endif
+ SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+ SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
++
++#undef PU
+ }
+
+ extern __read_mostly int sched_clock_running;
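
The PU() helper added above relies only on standard C stringification: '#x' turns the field name into the printed label, and the cast to unsigned long keeps the %lu format correct regardless of the field's exact unsigned type. A small stand-alone example of the same pattern (the sample struct and values are made up):

#include <stdio.h>

struct rt_rq_sample {
        unsigned int rt_nr_running;
        unsigned int rt_nr_migratory;
};

/* print " .<field name>: <value>" like the sched_debug output above */
#define PU(x) \
        printf(" .%-30s: %lu\n", #x, (unsigned long)(rq.x))

int main(void)
{
        struct rt_rq_sample rq = { .rt_nr_running = 2, .rt_nr_migratory = 1 };

        PU(rt_nr_running);
        PU(rt_nr_migratory);
        return 0;
}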
diff --git a/patches/series b/patches/series
index 9f369c6e42b20e..813e43113cff40 100644
--- a/patches/series
+++ b/patches/series
@@ -395,6 +395,7 @@ sched-deadline-dl_task_timer-has-to-be-irqsafe.patch
timer-fd-avoid-live-lock.patch
tick-broadcast--Make-hrtimer-irqsafe.patch
timer-hrtimer-check-properly-for-a-running-timer.patch
+kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
# POSIX-CPU-TIMERS
posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -414,6 +415,8 @@ sched-disable-ttwu-queue.patch
sched-disable-rt-group-sched-on-rt.patch
sched-ttwu-ensure-success-return-is-correct.patch
sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch
+sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
+rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
# STOP MACHINE
stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
@@ -477,6 +480,7 @@ locking-rtmutex--Make-inner-working-of-rt_spin_slow_lock---accessible.patch
locking-rt-rwlock--Provide-reader-biased-rwlock-for-RT.patch
locking-rt-rwlock--Make-reader-biased-rwlocks-selectable.patch
rt-locking--Consolidate-rwlock-variants.patch
+locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
# RCU
peter_zijlstra-frob-rcu.patch