author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2017-08-18 12:29:58 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2017-08-18 12:29:58 +0200
commit     5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c (patch)
tree       37d44422d6e455a1b97666ce53b768babfcb3ef0
parent     f6122c410207c72c5bd3f9a6e5a722b86b251568 (diff)
download   4.12-rt-patches-5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c.tar.gz
[ANNOUNCE] v4.11.12-rt10
Dear RT folks!
I'm pleased to announce the v4.11.12-rt10 patch set.
Changes since v4.11.12-rt9:
- A tweak to the scheduler to let it know that a task is in a
  migration-disabled region, so there are fewer candidate tasks to
  migrate. Idea and patch by Daniel Bristot de Oliveira.
- A fix for the CPU idle code on arm64, which was merged in
  v4.11.9-rt6, has been updated to the version queued for mainline.
- hrtimers which fired during a bad window while a CPU was shutting
  down would be postponed forever and could corrupt the deferred list.
  Reported by Mike Galbraith.
- The new RWLOCK code had a flaw in the write-lock path where a task
  could lose its task state. Reported and fixed by Mike Galbraith.
Known issues
- There was a report regarding a deadlock within the rtmutex code.
The delta patch against v4.11.12-rt9 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.12-rt9-rt10.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.12-rt10
The RT patch against v4.11.12 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.12-rt10.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.12-rt10.tar.xz
Sebastian
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -28,8 +28,15 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
 {
 	int ret;
 
+	/*
+	 * __atomic_notifier_call_chain has a RCU read critical section, which
+	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
+	 * RCU know this.
+	 */
+	rcu_irq_enter_irqson();
 	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
 		nr_to_call, nr_calls);
+	rcu_irq_exit_irqson();
 
 	return notifier_to_errno(ret);
 }
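
For context, the rcu_irq_enter_irqson()/rcu_irq_exit_irqson() pair added here is
what the RCU_NONIDLE() macro in <linux/rcupdate.h> expands to, so the hunk could
equivalently be written as the sketch below (assuming the v4.11 definition of
the macro):

RCU_NONIDLE(
	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event,
					   NULL, nr_to_call, nr_calls));

Either form tells RCU that this stretch of the idle path must not be treated as
idle, which is what makes the notifier chain's RCU read-side critical section
safe here.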
diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
--- a/kernel/locking/rwlock-rt.c
+++ b/kernel/locking/rwlock-rt.c
@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 	/* Force readers into slow path */
 	atomic_sub(READER_BIAS, &lock->readers);
 
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+	raw_spin_lock(&self->pi_lock);
+	self->saved_state = self->state;
+	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+	raw_spin_unlock(&self->pi_lock);
+
 	for (;;) {
-		raw_spin_lock_irqsave(&m->wait_lock, flags);
-
-		raw_spin_lock(&self->pi_lock);
-		self->saved_state = self->state;
-		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
-		raw_spin_unlock(&self->pi_lock);
-
 		/* Have all readers left the critical region? */
 		if (!atomic_read(&lock->readers)) {
 			atomic_set(&lock->readers, WRITER_BIAS);
@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 
 		if (atomic_read(&lock->readers) != 0)
 			schedule();
+
+		raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+		raw_spin_lock(&self->pi_lock);
+		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+		raw_spin_unlock(&self->pi_lock);
 	}
 }
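
The flaw is easier to see in isolation: with the save inside the loop, a writer
that sees readers, drops the wait_lock and comes back around without sleeping
re-saves the task state it itself already set to TASK_UNINTERRUPTIBLE. A
hypothetical userspace analog (stand-in states and helpers, not kernel code)
that demonstrates the loss:

#include <assert.h>
#include <stdio.h>

enum state { STATE_RUNNING, STATE_INTERRUPTIBLE, STATE_UNINTERRUPTIBLE };

static enum state state = STATE_INTERRUPTIBLE;	/* caller's original state */
static enum state saved_state;

static void buggy_write_lock(int passes)
{
	/* Bug: the save sits inside the retry loop. On the second pass it
	 * saves STATE_UNINTERRUPTIBLE, clobbering the remembered
	 * STATE_INTERRUPTIBLE. */
	while (passes--) {
		saved_state = state;
		state = STATE_UNINTERRUPTIBLE;
	}
	state = saved_state;	/* "restore" - but the original state is gone */
}

int main(void)
{
	buggy_write_lock(2);	/* saw readers once, looped without sleeping */
	printf("state after unlock: %d (expected %d)\n",
	       state, STATE_INTERRUPTIBLE);
	assert(state == STATE_UNINTERRUPTIBLE);	/* the bug, demonstrated */
	return 0;
}

Hoisting the save in front of the loop, as the hunk above does, makes the first
snapshot the only one; the loop body then merely re-asserts
TASK_UNINTERRUPTIBLE without touching saved_state.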
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7499,6 +7499,47 @@ const u32 sched_prio_to_wmult[40] = {
 
 #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
 
+static inline void
+update_nr_migratory(struct task_struct *p, long delta)
+{
+	if (unlikely((p->sched_class == &rt_sched_class ||
+		      p->sched_class == &dl_sched_class) &&
+		      p->nr_cpus_allowed > 1)) {
+		if (p->sched_class == &rt_sched_class)
+			task_rq(p)->rt.rt_nr_migratory += delta;
+		else
+			task_rq(p)->dl.dl_nr_migratory += delta;
+	}
+}
+
+static inline void
+migrate_disable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = cpumask_of(smp_processor_id());
+
+	rq = task_rq_lock(p, &rf);
+	update_nr_migratory(p, -1);
+	p->nr_cpus_allowed = 1;
+	task_rq_unlock(rq, p, &rf);
+}
+
+static inline void
+migrate_enable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = &p->cpus_mask;
+
+	rq = task_rq_lock(p, &rf);
+	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+	update_nr_migratory(p, 1);
+	task_rq_unlock(rq, p, &rf);
+}
+
 void migrate_disable(void)
 {
 	struct task_struct *p = current;
@@ -7524,10 +7565,9 @@ void migrate_disable(void)
 	preempt_disable();
 	preempt_lazy_disable();
 	pin_current_cpu();
-	p->migrate_disable = 1;
 
-	p->cpus_ptr = cpumask_of(smp_processor_id());
-	p->nr_cpus_allowed = 1;
+	migrate_disable_update_cpus_allowed(p);
+	p->migrate_disable = 1;
 
 	preempt_enable();
 }
@@ -7559,9 +7599,8 @@ void migrate_enable(void)
 
 	preempt_disable();
 
-	p->cpus_ptr = &p->cpus_mask;
-	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
 	p->migrate_disable = 0;
+	migrate_enable_update_cpus_allowed(p);
 
 	if (p->migrate_disable_update) {
 		struct rq *rq;
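
From a caller's point of view nothing changes; the sketch below (kernel-style,
not part of this patch set, with a hypothetical per-CPU counter) shows what the
new helpers buy:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, frob_count);

static void frob_this_cpu(void)
{
	/* Pin the task: cpus_ptr shrinks to the current CPU and, with the
	 * hunks above, nr_cpus_allowed drops to 1 and rt/dl_nr_migratory
	 * is decremented, so the push logic stops counting this task. */
	migrate_disable();
	this_cpu_inc(frob_count);
	/* Restore the mask, nr_cpus_allowed and the migratory accounting. */
	migrate_enable();
}

Taking the rq lock inside the helpers is what allows the counters to be
adjusted safely against a concurrent attach/detach of the task.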
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-	P(rt_nr_running);
+	PU(rt_nr_running);
+#ifdef CONFIG_SMP
+	PU(rt_nr_migratory);
+#endif
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 	struct dl_bw *dl_bw;
 
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+	PU(dl_nr_running);
 #ifdef CONFIG_SMP
+	PU(dl_nr_migratory);
 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
 	dl_bw = &dl_rq->dl_bw;
 #endif
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
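
With both hunks applied, /proc/sched_debug reports how many rt/dl tasks can
migrate; the patch description (quoted in full in the quilt-queue diff below)
shows the intended output:

rt_rq[0]:
  .rt_nr_running                 : 2
  .rt_nr_migratory               : 1
  .rt_throttled                  : 0
  .rt_time                       : 828.645877
  .rt_runtime                    : 1000.000000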
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1802,6 +1802,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 */
 		enqueue_hrtimer(timer, new_base);
 	}
+#ifdef CONFIG_PREEMPT_RT_BASE
+	list_splice_tail(&old_base->expired, &new_base->expired);
+	if (!list_empty(&new_base->expired))
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+#endif
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
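
The failure window, briefly: on PREEMPT_RT, hrtimers marked for softirq expiry
sit on a per-clock-base expired list. A timer that fires after the outgoing
CPU's softirq processing has stopped but before hrtimers_dead_cpu() runs is
stranded there, and the next onlining of that CPU re-initializes the list head
over the still-linked entry. A sketch of the splice semantics used above
(hypothetical helper, assuming the RT tree's ->expired list):

#include <linux/list.h>

static void migrate_deferred_timers(struct list_head *old_expired,
				    struct list_head *new_expired)
{
	/* Move all stranded entries to the online CPU's list in one
	 * operation. list_splice_tail() does not reinitialize the source
	 * list head; the CPU-up path INIT_LIST_HEAD()s it anyway. */
	list_splice_tail(old_expired, new_expired);
}

Raising HRTIMER_SOFTIRQ afterwards ensures the moved timers actually expire on
their new CPU instead of waiting for an unrelated wakeup.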
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt9
+-rt10
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 patches/completion-use-simple-wait-queues.patch                    |   4
 patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch       |  64
 patches/hotplug-light-get-online-cpus.patch                        |   8
 patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch    |  32
 patches/localversion.patch                                         |   2
 patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch |  51
 patches/preempt-lazy-support.patch                                 |   8
 patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch | 154
 patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch |  83
 patches/series                                                     |   4
 10 files changed, 369 insertions, 41 deletions
diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch
index 80e8f53b3086af..3ce5296d8d20bd 100644
--- a/patches/completion-use-simple-wait-queues.patch
+++ b/patches/completion-use-simple-wait-queues.patch
@@ -276,7 +276,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  EXPORT_SYMBOL(completion_done);
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
-@@ -7487,7 +7487,10 @@ void migrate_disable(void)
+@@ -7528,7 +7528,10 @@ void migrate_disable(void)
  		return;
  	}
  #ifdef CONFIG_SCHED_DEBUG
@@ -288,7 +288,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  #endif
 
  	if (p->migrate_disable) {
-@@ -7518,7 +7521,10 @@ void migrate_enable(void)
+@@ -7558,7 +7561,10 @@ void migrate_enable(void)
  	}
 
  #ifdef CONFIG_SCHED_DEBUG
diff --git a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
index f24636b8b1f30e..14e5556fc3290c 100644
--- a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
+++ b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
@@ -1,14 +1,14 @@
 From: Alex Shi <alex.shi@linaro.org>
-Date: Thu, 6 Jul 2017 16:47:46 +0800
-Subject: [PATCH] cpu_pm: replace raw_notifier to atomic_notifier
+Date: Fri, 28 Jul 2017 15:09:25 +0800
+Subject: PM / CPU: replace raw_notifier with atomic_notifier
 
-This patch replace a rwlock and raw notifier by atomic notifier which
-protected by spin_lock and rcu.
+This patch replaces an rwlock and raw notifier by an atomic notifier
+protected by a spin_lock and RCU.
 
-The first to reason to have this replace is due to a 'scheduling while
- atomic' bug of RT kernel on arm/arm64 platform. On arm/arm64, rwlock
-cpu_pm_notifier_lock in cpu_pm cause a potential schedule after irq
-disable in idle call chain:
+The main reason for this change is due to a 'scheduling while atomic'
+bug with RT kernels on ARM/ARM64. On ARM/ARM64, the rwlock
+cpu_pm_notifier_lock in cpu_pm_enter/exit() causes a potential
+schedule after IRQ disable in the idle call chain:
 
  cpu_startup_entry
    cpu_idle_loop
@@ -38,28 +38,24 @@ The kernel panic is here:
 Daniel Lezcano said this notification is needed on arm/arm64 platforms.
 Sebastian suggested using atomic_notifier instead of rwlock, which is not
-only removing the sleeping in idle, but also getting better latency
-improvement.
+only removing the sleeping in idle, but also improving latency.
 
-This patch passed Fengguang's 0day testing.
+Tony Lindgren found a miss use that rcu_read_lock used after rcu_idle_enter
+Paul McKenney suggested trying RCU_NONIDLE.
 
 Signed-off-by: Alex Shi <alex.shi@linaro.org>
-Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Anders Roxell <anders.roxell@linaro.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
-Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Tested-by: Tony Lindgren <tony@atomide.com>
+Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+[ rjw: Subject & changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 ---
- kernel/cpu_pm.c | 43 ++++++-------------------------------------
- 1 file changed, 6 insertions(+), 37 deletions(-)
+ kernel/cpu_pm.c | 50 +++++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 37 deletions(-)
 
 --- a/kernel/cpu_pm.c
 +++ b/kernel/cpu_pm.c
-@@ -22,14 +22,13 @@
+@@ -22,15 +22,21 @@
  #include <linux/spinlock.h>
  #include <linux/syscore_ops.h>
@@ -72,11 +68,19 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	int ret;
 
 -	ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
++	/*
++	 * __atomic_notifier_call_chain has a RCU read critical section, which
++	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
++	 * RCU know this.
++	 */
++	rcu_irq_enter_irqson();
 +	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
  		nr_to_call, nr_calls);
++	rcu_irq_exit_irqson();
 
  	return notifier_to_errno(ret);
-@@ -47,14 +46,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
++ }
+@@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
   */
  int cpu_pm_register_notifier(struct notifier_block *nb)
  {
@@ -92,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
 
-@@ -69,14 +61,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
+@@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
   */
  int cpu_pm_unregister_notifier(struct notifier_block *nb)
  {
@@ -108,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
-@@ -100,7 +85,6 @@ int cpu_pm_enter(void)
+@@ -100,7 +92,6 @@ int cpu_pm_enter(void)
  	int nr_calls;
  	int ret = 0;
 
@@ -116,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
  	if (ret)
  		/*
-@@ -108,7 +92,6 @@ int cpu_pm_enter(void)
+@@ -108,7 +99,6 @@ int cpu_pm_enter(void)
  		 * PM entry who are notified earlier to prepare for it.
  		 */
  		cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -124,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 
  	return ret;
  }
-@@ -128,13 +111,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
+@@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
   */
  int cpu_pm_exit(void)
  {
@@ -139,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
-@@ -159,7 +136,6 @@ int cpu_cluster_pm_enter(void)
+@@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void)
  	int nr_calls;
  	int ret = 0;
 
@@ -147,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
  	if (ret)
  		/*
-@@ -167,7 +143,6 @@ int cpu_cluster_pm_enter(void)
+@@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void)
  		 * PM entry who are notified earlier to prepare for it.
  		 */
  		cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -155,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 
  	return ret;
  }
-@@ -190,13 +165,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+@@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
   */
  int cpu_cluster_pm_exit(void)
  {
diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch
index 5ddb9c5c2b06eb..48a5697ada2d47 100644
--- a/patches/hotplug-light-get-online-cpus.patch
+++ b/patches/hotplug-light-get-online-cpus.patch
@@ -64,15 +64,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  void cpus_read_lock(void)
 
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
-@@ -7469,6 +7469,7 @@ void migrate_disable(void)
+@@ -7510,6 +7510,7 @@ void migrate_disable(void)
  	}
 
  	preempt_disable();
 +	pin_current_cpu();
- 	p->migrate_disable = 1;
 
- 	p->cpus_ptr = cpumask_of(smp_processor_id());
-@@ -7533,12 +7534,15 @@ void migrate_enable(void)
+ 	migrate_disable_update_cpus_allowed(p);
+ 	p->migrate_disable = 1;
+@@ -7572,12 +7573,15 @@ void migrate_enable(void)
  	arg.task = p;
  	arg.dest_cpu = dest_cpu;
diff --git a/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
new file mode 100644
index 00000000000000..67e8c0e37ef343
--- /dev/null
+++ b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
@@ -0,0 +1,32 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 18 Aug 2017 10:09:09 +0200
+Subject: [PATCH] kernel/hrtimer: migrate deferred timer on CPU down
+
+hrtimers, which were deferred to the softirq context, and expire between
+softirq shutdown and hrtimer migration are dangling around. If the CPU
+goes back up the list head will be initialized and this corrupts the
+timer's list. It will remain unnoticed until a hrtimer_cancel().
+This moves those timers so they will expire.
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: Mike Galbraith <efault@gmx.de>
+Tested-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/time/hrtimer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -1748,6 +1748,11 @@ static void migrate_hrtimer_list(struct
+ 	 */
+ 	enqueue_hrtimer(timer, new_base);
+ 	}
++#ifdef CONFIG_PREEMPT_RT_BASE
++	list_splice_tail(&old_base->expired, &new_base->expired);
++	if (!list_empty(&new_base->expired))
++		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++#endif
+ }
+
+ int hrtimers_dead_cpu(unsigned int scpu)
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 02952cda4bfa23..e16fb07c0a7d6f 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 --- /dev/null
 +++ b/localversion-rt
 @@ -0,0 +1 @@
-+-rt9
++-rt10
diff --git a/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
new file mode 100644
index 00000000000000..b5f9fc1bb79c10
--- /dev/null
+++ b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
@@ -0,0 +1,51 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 18 Aug 2017 10:56:14 +0200
+Subject: [PATCH] locking, rwlock-rt: do not save state multiple times in
+ __write_rt_lock()
+
+Save state prior to entering the acquisition loop, otherwise we may
+initially see readers, but upon releasing ->wait_lock see none, loop
+back around, and having not slept, save TASK_UNINTERRUPTIBLE.
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rwlock-rt.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/rwlock-rt.c
++++ b/kernel/locking/rwlock-rt.c
+@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_r
+ 	/* Force readers into slow path */
+ 	atomic_sub(READER_BIAS, &lock->readers);
+ 
+-	for (;;) {
+-		raw_spin_lock_irqsave(&m->wait_lock, flags);
++	raw_spin_lock_irqsave(&m->wait_lock, flags);
+ 
+-		raw_spin_lock(&self->pi_lock);
+-		self->saved_state = self->state;
+-		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+-		raw_spin_unlock(&self->pi_lock);
++	raw_spin_lock(&self->pi_lock);
++	self->saved_state = self->state;
++	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++	raw_spin_unlock(&self->pi_lock);
+ 
++	for (;;) {
+ 		/* Have all readers left the critical region? */
+ 		if (!atomic_read(&lock->readers)) {
+ 			atomic_set(&lock->readers, WRITER_BIAS);
+@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_r
+ 
+ 	if (atomic_read(&lock->readers) != 0)
+ 		schedule();
++
++	raw_spin_lock_irqsave(&m->wait_lock, flags);
++
++	raw_spin_lock(&self->pi_lock);
++	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++	raw_spin_unlock(&self->pi_lock);
+ 	}
+ }
+
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index b59fd2ade25886..225838f8b5e8cb 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -362,15 +362,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  /*
   * The idle tasks have their own, simple scheduling class:
   */
-@@ -7443,6 +7519,7 @@ void migrate_disable(void)
+@@ -7484,6 +7560,7 @@ void migrate_disable(void)
  	}
 
  	preempt_disable();
 +	preempt_lazy_disable();
  	pin_current_cpu();
- 	p->migrate_disable = 1;
-@@ -7512,6 +7589,7 @@ void migrate_enable(void)
+ 	migrate_disable_update_cpus_allowed(p);
+@@ -7551,6 +7628,7 @@ void migrate_enable(void)
  	arg.dest_cpu = dest_cpu;
 
  	unpin_current_cpu();
 +	preempt_lazy_enable();
  	preempt_enable();
  	stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
  	tlb_migrate_finish(p->mm);
-@@ -7520,6 +7598,7 @@ void migrate_enable(void)
+@@ -7559,6 +7637,7 @@ void migrate_enable(void)
  		}
  	}
  	unpin_current_cpu();
diff --git a/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
new file mode 100644
index 00000000000000..a18dbc5b97fc49
--- /dev/null
+++ b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
@@ -0,0 +1,154 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:15 +0200
+Subject: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration
+
+There is a problem in the migrate_disable()/enable() implementation
+regarding the number of migratory tasks in the rt/dl RQs. The problem
+is the following:
+
+When a task is attached to the rt runqueue, it is checked if it either
+can run in more than one CPU, or if it is with migration disable. If
+either check is true, the rt_rq->rt_nr_migratory counter is not
+increased. The counter increases otherwise.
+
+When the task is detached, the same check is done. If either check is
+true, the rt_rq->rt_nr_migratory counter is not decreased. The counter
+decreases otherwise. The same check is done in the dl scheduler.
+
+One important thing is that, migrate disable/enable does not touch this
+counter for tasks attached to the rt rq. So suppose the following chain
+of events.
+
+Assumptions:
+Task A is the only runnable task in A	Task B runs on the CPU B
+Task A runs on CFS (non-rt)		Task B has RT priority
+Thus, rt_nr_migratory is 0		B is running
+Task A can run on all CPUS.
+
+Timeline:
+	CPU A/TASK A			CPU B/TASK B
+A takes the rt mutex X			.
+A disables migration			.
+	.				B tries to take the rt mutex X
+	.				As it is held by A {
+	.				  A inherits the rt priority of B
+	.				  A is dequeued from CFS RQ of CPU A
+	.				  A is enqueued in the RT RQ of CPU A
+	.				  As migration is disabled
+	.				    rt_nr_migratory in A is not increased
+	.
+A enables migration
+A releases the rt mutex X {
+  A returns to its original priority
+  A ask to be dequeued from RT RQ {
+    As migration is now enabled and it can run on all CPUS {
+       rt_nr_migratory should be decreased
+       As rt_nr_migratory is 0, rt_nr_migratory under flows
+    }
+  }
+}
+
+This variable is important because it notifies if there are more than one
+runnable & migratory task in the runqueue. If there are more than one
+tasks, the rt_rq is set as overloaded, and then tries to migrate some
+tasks. This rule is important to keep the scheduler working conserving,
+that is, in a system with M CPUs, the M highest priority tasks should be
+running.
+
+As rt_nr_migratory is unsigned, it will become > 0, notifying that the
+RQ is overloaded, activating pushing mechanism without need.
+
+This patch fixes this problem by decreasing/increasing the
+rt/dl_nr_migratory in the migrate disable/enable operations.
+
+Reported-by: Pei Zhang <pezhang@redhat.com>
+Reported-by: Luiz Capitulino <lcapitulino@redhat.com>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 44 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7449,6 +7449,47 @@ const u32 sched_prio_to_wmult[40] = {
+ 
+ #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ 
++static inline void
++update_nr_migratory(struct task_struct *p, long delta)
++{
++	if (unlikely((p->sched_class == &rt_sched_class ||
++		      p->sched_class == &dl_sched_class) &&
++		      p->nr_cpus_allowed > 1)) {
++		if (p->sched_class == &rt_sched_class)
++			task_rq(p)->rt.rt_nr_migratory += delta;
++		else
++			task_rq(p)->dl.dl_nr_migratory += delta;
++	}
++}
++
++static inline void
++migrate_disable_update_cpus_allowed(struct task_struct *p)
++{
++	struct rq *rq;
++	struct rq_flags rf;
++
++	p->cpus_ptr = cpumask_of(smp_processor_id());
++
++	rq = task_rq_lock(p, &rf);
++	update_nr_migratory(p, -1);
++	p->nr_cpus_allowed = 1;
++	task_rq_unlock(rq, p, &rf);
++}
++
++static inline void
++migrate_enable_update_cpus_allowed(struct task_struct *p)
++{
++	struct rq *rq;
++	struct rq_flags rf;
++
++	p->cpus_ptr = &p->cpus_mask;
++
++	rq = task_rq_lock(p, &rf);
++	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
++	update_nr_migratory(p, 1);
++	task_rq_unlock(rq, p, &rf);
++}
++
+ void migrate_disable(void)
+ {
+ 	struct task_struct *p = current;
+@@ -7469,10 +7510,9 @@ void migrate_disable(void)
+ 	}
+ 
+ 	preempt_disable();
+-	p->migrate_disable = 1;
+ 
+-	p->cpus_ptr = cpumask_of(smp_processor_id());
+-	p->nr_cpus_allowed = 1;
++	migrate_disable_update_cpus_allowed(p);
++	p->migrate_disable = 1;
+ 
+ 	preempt_enable();
+ }
+@@ -7501,9 +7541,8 @@ void migrate_enable(void)
+ 
+ 	preempt_disable();
+ 
+-	p->cpus_ptr = &p->cpus_mask;
+-	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ 	p->migrate_disable = 0;
++	migrate_enable_update_cpus_allowed(p);
+ 
+ 	if (p->migrate_disable_update) {
+ 		struct rq *rq;
diff --git a/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
new file mode 100644
index 00000000000000..0ca8e397154247
--- /dev/null
+++ b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
@@ -0,0 +1,83 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:14 +0200
+Subject: sched/debug: Inform the number of rt/dl task that can migrate
+
+Add the value of the rt_rq.rt_nr_migratory and dl_rq.dl_nr_migratory
+to the sched_debug output, for instance:
+
+rt_rq[0]:
+  .rt_nr_running                 : 2
+  .rt_nr_migratory               : 1	<--- Like this
+  .rt_throttled                  : 0
+  .rt_time                       : 828.645877
+  .rt_runtime                    : 1000.000000
+
+This is useful to debug problems related to the dl/rt schedulers.
+
+This also fixes the format of some variables, that were unsigned, rather
+than signed.
+
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/debug.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int
+ 
+ #define P(x) \
+ 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
++#define PU(x) \
++	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
+ #define PN(x) \
+ 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
+ 
+-	P(rt_nr_running);
++	PU(rt_nr_running);
++#ifdef CONFIG_SMP
++	PU(rt_nr_migratory);
++#endif
+ 	P(rt_throttled);
+ 	PN(rt_time);
+ 	PN(rt_runtime);
+ 
+ #undef PN
++#undef PU
+ #undef P
+ }
+ 
+@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int
+ 	struct dl_bw *dl_bw;
+ 
+ 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
+-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
++
++#define PU(x) \
++	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
++
++	PU(dl_nr_running);
+ #ifdef CONFIG_SMP
++	PU(dl_nr_migratory);
+ 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+ #else
+ 	dl_bw = &dl_rq->dl_bw;
+ #endif
+ 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+ 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
++
++#undef PU
+ }
+ 
+ extern __read_mostly int sched_clock_running;
diff --git a/patches/series b/patches/series
index 9f369c6e42b20e..813e43113cff40 100644
--- a/patches/series
+++ b/patches/series
@@ -395,6 +395,7 @@ sched-deadline-dl_task_timer-has-to-be-irqsafe.patch
 timer-fd-avoid-live-lock.patch
 tick-broadcast--Make-hrtimer-irqsafe.patch
 timer-hrtimer-check-properly-for-a-running-timer.patch
+kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
 
 # POSIX-CPU-TIMERS
 posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -414,6 +415,8 @@ sched-disable-ttwu-queue.patch
 sched-disable-rt-group-sched-on-rt.patch
 sched-ttwu-ensure-success-return-is-correct.patch
 sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch
+sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
+rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
 
 # STOP MACHINE
 stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
@@ -477,6 +480,7 @@ locking-rtmutex--Make-inner-working-of-rt_spin_slow_lock---accessible.patch
 locking-rt-rwlock--Provide-reader-biased-rwlock-for-RT.patch
 locking-rt-rwlock--Make-reader-biased-rwlocks-selectable.patch
 rt-locking--Consolidate-rwlock-variants.patch
+locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
 
 # RCU
 peter_zijlstra-frob-rcu.patch