author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2017-08-18 12:29:58 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2017-08-18 12:29:58 +0200
commit     5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c (patch)
tree       37d44422d6e455a1b97666ce53b768babfcb3ef0
parent     f6122c410207c72c5bd3f9a6e5a722b86b251568 (diff)
download   4.12-rt-patches-5a4f22d5ee59f1e83f70b870afcf7f78b3f7ac6c.tar.gz
[ANNOUNCE] v4.11.12-rt10
Dear RT folks!
I'm pleased to announce the v4.11.12-rt10 patch set.
Changes since v4.11.12-rt9:
- A tweak to the scheduler to let it know that a task is in a
  migration-disabled region, so there are fewer candidate tasks to
  migrate. Idea and patch by Daniel Bristot de Oliveira.
- A fix for the CPU idle code on arm64, which was merged in
  v4.11.9-rt6, has been updated to the version queued for mainline.
- hrtimers which fired during a bad window while a CPU was shutting
  down would be postponed forever and could corrupt the deferred list.
  Reported by Mike Galbraith.
- The new RWLOCK code had a flaw in the write-lock path where a task
  could lose its task state. Reported and fixed by Mike Galbraith.
Known issues
- There was a report regarding a deadlock within the rtmutex code.
The delta patch against v4.11.12-rt9 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.12-rt9-rt10.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.12-rt10
The RT patch against v4.11.12 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.12-rt10.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.12-rt10.tar.xz
Sebastian
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -28,8 +28,15 @@ static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
 {
 	int ret;
 
+	/*
+	 * __atomic_notifier_call_chain has a RCU read critical section, which
+	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
+	 * RCU know this.
+	 */
+	rcu_irq_enter_irqson();
 	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
 		nr_to_call, nr_calls);
+	rcu_irq_exit_irqson();
 
 	return notifier_to_errno(ret);
 }
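
For context, the rcu_irq_enter_irqson()/rcu_irq_exit_irqson() pair added here is
what the RCU_NONIDLE() macro in <linux/rcupdate.h> expands to, so the hunk could
equivalently be written as the sketch below (assuming the v4.11 definition of
the macro):

RCU_NONIDLE(
	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event,
					   NULL, nr_to_call, nr_calls));

Either form tells RCU that this stretch of the idle path must not be treated as
idle, which is what makes the notifier chain's RCU read-side critical section
safe here.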
diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c
--- a/kernel/locking/rwlock-rt.c
+++ b/kernel/locking/rwlock-rt.c
@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 	/* Force readers into slow path */
 	atomic_sub(READER_BIAS, &lock->readers);
 
+	raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+	raw_spin_lock(&self->pi_lock);
+	self->saved_state = self->state;
+	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+	raw_spin_unlock(&self->pi_lock);
+
 	for (;;) {
-		raw_spin_lock_irqsave(&m->wait_lock, flags);
-
-		raw_spin_lock(&self->pi_lock);
-		self->saved_state = self->state;
-		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
-		raw_spin_unlock(&self->pi_lock);
-
 		/* Have all readers left the critical region? */
 		if (!atomic_read(&lock->readers)) {
 			atomic_set(&lock->readers, WRITER_BIAS);
@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_rw_lock *lock)
 
 		if (atomic_read(&lock->readers) != 0)
 			schedule();
+
+		raw_spin_lock_irqsave(&m->wait_lock, flags);
+
+		raw_spin_lock(&self->pi_lock);
+		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+		raw_spin_unlock(&self->pi_lock);
 	}
 }
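
The flaw is easier to see in isolation: with the save inside the loop, a writer
that sees readers, drops the wait_lock and comes back around without sleeping
re-saves the task state it itself already set to TASK_UNINTERRUPTIBLE. A
hypothetical userspace analog (stand-in states and helpers, not kernel code)
that demonstrates the loss:

#include <assert.h>
#include <stdio.h>

enum state { STATE_RUNNING, STATE_INTERRUPTIBLE, STATE_UNINTERRUPTIBLE };

static enum state state = STATE_INTERRUPTIBLE;	/* caller's original state */
static enum state saved_state;

static void buggy_write_lock(int passes)
{
	/* Bug: the save sits inside the retry loop. On the second pass it
	 * saves STATE_UNINTERRUPTIBLE, clobbering the remembered
	 * STATE_INTERRUPTIBLE. */
	while (passes--) {
		saved_state = state;
		state = STATE_UNINTERRUPTIBLE;
	}
	state = saved_state;	/* "restore" - but the original state is gone */
}

int main(void)
{
	buggy_write_lock(2);	/* saw readers once, looped without sleeping */
	printf("state after unlock: %d (expected %d)\n",
	       state, STATE_INTERRUPTIBLE);
	assert(state == STATE_UNINTERRUPTIBLE);	/* the bug, demonstrated */
	return 0;
}

Hoisting the save in front of the loop, as the hunk above does, makes the first
snapshot the only one; the loop body then merely re-asserts
TASK_UNINTERRUPTIBLE without touching saved_state.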
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7499,6 +7499,47 @@ const u32 sched_prio_to_wmult[40] = {
 
 #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
 
+static inline void
+update_nr_migratory(struct task_struct *p, long delta)
+{
+	if (unlikely((p->sched_class == &rt_sched_class ||
+		      p->sched_class == &dl_sched_class) &&
+		      p->nr_cpus_allowed > 1)) {
+		if (p->sched_class == &rt_sched_class)
+			task_rq(p)->rt.rt_nr_migratory += delta;
+		else
+			task_rq(p)->dl.dl_nr_migratory += delta;
+	}
+}
+
+static inline void
+migrate_disable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = cpumask_of(smp_processor_id());
+
+	rq = task_rq_lock(p, &rf);
+	update_nr_migratory(p, -1);
+	p->nr_cpus_allowed = 1;
+	task_rq_unlock(rq, p, &rf);
+}
+
+static inline void
+migrate_enable_update_cpus_allowed(struct task_struct *p)
+{
+	struct rq *rq;
+	struct rq_flags rf;
+
+	p->cpus_ptr = &p->cpus_mask;
+
+	rq = task_rq_lock(p, &rf);
+	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+	update_nr_migratory(p, 1);
+	task_rq_unlock(rq, p, &rf);
+}
+
 void migrate_disable(void)
 {
 	struct task_struct *p = current;
@@ -7524,10 +7565,9 @@ void migrate_disable(void)
 	preempt_disable();
 	preempt_lazy_disable();
 	pin_current_cpu();
-	p->migrate_disable = 1;
 
-	p->cpus_ptr = cpumask_of(smp_processor_id());
-	p->nr_cpus_allowed = 1;
+	migrate_disable_update_cpus_allowed(p);
+	p->migrate_disable = 1;
 
 	preempt_enable();
 }
@@ -7559,9 +7599,8 @@ void migrate_enable(void)
 
 	preempt_disable();
 
-	p->cpus_ptr = &p->cpus_mask;
-	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
 	p->migrate_disable = 0;
+	migrate_enable_update_cpus_allowed(p);
 
 	if (p->migrate_disable_update) {
 		struct rq *rq;
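
From a caller's point of view nothing changes; the sketch below (kernel-style,
not part of this patch set, with a hypothetical per-CPU counter) shows what the
new helpers buy:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, frob_count);

static void frob_this_cpu(void)
{
	/* Pin the task: cpus_ptr shrinks to the current CPU and, with the
	 * hunks above, nr_cpus_allowed drops to 1 and rt/dl_nr_migratory
	 * is decremented, so the push logic stops counting this task. */
	migrate_disable();
	this_cpu_inc(frob_count);
	/* Restore the mask, nr_cpus_allowed and the migratory accounting. */
	migrate_enable();
}

Taking the rq lock inside the helpers is what allows the counters to be
adjusted safely against a concurrent attach/detach of the task.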
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-	P(rt_nr_running);
+	PU(rt_nr_running);
+#ifdef CONFIG_SMP
+	PU(rt_nr_migratory);
+#endif
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 	struct dl_bw *dl_bw;
 
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+	PU(dl_nr_running);
 #ifdef CONFIG_SMP
+	PU(dl_nr_migratory);
 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
 	dl_bw = &dl_rq->dl_bw;
 #endif
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
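
With both hunks applied, /proc/sched_debug reports how many rt/dl tasks can
migrate; the patch description (quoted in full in the quilt-queue diff below)
shows the intended output:

rt_rq[0]:
  .rt_nr_running                 : 2
  .rt_nr_migratory               : 1
  .rt_throttled                  : 0
  .rt_time                       : 828.645877
  .rt_runtime                    : 1000.000000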
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1802,6 +1802,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 */
 		enqueue_hrtimer(timer, new_base);
 	}
+#ifdef CONFIG_PREEMPT_RT_BASE
+	list_splice_tail(&old_base->expired, &new_base->expired);
+	if (!list_empty(&new_base->expired))
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+#endif
 }
 
 int hrtimers_dead_cpu(unsigned int scpu)
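
The failure window, briefly: on PREEMPT_RT, hrtimers marked for softirq expiry
sit on a per-clock-base expired list. A timer that fires after the outgoing
CPU's softirq processing has stopped but before hrtimers_dead_cpu() runs is
stranded there, and the next onlining of that CPU re-initializes the list head
over the still-linked entry. A sketch of the splice semantics used above
(hypothetical helper, assuming the RT tree's ->expired list):

#include <linux/list.h>

static void migrate_deferred_timers(struct list_head *old_expired,
				    struct list_head *new_expired)
{
	/* Move all stranded entries to the online CPU's list in one
	 * operation. list_splice_tail() does not reinitialize the source
	 * list head; the CPU-up path INIT_LIST_HEAD()s it anyway. */
	list_splice_tail(old_expired, new_expired);
}

Raising HRTIMER_SOFTIRQ afterwards ensures the moved timers actually expire on
their new CPU instead of waiting for an unrelated wakeup.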
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt9
+-rt10
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 patches/completion-use-simple-wait-queues.patch                    |   4
 patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch       |  64
 patches/hotplug-light-get-online-cpus.patch                        |   8
 patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch    |  32
 patches/localversion.patch                                         |   2
 patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch |  51
 patches/preempt-lazy-support.patch                                 |   8
 patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch | 154
 patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch |  83
 patches/series                                                     |   4
 10 files changed, 369 insertions, 41 deletions
diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch
index 80e8f53b3086af..3ce5296d8d20bd 100644
--- a/patches/completion-use-simple-wait-queues.patch
+++ b/patches/completion-use-simple-wait-queues.patch
@@ -276,7 +276,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  EXPORT_SYMBOL(completion_done);
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
-@@ -7487,7 +7487,10 @@ void migrate_disable(void)
+@@ -7528,7 +7528,10 @@ void migrate_disable(void)
  		return;
  	}
  #ifdef CONFIG_SCHED_DEBUG
@@ -288,7 +288,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  #endif
 
  	if (p->migrate_disable) {
-@@ -7518,7 +7521,10 @@ void migrate_enable(void)
+@@ -7558,7 +7561,10 @@ void migrate_enable(void)
  	}
 
  #ifdef CONFIG_SCHED_DEBUG
diff --git a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
index f24636b8b1f30e..14e5556fc3290c 100644
--- a/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
+++ b/patches/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch
@@ -1,14 +1,14 @@
 From: Alex Shi <alex.shi@linaro.org>
-Date: Thu, 6 Jul 2017 16:47:46 +0800
-Subject: [PATCH] cpu_pm: replace raw_notifier to atomic_notifier
+Date: Fri, 28 Jul 2017 15:09:25 +0800
+Subject: PM / CPU: replace raw_notifier with atomic_notifier
 
-This patch replace a rwlock and raw notifier by atomic notifier which
-protected by spin_lock and rcu.
+This patch replaces an rwlock and raw notifier by an atomic notifier
+protected by a spin_lock and RCU.
 
-The first to reason to have this replace is due to a 'scheduling while
- atomic' bug of RT kernel on arm/arm64 platform. On arm/arm64, rwlock
-cpu_pm_notifier_lock in cpu_pm cause a potential schedule after irq
-disable in idle call chain:
+The main reason for this change is due to a 'scheduling while atomic'
+bug with RT kernels on ARM/ARM64. On ARM/ARM64, the rwlock
+cpu_pm_notifier_lock in cpu_pm_enter/exit() causes a potential
+schedule after IRQ disable in the idle call chain:
 
  cpu_startup_entry
    cpu_idle_loop
@@ -38,28 +38,24 @@ The kernel panic is here:
 Daniel Lezcano said this notification is needed on arm/arm64 platforms.
 Sebastian suggested using atomic_notifier instead of rwlock, which is not
-only removing the sleeping in idle, but also getting better latency
-improvement.
+only removing the sleeping in idle, but also improving latency.
 
-This patch passed Fengguang's 0day testing.
+Tony Lindgren found a miss use that rcu_read_lock used after rcu_idle_enter
+Paul McKenney suggested trying RCU_NONIDLE.
 
 Signed-off-by: Alex Shi <alex.shi@linaro.org>
-Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: Anders Roxell <anders.roxell@linaro.org>
-Cc: Rik van Riel <riel@redhat.com>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
-Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Tested-by: Tony Lindgren <tony@atomide.com>
+Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+[ rjw: Subject & changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 ---
- kernel/cpu_pm.c | 43 ++++++-------------------------------------
- 1 file changed, 6 insertions(+), 37 deletions(-)
+ kernel/cpu_pm.c | 50 +++++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 37 deletions(-)
 
 --- a/kernel/cpu_pm.c
 +++ b/kernel/cpu_pm.c
-@@ -22,14 +22,13 @@
+@@ -22,15 +22,21 @@
  #include <linux/spinlock.h>
  #include <linux/syscore_ops.h>
@@ -72,11 +68,19 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	int ret;
 
 -	ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
++	/*
++	 * __atomic_notifier_call_chain has a RCU read critical section, which
++	 * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let
++	 * RCU know this.
++	 */
++	rcu_irq_enter_irqson();
 +	ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
  		nr_to_call, nr_calls);
++	rcu_irq_exit_irqson();
 
  	return notifier_to_errno(ret);
-@@ -47,14 +46,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
++ }
+@@ -47,14 +53,7 @@ static int cpu_pm_notify(enum cpu_pm_eve
   */
  int cpu_pm_register_notifier(struct notifier_block *nb)
  {
@@ -92,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
 
-@@ -69,14 +61,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
+@@ -69,14 +68,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi
   */
  int cpu_pm_unregister_notifier(struct notifier_block *nb)
  {
@@ -108,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
-@@ -100,7 +85,6 @@ int cpu_pm_enter(void)
+@@ -100,7 +92,6 @@ int cpu_pm_enter(void)
  	int nr_calls;
  	int ret = 0;
 
@@ -116,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
  	if (ret)
  		/*
-@@ -108,7 +92,6 @@ int cpu_pm_enter(void)
+@@ -108,7 +99,6 @@ int cpu_pm_enter(void)
  		 * PM entry who are notified earlier to prepare for it.
  		 */
  		cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -124,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 
  	return ret;
  }
-@@ -128,13 +111,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
+@@ -128,13 +118,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter);
   */
  int cpu_pm_exit(void)
  {
@@ -139,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  }
  EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
-@@ -159,7 +136,6 @@ int cpu_cluster_pm_enter(void)
+@@ -159,7 +143,6 @@ int cpu_cluster_pm_enter(void)
  	int nr_calls;
  	int ret = 0;
 
@@ -147,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  	ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
  	if (ret)
  		/*
-@@ -167,7 +143,6 @@ int cpu_cluster_pm_enter(void)
+@@ -167,7 +150,6 @@ int cpu_cluster_pm_enter(void)
  		 * PM entry who are notified earlier to prepare for it.
  		 */
  		cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
@@ -155,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 
  	return ret;
  }
-@@ -190,13 +165,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+@@ -190,13 +172,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
   */
  int cpu_cluster_pm_exit(void)
  {
diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch
index 5ddb9c5c2b06eb..48a5697ada2d47 100644
--- a/patches/hotplug-light-get-online-cpus.patch
+++ b/patches/hotplug-light-get-online-cpus.patch
@@ -64,15 +64,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  void cpus_read_lock(void)
 
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
-@@ -7469,6 +7469,7 @@ void migrate_disable(void)
+@@ -7510,6 +7510,7 @@ void migrate_disable(void)
  	}
 
  	preempt_disable();
 +	pin_current_cpu();
- 	p->migrate_disable = 1;
 
- 	p->cpus_ptr = cpumask_of(smp_processor_id());
-@@ -7533,12 +7534,15 @@ void migrate_enable(void)
+ 	migrate_disable_update_cpus_allowed(p);
+ 	p->migrate_disable = 1;
+@@ -7572,12 +7573,15 @@ void migrate_enable(void)
  	arg.task = p;
  	arg.dest_cpu = dest_cpu;
diff --git a/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
new file mode 100644
index 00000000000000..67e8c0e37ef343
--- /dev/null
+++ b/patches/kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
@@ -0,0 +1,32 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 18 Aug 2017 10:09:09 +0200
+Subject: [PATCH] kernel/hrtimer: migrate deferred timer on CPU down
+
+hrtimers, which were deferred to the softirq context, and expire between
+softirq shutdown and hrtimer migration are dangling around. If the CPU
+goes back up the list head will be initialized and this corrupts the
+timer's list. It will remain unnoticed until a hrtimer_cancel().
+This moves those timers so they will expire.
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: Mike Galbraith <efault@gmx.de>
+Tested-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/time/hrtimer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -1748,6 +1748,11 @@ static void migrate_hrtimer_list(struct
+ 	 */
+ 	enqueue_hrtimer(timer, new_base);
+ 	}
++#ifdef CONFIG_PREEMPT_RT_BASE
++	list_splice_tail(&old_base->expired, &new_base->expired);
++	if (!list_empty(&new_base->expired))
++		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++#endif
+ }
+
+ int hrtimers_dead_cpu(unsigned int scpu)
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 02952cda4bfa23..e16fb07c0a7d6f 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 --- /dev/null
 +++ b/localversion-rt
 @@ -0,0 +1 @@
-+-rt9
++-rt10
diff --git a/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
new file mode 100644
index 00000000000000..b5f9fc1bb79c10
--- /dev/null
+++ b/patches/locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
@@ -0,0 +1,51 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 18 Aug 2017 10:56:14 +0200
+Subject: [PATCH] locking, rwlock-rt: do not save state multiple times in
+ __write_rt_lock()
+
+Save state prior to entering the acquisition loop, otherwise we may
+initially see readers, but upon releasing ->wait_lock see none, loop
+back around, and having not slept, save TASK_UNINTERRUPTIBLE.
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rwlock-rt.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/rwlock-rt.c
++++ b/kernel/locking/rwlock-rt.c
+@@ -190,14 +190,14 @@ void __sched __write_rt_lock(struct rt_r
+ 	/* Force readers into slow path */
+ 	atomic_sub(READER_BIAS, &lock->readers);
+ 
+-	for (;;) {
+-		raw_spin_lock_irqsave(&m->wait_lock, flags);
++	raw_spin_lock_irqsave(&m->wait_lock, flags);
+ 
+-		raw_spin_lock(&self->pi_lock);
+-		self->saved_state = self->state;
+-		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
+-		raw_spin_unlock(&self->pi_lock);
++	raw_spin_lock(&self->pi_lock);
++	self->saved_state = self->state;
++	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++	raw_spin_unlock(&self->pi_lock);
+ 
++	for (;;) {
+ 		/* Have all readers left the critical region? */
+ 		if (!atomic_read(&lock->readers)) {
+ 			atomic_set(&lock->readers, WRITER_BIAS);
+@@ -213,6 +213,12 @@ void __sched __write_rt_lock(struct rt_r
+ 
+ 	if (atomic_read(&lock->readers) != 0)
+ 		schedule();
++
++	raw_spin_lock_irqsave(&m->wait_lock, flags);
++
++	raw_spin_lock(&self->pi_lock);
++	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
++	raw_spin_unlock(&self->pi_lock);
+ 	}
+ }
+
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index b59fd2ade25886..225838f8b5e8cb 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -362,15 +362,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  /*
   * The idle tasks have their own, simple scheduling class:
   */
-@@ -7443,6 +7519,7 @@ void migrate_disable(void)
+@@ -7484,6 +7560,7 @@ void migrate_disable(void)
  	}
 
  	preempt_disable();
 +	preempt_lazy_disable();
  	pin_current_cpu();
- 	p->migrate_disable = 1;
-@@ -7512,6 +7589,7 @@ void migrate_enable(void)
+ 	migrate_disable_update_cpus_allowed(p);
+@@ -7551,6 +7628,7 @@ void migrate_enable(void)
  	arg.dest_cpu = dest_cpu;
 
  	unpin_current_cpu();
 +	preempt_lazy_enable();
  	preempt_enable();
  	stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
  	tlb_migrate_finish(p->mm);
-@@ -7520,6 +7598,7 @@ void migrate_enable(void)
+@@ -7559,6 +7637,7 @@ void migrate_enable(void)
  		}
  	}
  	unpin_current_cpu();
diff --git a/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
new file mode 100644
index 00000000000000..a18dbc5b97fc49
--- /dev/null
+++ b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
@@ -0,0 +1,154 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:15 +0200
+Subject: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration
+
+There is a problem in the migrate_disable()/enable() implementation
+regarding the number of migratory tasks in the rt/dl RQs. The problem
+is the following:
+
+When a task is attached to the rt runqueue, it is checked if it either
+can run in more than one CPU, or if it is with migration disable. If
+either check is true, the rt_rq->rt_nr_migratory counter is not
+increased. The counter increases otherwise.
+
+When the task is detached, the same check is done. If either check is
+true, the rt_rq->rt_nr_migratory counter is not decreased. The counter
+decreases otherwise. The same check is done in the dl scheduler.
+
+One important thing is that, migrate disable/enable does not touch this
+counter for tasks attached to the rt rq. So suppose the following chain
+of events.
+
+Assumptions:
+Task A is the only runnable task in A	Task B runs on the CPU B
+Task A runs on CFS (non-rt)		Task B has RT priority
+Thus, rt_nr_migratory is 0		B is running
+Task A can run on all CPUS.
+
+Timeline:
+	CPU A/TASK A			CPU B/TASK B
+A takes the rt mutex X			.
+A disables migration			.
+	.				B tries to take the rt mutex X
+	.				As it is held by A {
+	.				  A inherits the rt priority of B
+	.				  A is dequeued from CFS RQ of CPU A
+	.				  A is enqueued in the RT RQ of CPU A
+	.				  As migration is disabled
+	.				    rt_nr_migratory in A is not increased
+	.
+A enables migration
+A releases the rt mutex X {
+  A returns to its original priority
+  A ask to be dequeued from RT RQ {
+    As migration is now enabled and it can run on all CPUS {
+       rt_nr_migratory should be decreased
+       As rt_nr_migratory is 0, rt_nr_migratory under flows
+    }
+  }
+}
+
+This variable is important because it notifies if there are more than one
+runnable & migratory task in the runqueue. If there are more than one
+tasks, the rt_rq is set as overloaded, and then tries to migrate some
+tasks. This rule is important to keep the scheduler working conserving,
+that is, in a system with M CPUs, the M highest priority tasks should be
+running.
+
+As rt_nr_migratory is unsigned, it will become > 0, notifying that the
+RQ is overloaded, activating pushing mechanism without need.
+
+This patch fixes this problem by decreasing/increasing the
+rt/dl_nr_migratory in the migrate disable/enable operations.
+
+Reported-by: Pei Zhang <pezhang@redhat.com>
+Reported-by: Luiz Capitulino <lcapitulino@redhat.com>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 44 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7449,6 +7449,47 @@ const u32 sched_prio_to_wmult[40] = {
+ 
+ #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ 
++static inline void
++update_nr_migratory(struct task_struct *p, long delta)
++{
++	if (unlikely((p->sched_class == &rt_sched_class ||
++		      p->sched_class == &dl_sched_class) &&
++		      p->nr_cpus_allowed > 1)) {
++		if (p->sched_class == &rt_sched_class)
++			task_rq(p)->rt.rt_nr_migratory += delta;
++		else
++			task_rq(p)->dl.dl_nr_migratory += delta;
++	}
++}
++
++static inline void
++migrate_disable_update_cpus_allowed(struct task_struct *p)
++{
++	struct rq *rq;
++	struct rq_flags rf;
++
++	p->cpus_ptr = cpumask_of(smp_processor_id());
++
++	rq = task_rq_lock(p, &rf);
++	update_nr_migratory(p, -1);
++	p->nr_cpus_allowed = 1;
++	task_rq_unlock(rq, p, &rf);
++}
++
++static inline void
++migrate_enable_update_cpus_allowed(struct task_struct *p)
++{
++	struct rq *rq;
++	struct rq_flags rf;
++
++	p->cpus_ptr = &p->cpus_mask;
++
++	rq = task_rq_lock(p, &rf);
++	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
++	update_nr_migratory(p, 1);
++	task_rq_unlock(rq, p, &rf);
++}
++
+ void migrate_disable(void)
+ {
+ 	struct task_struct *p = current;
+@@ -7469,10 +7510,9 @@ void migrate_disable(void)
+ 	}
+ 
+ 	preempt_disable();
+-	p->migrate_disable = 1;
+ 
+-	p->cpus_ptr = cpumask_of(smp_processor_id());
+-	p->nr_cpus_allowed = 1;
++	migrate_disable_update_cpus_allowed(p);
++	p->migrate_disable = 1;
+ 
+ 	preempt_enable();
+ }
+@@ -7501,9 +7541,8 @@ void migrate_enable(void)
+ 
+ 	preempt_disable();
+ 
+-	p->cpus_ptr = &p->cpus_mask;
+-	p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ 	p->migrate_disable = 0;
++	migrate_enable_update_cpus_allowed(p);
+ 
+ 	if (p->migrate_disable_update) {
+ 		struct rq *rq;
diff --git a/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
new file mode 100644
index 00000000000000..0ca8e397154247
--- /dev/null
+++ b/patches/sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
@@ -0,0 +1,83 @@
+From: Daniel Bristot de Oliveira <bristot@redhat.com>
+Date: Mon, 26 Jun 2017 17:07:14 +0200
+Subject: sched/debug: Inform the number of rt/dl task that can migrate
+
+Add the value of the rt_rq.rt_nr_migratory and dl_rq.dl_nr_migratory
+to the sched_debug output, for instance:
+
+rt_rq[0]:
+  .rt_nr_running                 : 2
+  .rt_nr_migratory               : 1	<--- Like this
+  .rt_throttled                  : 0
+  .rt_time                       : 828.645877
+  .rt_runtime                    : 1000.000000
+
+This is useful to debug problems related to the dl/rt schedulers.
+
+This also fixes the format of some variables, that were unsigned, rather
+than signed.
+
+Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Luiz Capitulino <lcapitulino@redhat.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/debug.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int
+ 
+ #define P(x) \
+ 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
++#define PU(x) \
++	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
+ #define PN(x) \
+ 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
+ 
+-	P(rt_nr_running);
++	PU(rt_nr_running);
++#ifdef CONFIG_SMP
++	PU(rt_nr_migratory);
++#endif
+ 	P(rt_throttled);
+ 	PN(rt_time);
+ 	PN(rt_runtime);
+ 
+ #undef PN
++#undef PU
+ #undef P
+ }
+ 
+@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int
+ 	struct dl_bw *dl_bw;
+ 
+ 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
+-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
++
++#define PU(x) \
++	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
++
++	PU(dl_nr_running);
+ #ifdef CONFIG_SMP
++	PU(dl_nr_migratory);
+ 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+ #else
+ 	dl_bw = &dl_rq->dl_bw;
+ #endif
+ 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+ 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
++
++#undef PU
+ }
+ 
+ extern __read_mostly int sched_clock_running;
diff --git a/patches/series b/patches/series
index 9f369c6e42b20e..813e43113cff40 100644
--- a/patches/series
+++ b/patches/series
@@ -395,6 +395,7 @@ sched-deadline-dl_task_timer-has-to-be-irqsafe.patch
 timer-fd-avoid-live-lock.patch
 tick-broadcast--Make-hrtimer-irqsafe.patch
 timer-hrtimer-check-properly-for-a-running-timer.patch
+kernel-hrtimer-migrate-deferred-timer-on-CPU-down.patch
 
 # POSIX-CPU-TIMERS
 posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -414,6 +415,8 @@ sched-disable-ttwu-queue.patch
 sched-disable-rt-group-sched-on-rt.patch
 sched-ttwu-ensure-success-return-is-correct.patch
 sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch
+sched-debug-Inform-the-number-of-rt-dl-task-that-can.patch
+rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
 
 # STOP MACHINE
 stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
@@ -477,6 +480,7 @@ locking-rtmutex--Make-inner-working-of-rt_spin_slow_lock---accessible.patch
 locking-rt-rwlock--Provide-reader-biased-rwlock-for-RT.patch
 locking-rt-rwlock--Make-reader-biased-rwlocks-selectable.patch
 rt-locking--Consolidate-rwlock-variants.patch
+locking-rwlock-rt-do-not-save-state-multiple-times-i.patch
 
 # RCU
 peter_zijlstra-frob-rcu.patch