summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2015-09-21 18:41:01 +0200
committerThomas Gleixner <tglx@linutronix.de>2015-09-21 18:41:01 +0200
commit7461758b9e982e4ea6280ce9308492e7cceda2ed (patch)
treeee87a74437efc4659f99604b32a2b08298ddd616
parent2afdf8087c2f343df801ac6bff591ed8b05658b1 (diff)
download4.9-rt-patches-7461758b9e982e4ea6280ce9308492e7cceda2ed.tar.gz
[ANNOUNCE] 4.1.7-rt8
Dear RT folks! I'm pleased to announce the v4.1.7-rt8 patch set. v4.1.6-rt6 and v4.1.7-rt7 are non-announced updates to incorporate the linux-4.1.y stable tree changes. Changes since v4.1.5-rt5: - Update to 4.1.7 - Cherry-pick a XFS lockdep annotation fix from mainline - Use preempt_xxx_nort in the generic implementation of k[un]map_atomic. - Revert d04ea10ba1ea mmc: sdhci: don't provide hard irq handler - Force thread primary handlers of interrupts which provide both a primary and a threaded handler - Move clear_tasks_mm_cpumask() call to __cpu_die() on ARM (Grygorii) - Fix a RCU splat in the trace histogram (Philipp) Solved issues: - The high CPU usage problem reported by Nicholas turned out to be a scalability issue of the gcov instrumentation Known issues: - bcache stays disabled - CPU hotplug is not better than before - The netlink_release() OOPS, reported by Clark, is still on the list, but unsolved due to lack of information The delta patch against 4.1.7-rt7 is appended below and can be found here: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/incr/patch-4.1.7-rt7-rt8.patch.xz You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.1.7-rt8 The RT patch against 4.1.5 can be found here: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patch-4.1.7-rt8.patch.xz The split quilt queue is available at: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.7-rt8.tar.xz Enjoy! tglx Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully337
-rw-r--r--patches/localversion.patch8
-rw-r--r--patches/mm--rt--Fix-generic-kmap_atomic-for-RT42
-rw-r--r--patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch73
-rw-r--r--patches/random-make-it-work-on-rt.patch36
-rw-r--r--patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch97
-rw-r--r--patches/series6
-rw-r--r--patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch86
-rw-r--r--patches/xfs--clean-up-inode-lockdep-annotations285
9 files changed, 880 insertions, 90 deletions
diff --git a/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully b/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully
new file mode 100644
index 00000000000000..a84bff7945759c
--- /dev/null
+++ b/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully
@@ -0,0 +1,337 @@
+Subject: genirq: Handle force threading of interrupts with primary and thread handler
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 19 Sep 2015 11:56:20 +0200
+
+Force threading of interrupts does not deal with interrupts which are
+requested with a primary and a threaded handler. The current policy is
+to leave them alone and let the primary handler run in interrupt
+context, but we set the ONESHOT flag for those interrupts as well.
+
+Kohji Okuno debugged a problem with the SDHCI driver where the
+interrupt thread waits for a hardware interrupt to trigger, which cant
+work well because the hardware interrupt is masked due to the ONESHOT
+flag being set. He proposed to set the ONESHOT flag only if the
+interrupt does not provide a thread handler.
+
+Though that does not work either because these interrupts can be
+shared. So the other interrupt would rightfully get the ONESHOT flag
+set and therefor the same situation would happen again.
+
+To deal with this proper, we need to force thread the primary handler
+of such interrupts as well. That means that the primary interrupt
+handler is treated as any other primary interrupt handler which is not
+marked IRQF_NO_THREAD. The threaded handler becomes a separate thread
+so the SDHCI flow logic can be handled gracefully.
+
+The same issue was reported against 4.1-rt.
+
+Reported-by: Kohji Okuno <okuno.kohji@jp.panasonic.com>
+Reported-By: Michal Šmucr <msmucr@gmail.com>
+Reported-and-tested-by: Nathan Sullivan <nathan.sullivan@ni.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: stable-rt@vger.kernel.org
+---
+
+This requires to revert commit: d04ea10ba1ea 'mmc: sdhci: don't provide
+hard irq handler'
+
+ include/linux/interrupt.h | 2
+ kernel/irq/manage.c | 160 +++++++++++++++++++++++++++++++++-------------
+ 2 files changed, 120 insertions(+), 42 deletions(-)
+
+Index: linux-rt-devel/include/linux/interrupt.h
+===================================================================
+--- linux-rt-devel.orig/include/linux/interrupt.h
++++ linux-rt-devel/include/linux/interrupt.h
+@@ -102,6 +102,7 @@ typedef irqreturn_t (*irq_handler_t)(int
+ * @flags: flags (see IRQF_* above)
+ * @thread_fn: interrupt handler function for threaded interrupts
+ * @thread: thread pointer for threaded interrupts
++ * @secondary: pointer to secondary irqaction (force threading)
+ * @thread_flags: flags related to @thread
+ * @thread_mask: bitmask for keeping track of @thread activity
+ * @dir: pointer to the proc/irq/NN/name entry
+@@ -113,6 +114,7 @@ struct irqaction {
+ struct irqaction *next;
+ irq_handler_t thread_fn;
+ struct task_struct *thread;
++ struct irqaction *secondary;
+ unsigned int irq;
+ unsigned int flags;
+ unsigned long thread_flags;
+Index: linux-rt-devel/kernel/irq/manage.c
+===================================================================
+--- linux-rt-devel.orig/kernel/irq/manage.c
++++ linux-rt-devel/kernel/irq/manage.c
+@@ -697,6 +697,12 @@ static irqreturn_t irq_nested_primary_ha
+ return IRQ_NONE;
+ }
+
++static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
++{
++ WARN(1, "Secondary action handler called for irq %d\n", irq);
++ return IRQ_NONE;
++}
++
+ static int irq_wait_for_interrupt(struct irqaction *action)
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+@@ -723,7 +729,8 @@ static int irq_wait_for_interrupt(struct
+ static void irq_finalize_oneshot(struct irq_desc *desc,
+ struct irqaction *action)
+ {
+- if (!(desc->istate & IRQS_ONESHOT))
++ if (!(desc->istate & IRQS_ONESHOT) ||
++ action->handler == irq_forced_secondary_handler)
+ return;
+ again:
+ chip_bus_lock(desc);
+@@ -877,6 +884,18 @@ static void irq_thread_dtor(struct callb
+ irq_finalize_oneshot(desc, action);
+ }
+
++static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
++{
++ struct irqaction *secondary = action->secondary;
++
++ if (WARN_ON_ONCE(!secondary))
++ return;
++
++ raw_spin_lock_irq(&desc->lock);
++ __irq_wake_thread(desc, secondary);
++ raw_spin_unlock_irq(&desc->lock);
++}
++
+ /*
+ * Interrupt handler thread
+ */
+@@ -907,6 +926,8 @@ static int irq_thread(void *data)
+ action_ret = handler_fn(desc, action);
+ if (action_ret == IRQ_HANDLED)
+ atomic_inc(&desc->threads_handled);
++ if (action_ret == IRQ_WAKE_THREAD)
++ irq_wake_secondary(desc, action);
+
+ wake_threads_waitq(desc);
+ }
+@@ -951,20 +972,36 @@ void irq_wake_thread(unsigned int irq, v
+ }
+ EXPORT_SYMBOL_GPL(irq_wake_thread);
+
+-static void irq_setup_forced_threading(struct irqaction *new)
++static int irq_setup_forced_threading(struct irqaction *new)
+ {
+ if (!force_irqthreads)
+- return;
++ return 0;
+ if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
+- return;
++ return 0;
+
+ new->flags |= IRQF_ONESHOT;
+
+- if (!new->thread_fn) {
+- set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
+- new->thread_fn = new->handler;
+- new->handler = irq_default_primary_handler;
+- }
++ /*
++ * Handle the case where we have a real primary handler and a
++ * thread handler. We force thread them as well by creating a
++ * secondary action.
++ */
++ if (new->handler != irq_default_primary_handler && new->thread_fn) {
++ /* Allocate the secondary action */
++ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
++ if (!new->secondary)
++ return -ENOMEM;
++ new->secondary->handler = irq_forced_secondary_handler;
++ new->secondary->thread_fn = new->thread_fn;
++ new->secondary->dev_id = new->dev_id;
++ new->secondary->irq = new->irq;
++ new->secondary->name = new->name;
++ }
++ /* Deal with the primary handler */
++ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
++ new->thread_fn = new->handler;
++ new->handler = irq_default_primary_handler;
++ return 0;
+ }
+
+ static int irq_request_resources(struct irq_desc *desc)
+@@ -984,6 +1021,48 @@ static void irq_release_resources(struct
+ c->irq_release_resources(d);
+ }
+
++static int
++setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
++{
++ struct task_struct *t;
++ struct sched_param param = {
++ .sched_priority = MAX_USER_RT_PRIO/2,
++ };
++
++ if (!secondary) {
++ t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
++ new->name);
++ } else {
++ t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq,
++ new->name);
++ param.sched_priority += 1;
++ }
++
++ if (IS_ERR(t))
++ return PTR_ERR(t);
++
++ sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
++
++ /*
++ * We keep the reference to the task struct even if
++ * the thread dies to avoid that the interrupt code
++ * references an already freed task_struct.
++ */
++ get_task_struct(t);
++ new->thread = t;
++ /*
++ * Tell the thread to set its affinity. This is
++ * important for shared interrupt handlers as we do
++ * not invoke setup_affinity() for the secondary
++ * handlers as everything is already set up. Even for
++ * interrupts marked with IRQF_NO_BALANCE this is
++ * correct as we want the thread to move to the cpu(s)
++ * on which the requesting code placed the interrupt.
++ */
++ set_bit(IRQTF_AFFINITY, &new->thread_flags);
++ return 0;
++}
++
+ /*
+ * Internal function to register an irqaction - typically used to
+ * allocate special interrupts that are part of the architecture.
+@@ -1004,6 +1083,8 @@ __setup_irq(unsigned int irq, struct irq
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
+
++ new->irq = irq;
++
+ /*
+ * Check whether the interrupt nests into another interrupt
+ * thread.
+@@ -1021,8 +1102,11 @@ __setup_irq(unsigned int irq, struct irq
+ */
+ new->handler = irq_nested_primary_handler;
+ } else {
+- if (irq_settings_can_thread(desc))
+- irq_setup_forced_threading(new);
++ if (irq_settings_can_thread(desc)) {
++ ret = irq_setup_forced_threading(new);
++ if (ret)
++ goto out_mput;
++ }
+ }
+
+ /*
+@@ -1031,37 +1115,14 @@ __setup_irq(unsigned int irq, struct irq
+ * thread.
+ */
+ if (new->thread_fn && !nested) {
+- struct task_struct *t;
+- static const struct sched_param param = {
+- .sched_priority = MAX_USER_RT_PRIO/2,
+- };
+-
+- t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+- new->name);
+- if (IS_ERR(t)) {
+- ret = PTR_ERR(t);
++ ret = setup_irq_thread(new, irq, false);
++ if (ret)
+ goto out_mput;
++ if (new->secondary) {
++ ret = setup_irq_thread(new->secondary, irq, true);
++ if (ret)
++ goto out_thread;
+ }
+-
+- sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
+-
+- /*
+- * We keep the reference to the task struct even if
+- * the thread dies to avoid that the interrupt code
+- * references an already freed task_struct.
+- */
+- get_task_struct(t);
+- new->thread = t;
+- /*
+- * Tell the thread to set its affinity. This is
+- * important for shared interrupt handlers as we do
+- * not invoke setup_affinity() for the secondary
+- * handlers as everything is already set up. Even for
+- * interrupts marked with IRQF_NO_BALANCE this is
+- * correct as we want the thread to move to the cpu(s)
+- * on which the requesting code placed the interrupt.
+- */
+- set_bit(IRQTF_AFFINITY, &new->thread_flags);
+ }
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+@@ -1234,7 +1295,6 @@ __setup_irq(unsigned int irq, struct irq
+ irq, nmsk, omsk);
+ }
+
+- new->irq = irq;
+ *old_ptr = new;
+
+ irq_pm_install_action(desc, new);
+@@ -1260,6 +1320,8 @@ __setup_irq(unsigned int irq, struct irq
+ */
+ if (new->thread)
+ wake_up_process(new->thread);
++ if (new->secondary)
++ wake_up_process(new->secondary->thread);
+
+ register_irq_proc(irq, desc);
+ new->dir = NULL;
+@@ -1290,6 +1352,13 @@ out_thread:
+ kthread_stop(t);
+ put_task_struct(t);
+ }
++ if (new->secondary && new->secondary->thread) {
++ struct task_struct *t = new->secondary->thread;
++
++ new->secondary->thread = NULL;
++ kthread_stop(t);
++ put_task_struct(t);
++ }
+ out_mput:
+ module_put(desc->owner);
+ return ret;
+@@ -1397,9 +1466,14 @@ static struct irqaction *__free_irq(unsi
+ if (action->thread) {
+ kthread_stop(action->thread);
+ put_task_struct(action->thread);
++ if (action->secondary && action->secondary->thread) {
++ kthread_stop(action->secondary->thread);
++ put_task_struct(action->secondary->thread);
++ }
+ }
+
+ module_put(desc->owner);
++ kfree(action->secondary);
+ return action;
+ }
+
+@@ -1543,8 +1617,10 @@ int request_threaded_irq(unsigned int ir
+ retval = __setup_irq(irq, desc, action);
+ chip_bus_sync_unlock(desc);
+
+- if (retval)
++ if (retval) {
++ kfree(action->secondary);
+ kfree(action);
++ }
+
+ #ifdef CONFIG_DEBUG_SHIRQ_FIXME
+ if (!retval && (irqflags & IRQF_SHARED)) {
diff --git a/patches/localversion.patch b/patches/localversion.patch
index a8e4298085e50c..1aadf5ce0d321e 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -1,4 +1,4 @@
-Subject: v4.1.7-rt7
+Subject: v4.1.7-rt8
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 20:25:16 +0200
@@ -7,7 +7,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
localversion-rt | 1 +
1 file changed, 1 insertion(+)
+Index: linux-rt-devel/localversion-rt
+===================================================================
--- /dev/null
-+++ b/localversion-rt
++++ linux-rt-devel/localversion-rt
@@ -0,0 +1 @@
-+-rt7
++-rt8
diff --git a/patches/mm--rt--Fix-generic-kmap_atomic-for-RT b/patches/mm--rt--Fix-generic-kmap_atomic-for-RT
new file mode 100644
index 00000000000000..58245b4343c6c9
--- /dev/null
+++ b/patches/mm--rt--Fix-generic-kmap_atomic-for-RT
@@ -0,0 +1,42 @@
+Subject: mm: rt: Fix generic kmap_atomic for RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 19 Sep 2015 10:15:00 +0200
+
+The update to 4.1 brought in the mainline variant of the pagefault
+disable distangling from preempt count. That introduced a
+preempt_disable/enable pair in the generic kmap_atomic/kunmap_atomic
+implementations which got not converted to the _nort() variant.
+
+That results in massive 'scheduling while atomic/sleeping function
+called from invalid context' splats.
+
+Fix that up.
+
+Reported-and-tested-by: Juergen Borleis <jbe@pengutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/highmem.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+Index: linux-rt-devel/include/linux/highmem.h
+===================================================================
+--- linux-rt-devel.orig/include/linux/highmem.h
++++ linux-rt-devel/include/linux/highmem.h
+@@ -66,7 +66,7 @@ static inline void kunmap(struct page *p
+
+ static inline void *kmap_atomic(struct page *page)
+ {
+- preempt_disable();
++ preempt_disable_nort();
+ pagefault_disable();
+ return page_address(page);
+ }
+@@ -75,7 +75,7 @@ static inline void *kmap_atomic(struct p
+ static inline void __kunmap_atomic(void *addr)
+ {
+ pagefault_enable();
+- preempt_enable();
++ preempt_enable_nort();
+ }
+
+ #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
diff --git a/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch b/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch
deleted file mode 100644
index 7767df6b64a3c1..00000000000000
--- a/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Thu, 26 Feb 2015 12:13:36 +0100
-Subject: mmc: sdhci: don't provide hard irq handler
-
-the sdhci code provides both irq handlers: the primary and the thread
-handler. Initially it was meant for the primary handler to be very
-short.
-The result is not that on -RT we have the primrary handler grabing locks
-and this isn't really working. As a hack for now I just push both
-handler into the threaded mode.
-
-
-Reported-By: Michal Šmucr <msmucr@gmail.com>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- drivers/mmc/host/sdhci.c | 32 +++++++++++++++++++++++++++-----
- 1 file changed, 27 insertions(+), 5 deletions(-)
-
---- a/drivers/mmc/host/sdhci.c
-+++ b/drivers/mmc/host/sdhci.c
-@@ -2691,6 +2691,31 @@ static irqreturn_t sdhci_thread_irq(int
- return isr ? IRQ_HANDLED : IRQ_NONE;
- }
-
-+#ifdef CONFIG_PREEMPT_RT_BASE
-+static irqreturn_t sdhci_rt_irq(int irq, void *dev_id)
-+{
-+ irqreturn_t ret;
-+
-+ local_bh_disable();
-+ ret = sdhci_irq(irq, dev_id);
-+ local_bh_enable();
-+ if (ret == IRQ_WAKE_THREAD)
-+ ret = sdhci_thread_irq(irq, dev_id);
-+ return ret;
-+}
-+#endif
-+
-+static int sdhci_req_irq(struct sdhci_host *host)
-+{
-+#ifdef CONFIG_PREEMPT_RT_BASE
-+ return request_threaded_irq(host->irq, NULL, sdhci_rt_irq,
-+ IRQF_SHARED, mmc_hostname(host->mmc), host);
-+#else
-+ return request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
-+ IRQF_SHARED, mmc_hostname(host->mmc), host);
-+#endif
-+}
-+
- /*****************************************************************************\
- * *
- * Suspend/resume *
-@@ -2758,9 +2783,7 @@ int sdhci_resume_host(struct sdhci_host
- }
-
- if (!device_may_wakeup(mmc_dev(host->mmc))) {
-- ret = request_threaded_irq(host->irq, sdhci_irq,
-- sdhci_thread_irq, IRQF_SHARED,
-- mmc_hostname(host->mmc), host);
-+ ret = sdhci_req_irq(host);
- if (ret)
- return ret;
- } else {
-@@ -3417,8 +3440,7 @@ int sdhci_add_host(struct sdhci_host *ho
-
- sdhci_init(host, 0);
-
-- ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
-- IRQF_SHARED, mmc_hostname(mmc), host);
-+ ret = sdhci_req_irq(host);
- if (ret) {
- pr_err("%s: Failed to request IRQ %d: %d\n",
- mmc_hostname(mmc), host->irq, ret);
diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random-make-it-work-on-rt.patch
index 8da86e0e7102d5..76748a595d5982 100644
--- a/patches/random-make-it-work-on-rt.patch
+++ b/patches/random-make-it-work-on-rt.patch
@@ -17,8 +17,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/irq/manage.c | 6 ++++++
5 files changed, 20 insertions(+), 8 deletions(-)
---- a/drivers/char/random.c
-+++ b/drivers/char/random.c
+Index: linux-rt-devel/drivers/char/random.c
+===================================================================
+--- linux-rt-devel.orig/drivers/char/random.c
++++ linux-rt-devel/drivers/char/random.c
@@ -868,28 +868,27 @@ static __u32 get_reg(struct fast_pool *f
return *(ptr + f->reg_idx++);
}
@@ -53,8 +55,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
fast_mix(fast_pool);
add_interrupt_bench(cycles);
---- a/include/linux/irqdesc.h
-+++ b/include/linux/irqdesc.h
+Index: linux-rt-devel/include/linux/irqdesc.h
+===================================================================
+--- linux-rt-devel.orig/include/linux/irqdesc.h
++++ linux-rt-devel/include/linux/irqdesc.h
@@ -63,6 +63,7 @@ struct irq_desc {
unsigned int irqs_unhandled;
atomic_t threads_handled;
@@ -63,8 +67,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
raw_spinlock_t lock;
struct cpumask *percpu_enabled;
#ifdef CONFIG_SMP
---- a/include/linux/random.h
-+++ b/include/linux/random.h
+Index: linux-rt-devel/include/linux/random.h
+===================================================================
+--- linux-rt-devel.orig/include/linux/random.h
++++ linux-rt-devel/include/linux/random.h
@@ -11,7 +11,7 @@
extern void add_device_randomness(const void *, unsigned int);
extern void add_input_randomness(unsigned int type, unsigned int code,
@@ -74,8 +80,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
extern void get_random_bytes(void *buf, int nbytes);
extern void get_random_bytes_arch(void *buf, int nbytes);
---- a/kernel/irq/handle.c
-+++ b/kernel/irq/handle.c
+Index: linux-rt-devel/kernel/irq/handle.c
+===================================================================
+--- linux-rt-devel.orig/kernel/irq/handle.c
++++ linux-rt-devel/kernel/irq/handle.c
@@ -133,6 +133,8 @@ void __irq_wake_thread(struct irq_desc *
irqreturn_t
handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
@@ -98,11 +106,13 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (!noirqdebug)
note_interrupt(irq, desc, retval);
---- a/kernel/irq/manage.c
-+++ b/kernel/irq/manage.c
-@@ -991,6 +991,12 @@ static int irq_thread(void *data)
- if (action_ret == IRQ_HANDLED)
- atomic_inc(&desc->threads_handled);
+Index: linux-rt-devel/kernel/irq/manage.c
+===================================================================
+--- linux-rt-devel.orig/kernel/irq/manage.c
++++ linux-rt-devel/kernel/irq/manage.c
+@@ -1012,6 +1012,12 @@ static int irq_thread(void *data)
+ if (action_ret == IRQ_WAKE_THREAD)
+ irq_wake_secondary(desc, action);
+#ifdef CONFIG_PREEMPT_RT_FULL
+ migrate_disable();
diff --git a/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch b/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch
new file mode 100644
index 00000000000000..8d54636e8f4679
--- /dev/null
+++ b/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch
@@ -0,0 +1,97 @@
+Subject: ARM: smp: Move clear_tasks_mm_cpumask() call to __cpu_die()
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+Date: Fri, 11 Sep 2015 21:21:23 +0300
+
+When running with the RT-kernel (4.1.5-rt5) on TI OMAP dra7-evm and trying
+to do Suspend to RAM, the following backtrace occurs:
+
+ Disabling non-boot CPUs ...
+ PM: noirq suspend of devices complete after 7.295 msecs
+ Disabling non-boot CPUs ...
+ BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917
+ in_atomic(): 1, irqs_disabled(): 128, pid: 18, name: migration/1
+ INFO: lockdep is turned off.
+ irq event stamp: 122
+ hardirqs last enabled at (121): [<c06ac0ac>] _raw_spin_unlock_irqrestore+0x88/0x90
+ hardirqs last disabled at (122): [<c06abed0>] _raw_spin_lock_irq+0x28/0x5c
+ softirqs last enabled at (0): [<c003d294>] copy_process.part.52+0x410/0x19d8
+ softirqs last disabled at (0): [< (null)>] (null)
+ Preemption disabled at:[< (null)>] (null)
+ CPU: 1 PID: 18 Comm: migration/1 Tainted: G W 4.1.4-rt3-01046-g96ac8da #204
+ Hardware name: Generic DRA74X (Flattened Device Tree)
+ [<c0019134>] (unwind_backtrace) from [<c0014774>] (show_stack+0x20/0x24)
+ [<c0014774>] (show_stack) from [<c06a70f4>] (dump_stack+0x88/0xdc)
+ [<c06a70f4>] (dump_stack) from [<c006cab8>] (___might_sleep+0x198/0x2a8)
+ [<c006cab8>] (___might_sleep) from [<c06ac4dc>] (rt_spin_lock+0x30/0x70)
+ [<c06ac4dc>] (rt_spin_lock) from [<c013f790>] (find_lock_task_mm+0x9c/0x174)
+ [<c013f790>] (find_lock_task_mm) from [<c00409ac>] (clear_tasks_mm_cpumask+0xb4/0x1ac)
+ [<c00409ac>] (clear_tasks_mm_cpumask) from [<c00166a4>] (__cpu_disable+0x98/0xbc)
+ [<c00166a4>] (__cpu_disable) from [<c06a2e8c>] (take_cpu_down+0x1c/0x50)
+ [<c06a2e8c>] (take_cpu_down) from [<c00f2600>] (multi_cpu_stop+0x11c/0x158)
+ [<c00f2600>] (multi_cpu_stop) from [<c00f2a9c>] (cpu_stopper_thread+0xc4/0x184)
+ [<c00f2a9c>] (cpu_stopper_thread) from [<c0069058>] (smpboot_thread_fn+0x18c/0x324)
+ [<c0069058>] (smpboot_thread_fn) from [<c00649c4>] (kthread+0xe8/0x104)
+ [<c00649c4>] (kthread) from [<c0010058>] (ret_from_fork+0x14/0x3c)
+ CPU1: shutdown
+ PM: Calling sched_clock_suspend+0x0/0x40
+ PM: Calling timekeeping_suspend+0x0/0x2e0
+ PM: Calling irq_gc_suspend+0x0/0x68
+ PM: Calling fw_suspend+0x0/0x2c
+ PM: Calling cpu_pm_suspend+0x0/0x28
+
+Also, sometimes system stucks right after displaying "Disabling non-boot
+CPUs ...". The root cause of above backtrace is task_lock() which takes
+a sleeping lock on -RT.
+
+To fix the issue, move clear_tasks_mm_cpumask() call from __cpu_disable()
+to __cpu_die() which is called on the thread which is asking for a target
+CPU to be shutdown. In addition, this change restores CPUhotplug functionality
+on TI OMAP dra7-evm and CPU1 can be unplugged/plugged many times.
+
+Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: <linux-arm-kernel@lists.infradead.org>
+Cc: Sekhar Nori <nsekhar@ti.com>
+Cc: Austin Schuh <austin@peloton-tech.com>
+Cc: <philipp@peloton-tech.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Cc: <bigeasy@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+Link: http://lkml.kernel.org/r/1441995683-30817-1-git-send-email-grygorii.strashko@ti.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+
+RFC: I'm not sure how safe this change is and will be appreciated for any comments.
+Most of arches call clear_tasks_mm_cpumask() from __cpu_disable(), but *powerpc*
+calls it from CPU_DEAD notifier. This patch follows powerpc's approach in
+general.
+
+This issue was first reported in:
+ http://www.spinics.net/lists/linux-rt-users/msg13752.html
+
+ arch/arm/kernel/smp.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+Index: linux-rt-devel/arch/arm/kernel/smp.c
+===================================================================
+--- linux-rt-devel.orig/arch/arm/kernel/smp.c
++++ linux-rt-devel/arch/arm/kernel/smp.c
+@@ -213,8 +213,6 @@ int __cpu_disable(void)
+ flush_cache_louis();
+ local_flush_tlb_all();
+
+- clear_tasks_mm_cpumask(cpu);
+-
+ return 0;
+ }
+
+@@ -230,6 +228,9 @@ void __cpu_die(unsigned int cpu)
+ pr_err("CPU%u: cpu didn't die\n", cpu);
+ return;
+ }
++
++ clear_tasks_mm_cpumask(cpu);
++
+ pr_notice("CPU%u: shutdown\n", cpu);
+
+ /*
diff --git a/patches/series b/patches/series
index bdbd1e8462b7f6..3c403bb145e7e3 100644
--- a/patches/series
+++ b/patches/series
@@ -5,6 +5,7 @@
############################################################
# UPSTREAM changes queued
############################################################
+xfs--clean-up-inode-lockdep-annotations
############################################################
# UPSTREAM FIXES, patches pending
@@ -34,11 +35,13 @@ mm-slub-move-slab-initialization-into-irq-enabled-region.patch
# Stuff broken upstream, patches submitted
############################################################
cpufreq-Remove-cpufreq_rwsem.patch
+genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully
############################################################
# Stuff which needs addressing upstream, but requires more
# information
############################################################
+rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch
############################################################
# Stuff broken upstream, need to be sent
@@ -155,6 +158,7 @@ futex-avoid-double-wake-up-in-PI-futex-wait-wake-on-.patch
# TRACING
latency-hist.patch
+tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch
# HW LATENCY DETECTOR - this really wants a rewrite
hwlatdetect.patch
@@ -466,6 +470,7 @@ sysfs-realtime-entry.patch
power-disable-highmem-on-rt.patch
mips-disable-highmem-on-rt.patch
mm-rt-kmap-atomic-scheduling.patch
+mm--rt--Fix-generic-kmap_atomic-for-RT
x86-highmem-add-a-already-used-pte-check.patch
arm-highmem-flush-tlb-on-unmap.patch
arm-enable-highmem-for-rt.patch
@@ -549,7 +554,6 @@ leds-trigger-disable-CPU-trigger-on-RT.patch
# DRIVERS
i2c-omap-drop-the-lock-hard-irq-context.patch
mmci-remove-bogus-irq-save.patch
-mmc-sdhci-don-t-provide-hard-irq-handler.patch
cpufreq-drop-K8-s-driver-from-beeing-selected.patch
# I915
diff --git a/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch b/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch
new file mode 100644
index 00000000000000..054c1c904ec724
--- /dev/null
+++ b/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch
@@ -0,0 +1,86 @@
+Subject: tracing: Fix rcu splat from idle CPU on boot
+From: Philipp Schrader <philipp@peloton-tech.com>
+Date: Thu, 3 Sep 2015 14:29:14 -0700
+
+With PREEMPT_RT and most of the lockdep-related options enabled I
+encountered this splat when booting our DRA7 evaluation module:
+
+[ 0.055073]
+[ 0.055076] ===============================
+[ 0.055079] [ INFO: suspicious RCU usage. ]
+[ 0.055084] 4.1.6+ #2 Not tainted
+[ 0.055086] -------------------------------
+[ 0.055090] include/trace/events/hist.h:31 suspicious
+rcu_dereference_check() usage!
+[ 0.055093]
+[ 0.055093] other info that might help us debug this:
+[ 0.055093]
+[ 0.055097]
+[ 0.055097] RCU used illegally from idle CPU!
+[ 0.055097] rcu_scheduler_active = 1, debug_locks = 1
+[ 0.055100] RCU used illegally from extended quiescent state!
+[ 0.055104] no locks held by swapper/0/0.
+[ 0.055106]
+[ 0.055106] stack backtrace:
+[ 0.055112] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.6+ #2
+[ 0.055116] Hardware name: Generic DRA74X (Flattened Device Tree)
+[ 0.055130] [<c00196b8>] (unwind_backtrace) from [<c001515c>]
+(show_stack+0x20/0x24)
+[ 0.055146] [<c001515c>] (show_stack) from [<c07bc408>]
+(dump_stack+0x84/0xa0)
+[ 0.055160] [<c07bc408>] (dump_stack) from [<c009bc38>]
+(lockdep_rcu_suspicious+0xb0/0x110)
+[ 0.055172] [<c009bc38>] (lockdep_rcu_suspicious) from [<c01246c4>]
+(time_hardirqs_off+0x2b8/0x3c8)
+[ 0.055184] [<c01246c4>] (time_hardirqs_off) from [<c009a218>]
+(trace_hardirqs_off_caller+0x2c/0xf4)
+[ 0.055194] [<c009a218>] (trace_hardirqs_off_caller) from
+[<c009a2f4>] (trace_hardirqs_off+0x14/0x18)
+[ 0.055204] [<c009a2f4>] (trace_hardirqs_off) from [<c00c7ecc>]
+(rcu_idle_enter+0x78/0xcc)
+[ 0.055213] [<c00c7ecc>] (rcu_idle_enter) from [<c0093eb0>]
+(cpu_startup_entry+0x190/0x518)
+[ 0.055222] [<c0093eb0>] (cpu_startup_entry) from [<c07b95b4>]
+(rest_init+0x13c/0x17c)
+[ 0.055231] [<c07b95b4>] (rest_init) from [<c0b32c74>]
+(start_kernel+0x320/0x380)
+[ 0.055238] [<c0b32c74>] (start_kernel) from [<8000807c>] (0x8000807c)
+
+As per Steve Rotstedt's suggestion I changed the trace_* calls to
+trace_*_rcuidle calls. He pointed out that the trace points were getting
+triggered when rcu wasn't watching.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/trace/trace_irqsoff.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
+index aaade2e..d0e1d0e 100644
+--- a/kernel/trace/trace_irqsoff.c
++++ b/kernel/trace/trace_irqsoff.c
+@@ -450,7 +450,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
+ #ifdef CONFIG_PROVE_LOCKING
+ void time_hardirqs_on(unsigned long a0, unsigned long a1)
+ {
+- trace_preemptirqsoff_hist(IRQS_ON, 0);
++ trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(a0, a1);
+ }
+@@ -459,7 +459,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
+ {
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(a0, a1);
+- trace_preemptirqsoff_hist(IRQS_OFF, 1);
++ trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
+ }
+
+ #else /* !CONFIG_PROVE_LOCKING */
+--
+2.1.4
+
+
+
diff --git a/patches/xfs--clean-up-inode-lockdep-annotations b/patches/xfs--clean-up-inode-lockdep-annotations
new file mode 100644
index 00000000000000..b11e5f69263467
--- /dev/null
+++ b/patches/xfs--clean-up-inode-lockdep-annotations
@@ -0,0 +1,285 @@
+Subject: xfs: clean up inode lockdep annotations
+From: Dave Chinner <dchinner@redhat.com>
+Date: Wed Aug 19 10:32:49 2015 +1000
+
+Upstream commit 0952c8183c1575a78dc416b5e168987ff98728bb
+
+Lockdep annotations are a maintenance nightmare. Locking has to be
+modified to suit the limitations of the annotations, and we're
+always having to fix the annotations because they are unable to
+express the complexity of locking hierarchies correctly.
+
+So, next up, we've got more issues with lockdep annotations for
+inode locking w.r.t. XFS_LOCK_PARENT:
+
+ - lockdep classes are exclusive and can't be ORed together
+ to form new classes.
+ - IOLOCK needs multiple PARENT subclasses to express the
+ changes needed for the readdir locking rework needed to
+ stop the endless flow of lockdep false positives involving
+ readdir calling filldir under the ILOCK.
+ - there are only 8 unique lockdep subclasses available,
+ so we can't create a generic solution.
+
+IOWs we need to treat the 3-bit space available to each lock type
+differently:
+
+ - IOLOCK uses xfs_lock_two_inodes(), so needs:
+ - at least 2 IOLOCK subclasses
+ - at least 2 IOLOCK_PARENT subclasses
+ - MMAPLOCK uses xfs_lock_two_inodes(), so needs:
+ - at least 2 MMAPLOCK subclasses
+ - ILOCK uses xfs_lock_inodes with up to 5 inodes, so needs:
+ - at least 5 ILOCK subclasses
+ - one ILOCK_PARENT subclass
+ - one RTBITMAP subclass
+ - one RTSUM subclass
+
+For the IOLOCK, split the space into two sets of subclasses.
+For the MMAPLOCK, just use half the space for the one subclass to
+match the non-parent lock classes of the IOLOCK.
+For the ILOCK, use 0-4 as the ILOCK subclasses, 5-7 for the
+remaining individual subclasses.
+
+Because they are now all different, modify xfs_lock_inumorder() to
+handle the nested subclasses, and to assert fail if passed an
+invalid subclass. Further, annotate xfs_lock_inodes() to assert fail
+if an invalid combination of lock primitives and inode counts are
+passed that would result in a lockdep subclass annotation overflow.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/xfs/xfs_inode.c | 68 ++++++++++++++++++++++++++++++++++-----------
+ fs/xfs/xfs_inode.h | 79 +++++++++++++++++++++++++++++++++++++----------------
+ 2 files changed, 107 insertions(+), 40 deletions(-)
+
+Index: linux-rt-devel/fs/xfs/xfs_inode.c
+===================================================================
+--- linux-rt-devel.orig/fs/xfs/xfs_inode.c
++++ linux-rt-devel/fs/xfs/xfs_inode.c
+@@ -164,7 +164,7 @@ xfs_ilock(
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+ (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+
+ if (lock_flags & XFS_IOLOCK_EXCL)
+ mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
+@@ -212,7 +212,7 @@ xfs_ilock_nowait(
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+ (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+
+ if (lock_flags & XFS_IOLOCK_EXCL) {
+ if (!mrtryupdate(&ip->i_iolock))
+@@ -281,7 +281,7 @@ xfs_iunlock(
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+ (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ ASSERT(lock_flags != 0);
+
+ if (lock_flags & XFS_IOLOCK_EXCL)
+@@ -364,30 +364,38 @@ int xfs_lock_delays;
+
+ /*
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+- * value. This shouldn't be called for page fault locking, but we also need to
+- * ensure we don't overrun the number of lockdep subclasses for the iolock or
+- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
++ * value. This can be called for any type of inode lock combination, including
++ * parent locking. Care must be taken to ensure we don't overrun the subclass
++ * storage fields in the class mask we build.
+ */
+ static inline int
+ xfs_lock_inumorder(int lock_mode, int subclass)
+ {
++ int class = 0;
++
++ ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
++ XFS_ILOCK_RTSUM)));
++
+ if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+- ASSERT(subclass + XFS_LOCK_INUMORDER <
+- (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
+- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
++ ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
++ ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
++ MAX_LOCKDEP_SUBCLASSES);
++ class += subclass << XFS_IOLOCK_SHIFT;
++ if (lock_mode & XFS_IOLOCK_PARENT)
++ class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
+ }
+
+ if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+- ASSERT(subclass + XFS_LOCK_INUMORDER <
+- (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+- lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+- XFS_MMAPLOCK_SHIFT;
++ ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
++ class += subclass << XFS_MMAPLOCK_SHIFT;
+ }
+
+- if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
+- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
++ if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
++ ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
++ class += subclass << XFS_ILOCK_SHIFT;
++ }
+
+- return lock_mode;
++ return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
+ }
+
+ /*
+@@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int su
+ * transaction (such as truncate). This can result in deadlock since the long
+ * running trans might need to wait for the inode we just locked in order to
+ * push the tail and free space in the log.
++ *
++ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
++ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
++ * lock more than one at a time, lockdep will report false positives saying we
++ * have violated locking orders.
+ */
+ void
+ xfs_lock_inodes(
+@@ -409,8 +422,29 @@ xfs_lock_inodes(
+ int attempts = 0, i, j, try_lock;
+ xfs_log_item_t *lp;
+
+- /* currently supports between 2 and 5 inodes */
++ /*
++ * Currently supports between 2 and 5 inodes with exclusive locking. We
++ * support an arbitrary depth of locking here, but absolute limits on
++ * inodes depend on the the type of locking and the limits placed by
++ * lockdep annotations in xfs_lock_inumorder. These are all checked by
++ * the asserts.
++ */
+ ASSERT(ips && inodes >= 2 && inodes <= 5);
++ ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
++ XFS_ILOCK_EXCL));
++ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
++ XFS_ILOCK_SHARED)));
++ ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
++ inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
++ ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
++ inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
++ ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
++ inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
++
++ if (lock_mode & XFS_IOLOCK_EXCL) {
++ ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
++ } else if (lock_mode & XFS_MMAPLOCK_EXCL)
++ ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
+
+ try_lock = 0;
+ i = 0;
+Index: linux-rt-devel/fs/xfs/xfs_inode.h
+===================================================================
+--- linux-rt-devel.orig/fs/xfs/xfs_inode.h
++++ linux-rt-devel/fs/xfs/xfs_inode.h
+@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct
+ * Flags for lockdep annotations.
+ *
+ * XFS_LOCK_PARENT - for directory operations that require locking a
+- * parent directory inode and a child entry inode. The parent gets locked
+- * with this flag so it gets a lockdep subclass of 1 and the child entry
+- * lock will have a lockdep subclass of 0.
++ * parent directory inode and a child entry inode. IOLOCK requires nesting,
++ * MMAPLOCK does not support this class, ILOCK requires a single subclass
++ * to differentiate parent from child.
+ *
+ * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
+ * inodes do not participate in the normal lock order, and thus have their
+@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct
+ * XFS_LOCK_INUMORDER - for locking several inodes at the some time
+ * with xfs_lock_inodes(). This flag is used as the starting subclass
+ * and each subsequent lock acquired will increment the subclass by one.
+- * So the first lock acquired will have a lockdep subclass of 4, the
+- * second lock will have a lockdep subclass of 5, and so on. It is
+- * the responsibility of the class builder to shift this to the correct
+- * portion of the lock_mode lockdep mask.
++ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
++ * limited to the subclasses we can represent via nesting. We need at least
++ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
++ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
++ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
++ * 8 subclasses supported by lockdep.
++ *
++ * This also means we have to number the sub-classes in the lowest bits of
++ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
++ * mask and we can't use bit-masking to build the subclasses. What a mess.
++ *
++ * Bit layout:
++ *
++ * Bit Lock Region
++ * 16-19 XFS_IOLOCK_SHIFT dependencies
++ * 20-23 XFS_MMAPLOCK_SHIFT dependencies
++ * 24-31 XFS_ILOCK_SHIFT dependencies
++ *
++ * IOLOCK values
++ *
++ * 0-3 subclass value
++ * 4-7 PARENT subclass values
++ *
++ * MMAPLOCK values
++ *
++ * 0-3 subclass value
++ * 4-7 unused
++ *
++ * ILOCK values
++ * 0-4 subclass values
++ * 5 PARENT subclass (not nestable)
++ * 6 RTBITMAP subclass (not nestable)
++ * 7 RTSUM subclass (not nestable)
++ *
+ */
+-#define XFS_LOCK_PARENT 1
+-#define XFS_LOCK_RTBITMAP 2
+-#define XFS_LOCK_RTSUM 3
+-#define XFS_LOCK_INUMORDER 4
+-
+-#define XFS_IOLOCK_SHIFT 16
+-#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
++#define XFS_IOLOCK_SHIFT 16
++#define XFS_IOLOCK_PARENT_VAL 4
++#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
++#define XFS_IOLOCK_DEP_MASK 0x000f0000
++#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+-#define XFS_MMAPLOCK_SHIFT 20
++#define XFS_MMAPLOCK_SHIFT 20
++#define XFS_MMAPLOCK_NUMORDER 0
++#define XFS_MMAPLOCK_MAX_SUBCLASS 3
++#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
+
+-#define XFS_ILOCK_SHIFT 24
+-#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
+-#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
+-#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
++#define XFS_ILOCK_SHIFT 24
++#define XFS_ILOCK_PARENT_VAL 5
++#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
++#define XFS_ILOCK_RTBITMAP_VAL 6
++#define XFS_ILOCK_RTSUM_VAL 7
++#define XFS_ILOCK_DEP_MASK 0xff000000
++#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
++#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
++#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+-#define XFS_IOLOCK_DEP_MASK 0x000f0000
+-#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
+-#define XFS_ILOCK_DEP_MASK 0xff000000
+-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
++#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
+ XFS_MMAPLOCK_DEP_MASK | \
+ XFS_ILOCK_DEP_MASK)
+