diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2015-09-21 18:41:01 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2015-09-21 18:41:01 +0200 |
commit | 7461758b9e982e4ea6280ce9308492e7cceda2ed (patch) | |
tree | ee87a74437efc4659f99604b32a2b08298ddd616 | |
parent | 2afdf8087c2f343df801ac6bff591ed8b05658b1 (diff) | |
download | 4.9-rt-patches-7461758b9e982e4ea6280ce9308492e7cceda2ed.tar.gz |
[ANNOUNCE] 4.1.7-rt8
Dear RT folks!
I'm pleased to announce the v4.1.7-rt8 patch set. v4.1.6-rt6 and
v4.1.7-rt7 are non-announced updates to incorporate the linux-4.1.y
stable tree changes.
Changes since v4.1.5-rt5:
- Update to 4.1.7
- Cherry-pick a XFS lockdep annotation fix from mainline
- Use preempt_xxx_nort in the generic implementation of
k[un]map_atomic.
- Revert d04ea10ba1ea mmc: sdhci: don't provide hard irq handler
- Force thread primary handlers of interrupts which provide both a
primary and a threaded handler
- Move clear_tasks_mm_cpumask() call to __cpu_die() on ARM
(Grygorii)
- Fix a RCU splat in the trace histogram (Philipp)
Solved issues:
- The high CPU usage problem reported by Nicholas turned out to be a
scalability issue of the gcov instrumentation
Known issues:
- bcache stays disabled
- CPU hotplug is not better than before
- The netlink_release() OOPS, reported by Clark, is still on the
list, but unsolved due to lack of information
The delta patch against 4.1.7-rt7 is appended below and can be found here:
https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/incr/patch-4.1.7-rt7-rt8.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.1.7-rt8
The RT patch against 4.1.7 can be found here:
https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patch-4.1.7-rt8.patch.xz
The split quilt queue is available at:
https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.7-rt8.tar.xz
Enjoy!
tglx
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully | 337 | ||||
-rw-r--r-- | patches/localversion.patch | 8 | ||||
-rw-r--r-- | patches/mm--rt--Fix-generic-kmap_atomic-for-RT | 42 | ||||
-rw-r--r-- | patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch | 73 | ||||
-rw-r--r-- | patches/random-make-it-work-on-rt.patch | 36 | ||||
-rw-r--r-- | patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch | 97 | ||||
-rw-r--r-- | patches/series | 6 | ||||
-rw-r--r-- | patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch | 86 | ||||
-rw-r--r-- | patches/xfs--clean-up-inode-lockdep-annotations | 285 |
9 files changed, 880 insertions, 90 deletions
diff --git a/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully b/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully new file mode 100644 index 00000000000000..a84bff7945759c --- /dev/null +++ b/patches/genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully @@ -0,0 +1,337 @@ +Subject: genirq: Handle force threading of interrupts with primary and thread handler +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 19 Sep 2015 11:56:20 +0200 + +Force threading of interrupts does not deal with interrupts which are +requested with a primary and a threaded handler. The current policy is +to leave them alone and let the primary handler run in interrupt +context, but we set the ONESHOT flag for those interrupts as well. + +Kohji Okuno debugged a problem with the SDHCI driver where the +interrupt thread waits for a hardware interrupt to trigger, which cant +work well because the hardware interrupt is masked due to the ONESHOT +flag being set. He proposed to set the ONESHOT flag only if the +interrupt does not provide a thread handler. + +Though that does not work either because these interrupts can be +shared. So the other interrupt would rightfully get the ONESHOT flag +set and therefor the same situation would happen again. + +To deal with this proper, we need to force thread the primary handler +of such interrupts as well. That means that the primary interrupt +handler is treated as any other primary interrupt handler which is not +marked IRQF_NO_THREAD. The threaded handler becomes a separate thread +so the SDHCI flow logic can be handled gracefully. + +The same issue was reported against 4.1-rt. 
+ +Reported-by: Kohji Okuno <okuno.kohji@jp.panasonic.com> +Reported-By: Michal Šmucr <msmucr@gmail.com> +Reported-and-tested-by: Nathan Sullivan <nathan.sullivan@ni.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Cc: Steven Rostedt <rostedt@goodmis.org> +Cc: stable-rt@vger.kernel.org +--- + +This requires to revert commit: d04ea10ba1ea 'mmc: sdhci: don't provide +hard irq handler' + + include/linux/interrupt.h | 2 + kernel/irq/manage.c | 160 +++++++++++++++++++++++++++++++++------------- + 2 files changed, 120 insertions(+), 42 deletions(-) + +Index: linux-rt-devel/include/linux/interrupt.h +=================================================================== +--- linux-rt-devel.orig/include/linux/interrupt.h ++++ linux-rt-devel/include/linux/interrupt.h +@@ -102,6 +102,7 @@ typedef irqreturn_t (*irq_handler_t)(int + * @flags: flags (see IRQF_* above) + * @thread_fn: interrupt handler function for threaded interrupts + * @thread: thread pointer for threaded interrupts ++ * @secondary: pointer to secondary irqaction (force threading) + * @thread_flags: flags related to @thread + * @thread_mask: bitmask for keeping track of @thread activity + * @dir: pointer to the proc/irq/NN/name entry +@@ -113,6 +114,7 @@ struct irqaction { + struct irqaction *next; + irq_handler_t thread_fn; + struct task_struct *thread; ++ struct irqaction *secondary; + unsigned int irq; + unsigned int flags; + unsigned long thread_flags; +Index: linux-rt-devel/kernel/irq/manage.c +=================================================================== +--- linux-rt-devel.orig/kernel/irq/manage.c ++++ linux-rt-devel/kernel/irq/manage.c +@@ -697,6 +697,12 @@ static irqreturn_t irq_nested_primary_ha + return IRQ_NONE; + } + ++static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) ++{ ++ WARN(1, "Secondary action handler called for irq %d\n", irq); ++ return IRQ_NONE; ++} ++ + static int irq_wait_for_interrupt(struct 
irqaction *action) + { + set_current_state(TASK_INTERRUPTIBLE); +@@ -723,7 +729,8 @@ static int irq_wait_for_interrupt(struct + static void irq_finalize_oneshot(struct irq_desc *desc, + struct irqaction *action) + { +- if (!(desc->istate & IRQS_ONESHOT)) ++ if (!(desc->istate & IRQS_ONESHOT) || ++ action->handler == irq_forced_secondary_handler) + return; + again: + chip_bus_lock(desc); +@@ -877,6 +884,18 @@ static void irq_thread_dtor(struct callb + irq_finalize_oneshot(desc, action); + } + ++static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) ++{ ++ struct irqaction *secondary = action->secondary; ++ ++ if (WARN_ON_ONCE(!secondary)) ++ return; ++ ++ raw_spin_lock_irq(&desc->lock); ++ __irq_wake_thread(desc, secondary); ++ raw_spin_unlock_irq(&desc->lock); ++} ++ + /* + * Interrupt handler thread + */ +@@ -907,6 +926,8 @@ static int irq_thread(void *data) + action_ret = handler_fn(desc, action); + if (action_ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); ++ if (action_ret == IRQ_WAKE_THREAD) ++ irq_wake_secondary(desc, action); + + wake_threads_waitq(desc); + } +@@ -951,20 +972,36 @@ void irq_wake_thread(unsigned int irq, v + } + EXPORT_SYMBOL_GPL(irq_wake_thread); + +-static void irq_setup_forced_threading(struct irqaction *new) ++static int irq_setup_forced_threading(struct irqaction *new) + { + if (!force_irqthreads) +- return; ++ return 0; + if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) +- return; ++ return 0; + + new->flags |= IRQF_ONESHOT; + +- if (!new->thread_fn) { +- set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); +- new->thread_fn = new->handler; +- new->handler = irq_default_primary_handler; +- } ++ /* ++ * Handle the case where we have a real primary handler and a ++ * thread handler. We force thread them as well by creating a ++ * secondary action. 
++ */ ++ if (new->handler != irq_default_primary_handler && new->thread_fn) { ++ /* Allocate the secondary action */ ++ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!new->secondary) ++ return -ENOMEM; ++ new->secondary->handler = irq_forced_secondary_handler; ++ new->secondary->thread_fn = new->thread_fn; ++ new->secondary->dev_id = new->dev_id; ++ new->secondary->irq = new->irq; ++ new->secondary->name = new->name; ++ } ++ /* Deal with the primary handler */ ++ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); ++ new->thread_fn = new->handler; ++ new->handler = irq_default_primary_handler; ++ return 0; + } + + static int irq_request_resources(struct irq_desc *desc) +@@ -984,6 +1021,48 @@ static void irq_release_resources(struct + c->irq_release_resources(d); + } + ++static int ++setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) ++{ ++ struct task_struct *t; ++ struct sched_param param = { ++ .sched_priority = MAX_USER_RT_PRIO/2, ++ }; ++ ++ if (!secondary) { ++ t = kthread_create(irq_thread, new, "irq/%d-%s", irq, ++ new->name); ++ } else { ++ t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, ++ new->name); ++ param.sched_priority += 1; ++ } ++ ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ ++ sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); ++ ++ /* ++ * We keep the reference to the task struct even if ++ * the thread dies to avoid that the interrupt code ++ * references an already freed task_struct. ++ */ ++ get_task_struct(t); ++ new->thread = t; ++ /* ++ * Tell the thread to set its affinity. This is ++ * important for shared interrupt handlers as we do ++ * not invoke setup_affinity() for the secondary ++ * handlers as everything is already set up. Even for ++ * interrupts marked with IRQF_NO_BALANCE this is ++ * correct as we want the thread to move to the cpu(s) ++ * on which the requesting code placed the interrupt. 
++ */ ++ set_bit(IRQTF_AFFINITY, &new->thread_flags); ++ return 0; ++} ++ + /* + * Internal function to register an irqaction - typically used to + * allocate special interrupts that are part of the architecture. +@@ -1004,6 +1083,8 @@ __setup_irq(unsigned int irq, struct irq + if (!try_module_get(desc->owner)) + return -ENODEV; + ++ new->irq = irq; ++ + /* + * Check whether the interrupt nests into another interrupt + * thread. +@@ -1021,8 +1102,11 @@ __setup_irq(unsigned int irq, struct irq + */ + new->handler = irq_nested_primary_handler; + } else { +- if (irq_settings_can_thread(desc)) +- irq_setup_forced_threading(new); ++ if (irq_settings_can_thread(desc)) { ++ ret = irq_setup_forced_threading(new); ++ if (ret) ++ goto out_mput; ++ } + } + + /* +@@ -1031,37 +1115,14 @@ __setup_irq(unsigned int irq, struct irq + * thread. + */ + if (new->thread_fn && !nested) { +- struct task_struct *t; +- static const struct sched_param param = { +- .sched_priority = MAX_USER_RT_PRIO/2, +- }; +- +- t = kthread_create(irq_thread, new, "irq/%d-%s", irq, +- new->name); +- if (IS_ERR(t)) { +- ret = PTR_ERR(t); ++ ret = setup_irq_thread(new, irq, false); ++ if (ret) + goto out_mput; ++ if (new->secondary) { ++ ret = setup_irq_thread(new->secondary, irq, true); ++ if (ret) ++ goto out_thread; + } +- +- sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); +- +- /* +- * We keep the reference to the task struct even if +- * the thread dies to avoid that the interrupt code +- * references an already freed task_struct. +- */ +- get_task_struct(t); +- new->thread = t; +- /* +- * Tell the thread to set its affinity. This is +- * important for shared interrupt handlers as we do +- * not invoke setup_affinity() for the secondary +- * handlers as everything is already set up. Even for +- * interrupts marked with IRQF_NO_BALANCE this is +- * correct as we want the thread to move to the cpu(s) +- * on which the requesting code placed the interrupt. 
+- */ +- set_bit(IRQTF_AFFINITY, &new->thread_flags); + } + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { +@@ -1234,7 +1295,6 @@ __setup_irq(unsigned int irq, struct irq + irq, nmsk, omsk); + } + +- new->irq = irq; + *old_ptr = new; + + irq_pm_install_action(desc, new); +@@ -1260,6 +1320,8 @@ __setup_irq(unsigned int irq, struct irq + */ + if (new->thread) + wake_up_process(new->thread); ++ if (new->secondary) ++ wake_up_process(new->secondary->thread); + + register_irq_proc(irq, desc); + new->dir = NULL; +@@ -1290,6 +1352,13 @@ out_thread: + kthread_stop(t); + put_task_struct(t); + } ++ if (new->secondary && new->secondary->thread) { ++ struct task_struct *t = new->secondary->thread; ++ ++ new->secondary->thread = NULL; ++ kthread_stop(t); ++ put_task_struct(t); ++ } + out_mput: + module_put(desc->owner); + return ret; +@@ -1397,9 +1466,14 @@ static struct irqaction *__free_irq(unsi + if (action->thread) { + kthread_stop(action->thread); + put_task_struct(action->thread); ++ if (action->secondary && action->secondary->thread) { ++ kthread_stop(action->secondary->thread); ++ put_task_struct(action->secondary->thread); ++ } + } + + module_put(desc->owner); ++ kfree(action->secondary); + return action; + } + +@@ -1543,8 +1617,10 @@ int request_threaded_irq(unsigned int ir + retval = __setup_irq(irq, desc, action); + chip_bus_sync_unlock(desc); + +- if (retval) ++ if (retval) { ++ kfree(action->secondary); + kfree(action); ++ } + + #ifdef CONFIG_DEBUG_SHIRQ_FIXME + if (!retval && (irqflags & IRQF_SHARED)) { diff --git a/patches/localversion.patch b/patches/localversion.patch index a8e4298085e50c..1aadf5ce0d321e 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -1,4 +1,4 @@ -Subject: v4.1.7-rt7 +Subject: v4.1.7-rt8 From: Thomas Gleixner <tglx@linutronix.de> Date: Fri, 08 Jul 2011 20:25:16 +0200 @@ -7,7 +7,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> localversion-rt | 1 + 1 file changed, 1 insertion(+) +Index: 
linux-rt-devel/localversion-rt +=================================================================== --- /dev/null -+++ b/localversion-rt ++++ linux-rt-devel/localversion-rt @@ -0,0 +1 @@ -+-rt7 ++-rt8 diff --git a/patches/mm--rt--Fix-generic-kmap_atomic-for-RT b/patches/mm--rt--Fix-generic-kmap_atomic-for-RT new file mode 100644 index 00000000000000..58245b4343c6c9 --- /dev/null +++ b/patches/mm--rt--Fix-generic-kmap_atomic-for-RT @@ -0,0 +1,42 @@ +Subject: mm: rt: Fix generic kmap_atomic for RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sat, 19 Sep 2015 10:15:00 +0200 + +The update to 4.1 brought in the mainline variant of the pagefault +disable distangling from preempt count. That introduced a +preempt_disable/enable pair in the generic kmap_atomic/kunmap_atomic +implementations which got not converted to the _nort() variant. + +That results in massive 'scheduling while atomic/sleeping function +called from invalid context' splats. + +Fix that up. + +Reported-and-tested-by: Juergen Borleis <jbe@pengutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + include/linux/highmem.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +Index: linux-rt-devel/include/linux/highmem.h +=================================================================== +--- linux-rt-devel.orig/include/linux/highmem.h ++++ linux-rt-devel/include/linux/highmem.h +@@ -66,7 +66,7 @@ static inline void kunmap(struct page *p + + static inline void *kmap_atomic(struct page *page) + { +- preempt_disable(); ++ preempt_disable_nort(); + pagefault_disable(); + return page_address(page); + } +@@ -75,7 +75,7 @@ static inline void *kmap_atomic(struct p + static inline void __kunmap_atomic(void *addr) + { + pagefault_enable(); +- preempt_enable(); ++ preempt_enable_nort(); + } + + #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) diff --git a/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch b/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch deleted 
file mode 100644 index 7767df6b64a3c1..00000000000000 --- a/patches/mmc-sdhci-don-t-provide-hard-irq-handler.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Feb 2015 12:13:36 +0100 -Subject: mmc: sdhci: don't provide hard irq handler - -the sdhci code provides both irq handlers: the primary and the thread -handler. Initially it was meant for the primary handler to be very -short. -The result is not that on -RT we have the primrary handler grabing locks -and this isn't really working. As a hack for now I just push both -handler into the threaded mode. - - -Reported-By: Michal Šmucr <msmucr@gmail.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/mmc/host/sdhci.c | 32 +++++++++++++++++++++++++++----- - 1 file changed, 27 insertions(+), 5 deletions(-) - ---- a/drivers/mmc/host/sdhci.c -+++ b/drivers/mmc/host/sdhci.c -@@ -2691,6 +2691,31 @@ static irqreturn_t sdhci_thread_irq(int - return isr ? IRQ_HANDLED : IRQ_NONE; - } - -+#ifdef CONFIG_PREEMPT_RT_BASE -+static irqreturn_t sdhci_rt_irq(int irq, void *dev_id) -+{ -+ irqreturn_t ret; -+ -+ local_bh_disable(); -+ ret = sdhci_irq(irq, dev_id); -+ local_bh_enable(); -+ if (ret == IRQ_WAKE_THREAD) -+ ret = sdhci_thread_irq(irq, dev_id); -+ return ret; -+} -+#endif -+ -+static int sdhci_req_irq(struct sdhci_host *host) -+{ -+#ifdef CONFIG_PREEMPT_RT_BASE -+ return request_threaded_irq(host->irq, NULL, sdhci_rt_irq, -+ IRQF_SHARED, mmc_hostname(host->mmc), host); -+#else -+ return request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, -+ IRQF_SHARED, mmc_hostname(host->mmc), host); -+#endif -+} -+ - /*****************************************************************************\ - * * - * Suspend/resume * -@@ -2758,9 +2783,7 @@ int sdhci_resume_host(struct sdhci_host - } - - if (!device_may_wakeup(mmc_dev(host->mmc))) { -- ret = request_threaded_irq(host->irq, sdhci_irq, -- sdhci_thread_irq, IRQF_SHARED, -- 
mmc_hostname(host->mmc), host); -+ ret = sdhci_req_irq(host); - if (ret) - return ret; - } else { -@@ -3417,8 +3440,7 @@ int sdhci_add_host(struct sdhci_host *ho - - sdhci_init(host, 0); - -- ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, -- IRQF_SHARED, mmc_hostname(mmc), host); -+ ret = sdhci_req_irq(host); - if (ret) { - pr_err("%s: Failed to request IRQ %d: %d\n", - mmc_hostname(mmc), host->irq, ret); diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random-make-it-work-on-rt.patch index 8da86e0e7102d5..76748a595d5982 100644 --- a/patches/random-make-it-work-on-rt.patch +++ b/patches/random-make-it-work-on-rt.patch @@ -17,8 +17,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kernel/irq/manage.c | 6 ++++++ 5 files changed, 20 insertions(+), 8 deletions(-) ---- a/drivers/char/random.c -+++ b/drivers/char/random.c +Index: linux-rt-devel/drivers/char/random.c +=================================================================== +--- linux-rt-devel.orig/drivers/char/random.c ++++ linux-rt-devel/drivers/char/random.c @@ -868,28 +868,27 @@ static __u32 get_reg(struct fast_pool *f return *(ptr + f->reg_idx++); } @@ -53,8 +55,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> fast_mix(fast_pool); add_interrupt_bench(cycles); ---- a/include/linux/irqdesc.h -+++ b/include/linux/irqdesc.h +Index: linux-rt-devel/include/linux/irqdesc.h +=================================================================== +--- linux-rt-devel.orig/include/linux/irqdesc.h ++++ linux-rt-devel/include/linux/irqdesc.h @@ -63,6 +63,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; @@ -63,8 +67,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> raw_spinlock_t lock; struct cpumask *percpu_enabled; #ifdef CONFIG_SMP ---- a/include/linux/random.h -+++ b/include/linux/random.h +Index: linux-rt-devel/include/linux/random.h +=================================================================== +--- 
linux-rt-devel.orig/include/linux/random.h ++++ linux-rt-devel/include/linux/random.h @@ -11,7 +11,7 @@ extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, @@ -74,8 +80,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); ---- a/kernel/irq/handle.c -+++ b/kernel/irq/handle.c +Index: linux-rt-devel/kernel/irq/handle.c +=================================================================== +--- linux-rt-devel.orig/kernel/irq/handle.c ++++ linux-rt-devel/kernel/irq/handle.c @@ -133,6 +133,8 @@ void __irq_wake_thread(struct irq_desc * irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) @@ -98,11 +106,13 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!noirqdebug) note_interrupt(irq, desc, retval); ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -991,6 +991,12 @@ static int irq_thread(void *data) - if (action_ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); +Index: linux-rt-devel/kernel/irq/manage.c +=================================================================== +--- linux-rt-devel.orig/kernel/irq/manage.c ++++ linux-rt-devel/kernel/irq/manage.c +@@ -1012,6 +1012,12 @@ static int irq_thread(void *data) + if (action_ret == IRQ_WAKE_THREAD) + irq_wake_secondary(desc, action); +#ifdef CONFIG_PREEMPT_RT_FULL + migrate_disable(); diff --git a/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch b/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch new file mode 100644 index 00000000000000..8d54636e8f4679 --- /dev/null +++ b/patches/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch @@ -0,0 +1,97 @@ +Subject: ARM: smp: Move clear_tasks_mm_cpumask() call to __cpu_die() +From: Grygorii Strashko 
<grygorii.strashko@ti.com> +Date: Fri, 11 Sep 2015 21:21:23 +0300 + +When running with the RT-kernel (4.1.5-rt5) on TI OMAP dra7-evm and trying +to do Suspend to RAM, the following backtrace occurs: + + Disabling non-boot CPUs ... + PM: noirq suspend of devices complete after 7.295 msecs + Disabling non-boot CPUs ... + BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917 + in_atomic(): 1, irqs_disabled(): 128, pid: 18, name: migration/1 + INFO: lockdep is turned off. + irq event stamp: 122 + hardirqs last enabled at (121): [<c06ac0ac>] _raw_spin_unlock_irqrestore+0x88/0x90 + hardirqs last disabled at (122): [<c06abed0>] _raw_spin_lock_irq+0x28/0x5c + softirqs last enabled at (0): [<c003d294>] copy_process.part.52+0x410/0x19d8 + softirqs last disabled at (0): [< (null)>] (null) + Preemption disabled at:[< (null)>] (null) + CPU: 1 PID: 18 Comm: migration/1 Tainted: G W 4.1.4-rt3-01046-g96ac8da #204 + Hardware name: Generic DRA74X (Flattened Device Tree) + [<c0019134>] (unwind_backtrace) from [<c0014774>] (show_stack+0x20/0x24) + [<c0014774>] (show_stack) from [<c06a70f4>] (dump_stack+0x88/0xdc) + [<c06a70f4>] (dump_stack) from [<c006cab8>] (___might_sleep+0x198/0x2a8) + [<c006cab8>] (___might_sleep) from [<c06ac4dc>] (rt_spin_lock+0x30/0x70) + [<c06ac4dc>] (rt_spin_lock) from [<c013f790>] (find_lock_task_mm+0x9c/0x174) + [<c013f790>] (find_lock_task_mm) from [<c00409ac>] (clear_tasks_mm_cpumask+0xb4/0x1ac) + [<c00409ac>] (clear_tasks_mm_cpumask) from [<c00166a4>] (__cpu_disable+0x98/0xbc) + [<c00166a4>] (__cpu_disable) from [<c06a2e8c>] (take_cpu_down+0x1c/0x50) + [<c06a2e8c>] (take_cpu_down) from [<c00f2600>] (multi_cpu_stop+0x11c/0x158) + [<c00f2600>] (multi_cpu_stop) from [<c00f2a9c>] (cpu_stopper_thread+0xc4/0x184) + [<c00f2a9c>] (cpu_stopper_thread) from [<c0069058>] (smpboot_thread_fn+0x18c/0x324) + [<c0069058>] (smpboot_thread_fn) from [<c00649c4>] (kthread+0xe8/0x104) + [<c00649c4>] (kthread) from [<c0010058>] 
(ret_from_fork+0x14/0x3c) + CPU1: shutdown + PM: Calling sched_clock_suspend+0x0/0x40 + PM: Calling timekeeping_suspend+0x0/0x2e0 + PM: Calling irq_gc_suspend+0x0/0x68 + PM: Calling fw_suspend+0x0/0x2c + PM: Calling cpu_pm_suspend+0x0/0x28 + +Also, sometimes system stucks right after displaying "Disabling non-boot +CPUs ...". The root cause of above backtrace is task_lock() which takes +a sleeping lock on -RT. + +To fix the issue, move clear_tasks_mm_cpumask() call from __cpu_disable() +to __cpu_die() which is called on the thread which is asking for a target +CPU to be shutdown. In addition, this change restores CPUhotplug functionality +on TI OMAP dra7-evm and CPU1 can be unplugged/plugged many times. + +Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> +Cc: Steven Rostedt <rostedt@goodmis.org> +Cc: <linux-arm-kernel@lists.infradead.org> +Cc: Sekhar Nori <nsekhar@ti.com> +Cc: Austin Schuh <austin@peloton-tech.com> +Cc: <philipp@peloton-tech.com> +Cc: Russell King <linux@arm.linux.org.uk> +Cc: <bigeasy@linutronix.de> +Cc: stable-rt@vger.kernel.org +Link: http://lkml.kernel.org/r/1441995683-30817-1-git-send-email-grygorii.strashko@ti.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + +RFC: I'm not sure how safe this change is and will be appreciated for any comments. +Most of arches call clear_tasks_mm_cpumask() from __cpu_disable(), but *powerpc* +calls it from CPU_DEAD notifier. This patch follows powerpc's approach in +general. 
+ +This issue was first reported in: + http://www.spinics.net/lists/linux-rt-users/msg13752.html + + arch/arm/kernel/smp.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +Index: linux-rt-devel/arch/arm/kernel/smp.c +=================================================================== +--- linux-rt-devel.orig/arch/arm/kernel/smp.c ++++ linux-rt-devel/arch/arm/kernel/smp.c +@@ -213,8 +213,6 @@ int __cpu_disable(void) + flush_cache_louis(); + local_flush_tlb_all(); + +- clear_tasks_mm_cpumask(cpu); +- + return 0; + } + +@@ -230,6 +228,9 @@ void __cpu_die(unsigned int cpu) + pr_err("CPU%u: cpu didn't die\n", cpu); + return; + } ++ ++ clear_tasks_mm_cpumask(cpu); ++ + pr_notice("CPU%u: shutdown\n", cpu); + + /* diff --git a/patches/series b/patches/series index bdbd1e8462b7f6..3c403bb145e7e3 100644 --- a/patches/series +++ b/patches/series @@ -5,6 +5,7 @@ ############################################################ # UPSTREAM changes queued ############################################################ +xfs--clean-up-inode-lockdep-annotations ############################################################ # UPSTREAM FIXES, patches pending @@ -34,11 +35,13 @@ mm-slub-move-slab-initialization-into-irq-enabled-region.patch # Stuff broken upstream, patches submitted ############################################################ cpufreq-Remove-cpufreq_rwsem.patch +genirq--Handle-interrupts-with-primary-and-threaded-handler-gracefully ############################################################ # Stuff which needs addressing upstream, but requires more # information ############################################################ +rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch ############################################################ # Stuff broken upstream, need to be sent @@ -155,6 +158,7 @@ futex-avoid-double-wake-up-in-PI-futex-wait-wake-on-.patch # TRACING latency-hist.patch +tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch # HW 
LATENCY DETECTOR - this really wants a rewrite hwlatdetect.patch @@ -466,6 +470,7 @@ sysfs-realtime-entry.patch power-disable-highmem-on-rt.patch mips-disable-highmem-on-rt.patch mm-rt-kmap-atomic-scheduling.patch +mm--rt--Fix-generic-kmap_atomic-for-RT x86-highmem-add-a-already-used-pte-check.patch arm-highmem-flush-tlb-on-unmap.patch arm-enable-highmem-for-rt.patch @@ -549,7 +554,6 @@ leds-trigger-disable-CPU-trigger-on-RT.patch # DRIVERS i2c-omap-drop-the-lock-hard-irq-context.patch mmci-remove-bogus-irq-save.patch -mmc-sdhci-don-t-provide-hard-irq-handler.patch cpufreq-drop-K8-s-driver-from-beeing-selected.patch # I915 diff --git a/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch b/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch new file mode 100644 index 00000000000000..054c1c904ec724 --- /dev/null +++ b/patches/tracing-fix-rcu-splat-from-idle-cpu-on-boot.patch @@ -0,0 +1,86 @@ +Subject: tracing: Fix rcu splat from idle CPU on boot +From: Philipp Schrader <philipp@peloton-tech.com> +Date: Thu, 3 Sep 2015 14:29:14 -0700 + +With PREEMPT_RT and most of the lockdep-related options enabled I +encountered this splat when booting our DRA7 evaluation module: + +[ 0.055073] +[ 0.055076] =============================== +[ 0.055079] [ INFO: suspicious RCU usage. ] +[ 0.055084] 4.1.6+ #2 Not tainted +[ 0.055086] ------------------------------- +[ 0.055090] include/trace/events/hist.h:31 suspicious +rcu_dereference_check() usage! +[ 0.055093] +[ 0.055093] other info that might help us debug this: +[ 0.055093] +[ 0.055097] +[ 0.055097] RCU used illegally from idle CPU! +[ 0.055097] rcu_scheduler_active = 1, debug_locks = 1 +[ 0.055100] RCU used illegally from extended quiescent state! +[ 0.055104] no locks held by swapper/0/0. 
+[ 0.055106] +[ 0.055106] stack backtrace: +[ 0.055112] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.6+ #2 +[ 0.055116] Hardware name: Generic DRA74X (Flattened Device Tree) +[ 0.055130] [<c00196b8>] (unwind_backtrace) from [<c001515c>] +(show_stack+0x20/0x24) +[ 0.055146] [<c001515c>] (show_stack) from [<c07bc408>] +(dump_stack+0x84/0xa0) +[ 0.055160] [<c07bc408>] (dump_stack) from [<c009bc38>] +(lockdep_rcu_suspicious+0xb0/0x110) +[ 0.055172] [<c009bc38>] (lockdep_rcu_suspicious) from [<c01246c4>] +(time_hardirqs_off+0x2b8/0x3c8) +[ 0.055184] [<c01246c4>] (time_hardirqs_off) from [<c009a218>] +(trace_hardirqs_off_caller+0x2c/0xf4) +[ 0.055194] [<c009a218>] (trace_hardirqs_off_caller) from +[<c009a2f4>] (trace_hardirqs_off+0x14/0x18) +[ 0.055204] [<c009a2f4>] (trace_hardirqs_off) from [<c00c7ecc>] +(rcu_idle_enter+0x78/0xcc) +[ 0.055213] [<c00c7ecc>] (rcu_idle_enter) from [<c0093eb0>] +(cpu_startup_entry+0x190/0x518) +[ 0.055222] [<c0093eb0>] (cpu_startup_entry) from [<c07b95b4>] +(rest_init+0x13c/0x17c) +[ 0.055231] [<c07b95b4>] (rest_init) from [<c0b32c74>] +(start_kernel+0x320/0x380) +[ 0.055238] [<c0b32c74>] (start_kernel) from [<8000807c>] (0x8000807c) + +As per Steve Rotstedt's suggestion I changed the trace_* calls to +trace_*_rcuidle calls. He pointed out that the trace points were getting +triggered when rcu wasn't watching. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Steven Rostedt <rostedt@goodmis.org> +Cc: stable-rt@vger.kernel.org +--- + kernel/trace/trace_irqsoff.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c +index aaade2e..d0e1d0e 100644 +--- a/kernel/trace/trace_irqsoff.c ++++ b/kernel/trace/trace_irqsoff.c +@@ -450,7 +450,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); + #ifdef CONFIG_PROVE_LOCKING + void time_hardirqs_on(unsigned long a0, unsigned long a1) + { +- trace_preemptirqsoff_hist(IRQS_ON, 0); ++ trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); + if (!preempt_trace() && irq_trace()) + stop_critical_timing(a0, a1); + } +@@ -459,7 +459,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) + { + if (!preempt_trace() && irq_trace()) + start_critical_timing(a0, a1); +- trace_preemptirqsoff_hist(IRQS_OFF, 1); ++ trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); + } + + #else /* !CONFIG_PROVE_LOCKING */ +-- +2.1.4 + + + diff --git a/patches/xfs--clean-up-inode-lockdep-annotations b/patches/xfs--clean-up-inode-lockdep-annotations new file mode 100644 index 00000000000000..b11e5f69263467 --- /dev/null +++ b/patches/xfs--clean-up-inode-lockdep-annotations @@ -0,0 +1,285 @@ +Subject: xfs: clean up inode lockdep annotations +From: Dave Chinner <dchinner@redhat.com> +Date: Wed Aug 19 10:32:49 2015 +1000 + +Upstream commit 0952c8183c1575a78dc416b5e168987ff98728bb + +Lockdep annotations are a maintenance nightmare. Locking has to be +modified to suit the limitations of the annotations, and we're +always having to fix the annotations because they are unable to +express the complexity of locking heirarchies correctly. + +So, next up, we've got more issues with lockdep annotations for +inode locking w.r.t. XFS_LOCK_PARENT: + + - lockdep classes are exclusive and can't be ORed together + to form new classes. 
+ - IOLOCK needs multiple PARENT subclasses to express the + changes needed for the readdir locking rework needed to + stop the endless flow of lockdep false positives involving + readdir calling filldir under the ILOCK. + - there are only 8 unique lockdep subclasses available, + so we can't create a generic solution. + +IOWs we need to treat the 3-bit space available to each lock type +differently: + + - IOLOCK uses xfs_lock_two_inodes(), so needs: + - at least 2 IOLOCK subclasses + - at least 2 IOLOCK_PARENT subclasses + - MMAPLOCK uses xfs_lock_two_inodes(), so needs: + - at least 2 MMAPLOCK subclasses + - ILOCK uses xfs_lock_inodes with up to 5 inodes, so needs: + - at least 5 ILOCK subclasses + - one ILOCK_PARENT subclass + - one RTBITMAP subclass + - one RTSUM subclass + +For the IOLOCK, split the space into two sets of subclasses. +For the MMAPLOCK, just use half the space for the one subclass to +match the non-parent lock classes of the IOLOCK. +For the ILOCK, use 0-4 as the ILOCK subclasses, 5-7 for the +remaining individual subclasses. + +Because they are now all different, modify xfs_lock_inumorder() to +handle the nested subclasses, and to assert fail if passed an +invalid subclass. Further, annotate xfs_lock_inodes() to assert fail +if an invalid combination of lock primitives and inode counts are +passed that would result in a lockdep subclass annotation overflow. 
+ +Signed-off-by: Dave Chinner <dchinner@redhat.com> +Reviewed-by: Brian Foster <bfoster@redhat.com> +Signed-off-by: Dave Chinner <david@fromorbit.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + fs/xfs/xfs_inode.c | 68 ++++++++++++++++++++++++++++++++++----------- + fs/xfs/xfs_inode.h | 79 +++++++++++++++++++++++++++++++++++++---------------- + 2 files changed, 107 insertions(+), 40 deletions(-) + +Index: linux-rt-devel/fs/xfs/xfs_inode.c +=================================================================== +--- linux-rt-devel.orig/fs/xfs/xfs_inode.c ++++ linux-rt-devel/fs/xfs/xfs_inode.c +@@ -164,7 +164,7 @@ xfs_ilock( + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); +- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); ++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + + if (lock_flags & XFS_IOLOCK_EXCL) + mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); +@@ -212,7 +212,7 @@ xfs_ilock_nowait( + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); +- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); ++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + + if (lock_flags & XFS_IOLOCK_EXCL) { + if (!mrtryupdate(&ip->i_iolock)) +@@ -281,7 +281,7 @@ xfs_iunlock( + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); + ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != + (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); +- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); ++ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); + ASSERT(lock_flags != 0); + + if (lock_flags & XFS_IOLOCK_EXCL) +@@ -364,30 +364,38 @@ int xfs_lock_delays; + + /* + * Bump the subclass so xfs_lock_inodes() acquires each lock with a different +- * value. 
This shouldn't be called for page fault locking, but we also need to +- * ensure we don't overrun the number of lockdep subclasses for the iolock or +- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. ++ * value. This can be called for any type of inode lock combination, including ++ * parent locking. Care must be taken to ensure we don't overrun the subclass ++ * storage fields in the class mask we build. + */ + static inline int + xfs_lock_inumorder(int lock_mode, int subclass) + { ++ int class = 0; ++ ++ ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP | ++ XFS_ILOCK_RTSUM))); ++ + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { +- ASSERT(subclass + XFS_LOCK_INUMORDER < +- (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); +- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; ++ ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); ++ ASSERT(subclass + XFS_IOLOCK_PARENT_VAL < ++ MAX_LOCKDEP_SUBCLASSES); ++ class += subclass << XFS_IOLOCK_SHIFT; ++ if (lock_mode & XFS_IOLOCK_PARENT) ++ class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; + } + + if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { +- ASSERT(subclass + XFS_LOCK_INUMORDER < +- (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); +- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << +- XFS_MMAPLOCK_SHIFT; ++ ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS); ++ class += subclass << XFS_MMAPLOCK_SHIFT; + } + +- if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) +- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; ++ if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) { ++ ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS); ++ class += subclass << XFS_ILOCK_SHIFT; ++ } + +- return lock_mode; ++ return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class; + } + + /* +@@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int su + * transaction (such as truncate). 
This can result in deadlock since the long + * running trans might need to wait for the inode we just locked in order to + * push the tail and free space in the log. ++ * ++ * xfs_lock_inodes() can only be used to lock one type of lock at a time - ++ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we ++ * lock more than one at a time, lockdep will report false positives saying we ++ * have violated locking orders. + */ + void + xfs_lock_inodes( +@@ -409,8 +422,29 @@ xfs_lock_inodes( + int attempts = 0, i, j, try_lock; + xfs_log_item_t *lp; + +- /* currently supports between 2 and 5 inodes */ ++ /* ++ * Currently supports between 2 and 5 inodes with exclusive locking. We ++ * support an arbitrary depth of locking here, but absolute limits on ++ * inodes depend on the the type of locking and the limits placed by ++ * lockdep annotations in xfs_lock_inumorder. These are all checked by ++ * the asserts. ++ */ + ASSERT(ips && inodes >= 2 && inodes <= 5); ++ ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | ++ XFS_ILOCK_EXCL)); ++ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | ++ XFS_ILOCK_SHARED))); ++ ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || ++ inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); ++ ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || ++ inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); ++ ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || ++ inodes <= XFS_ILOCK_MAX_SUBCLASS + 1); ++ ++ if (lock_mode & XFS_IOLOCK_EXCL) { ++ ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL))); ++ } else if (lock_mode & XFS_MMAPLOCK_EXCL) ++ ASSERT(!(lock_mode & XFS_ILOCK_EXCL)); + + try_lock = 0; + i = 0; +Index: linux-rt-devel/fs/xfs/xfs_inode.h +=================================================================== +--- linux-rt-devel.orig/fs/xfs/xfs_inode.h ++++ linux-rt-devel/fs/xfs/xfs_inode.h +@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct + * Flags for lockdep annotations. 
+ * + * XFS_LOCK_PARENT - for directory operations that require locking a +- * parent directory inode and a child entry inode. The parent gets locked +- * with this flag so it gets a lockdep subclass of 1 and the child entry +- * lock will have a lockdep subclass of 0. ++ * parent directory inode and a child entry inode. IOLOCK requires nesting, ++ * MMAPLOCK does not support this class, ILOCK requires a single subclass ++ * to differentiate parent from child. + * + * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary + * inodes do not participate in the normal lock order, and thus have their +@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct + * XFS_LOCK_INUMORDER - for locking several inodes at the some time + * with xfs_lock_inodes(). This flag is used as the starting subclass + * and each subsequent lock acquired will increment the subclass by one. +- * So the first lock acquired will have a lockdep subclass of 4, the +- * second lock will have a lockdep subclass of 5, and so on. It is +- * the responsibility of the class builder to shift this to the correct +- * portion of the lock_mode lockdep mask. ++ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly ++ * limited to the subclasses we can represent via nesting. We need at least ++ * 5 inodes nest depth for the ILOCK through rename, and we also have to support ++ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP ++ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all ++ * 8 subclasses supported by lockdep. ++ * ++ * This also means we have to number the sub-classes in the lowest bits of ++ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep ++ * mask and we can't use bit-masking to build the subclasses. What a mess. 
++ * ++ * Bit layout: ++ * ++ * Bit Lock Region ++ * 16-19 XFS_IOLOCK_SHIFT dependencies ++ * 20-23 XFS_MMAPLOCK_SHIFT dependencies ++ * 24-31 XFS_ILOCK_SHIFT dependencies ++ * ++ * IOLOCK values ++ * ++ * 0-3 subclass value ++ * 4-7 PARENT subclass values ++ * ++ * MMAPLOCK values ++ * ++ * 0-3 subclass value ++ * 4-7 unused ++ * ++ * ILOCK values ++ * 0-4 subclass values ++ * 5 PARENT subclass (not nestable) ++ * 6 RTBITMAP subclass (not nestable) ++ * 7 RTSUM subclass (not nestable) ++ * + */ +-#define XFS_LOCK_PARENT 1 +-#define XFS_LOCK_RTBITMAP 2 +-#define XFS_LOCK_RTSUM 3 +-#define XFS_LOCK_INUMORDER 4 +- +-#define XFS_IOLOCK_SHIFT 16 +-#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) ++#define XFS_IOLOCK_SHIFT 16 ++#define XFS_IOLOCK_PARENT_VAL 4 ++#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) ++#define XFS_IOLOCK_DEP_MASK 0x000f0000 ++#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) + +-#define XFS_MMAPLOCK_SHIFT 20 ++#define XFS_MMAPLOCK_SHIFT 20 ++#define XFS_MMAPLOCK_NUMORDER 0 ++#define XFS_MMAPLOCK_MAX_SUBCLASS 3 ++#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 + +-#define XFS_ILOCK_SHIFT 24 +-#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) +-#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) +-#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) ++#define XFS_ILOCK_SHIFT 24 ++#define XFS_ILOCK_PARENT_VAL 5 ++#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1) ++#define XFS_ILOCK_RTBITMAP_VAL 6 ++#define XFS_ILOCK_RTSUM_VAL 7 ++#define XFS_ILOCK_DEP_MASK 0xff000000 ++#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT) ++#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT) ++#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT) + +-#define XFS_IOLOCK_DEP_MASK 0x000f0000 +-#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 +-#define XFS_ILOCK_DEP_MASK 0xff000000 +-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ ++#define 
XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \ + XFS_MMAPLOCK_DEP_MASK | \ + XFS_ILOCK_DEP_MASK) + |