author     Paul Gortmaker <paul.gortmaker@windriver.com>   2013-04-05 18:50:13 -0400
committer  Paul Gortmaker <paul.gortmaker@windriver.com>   2013-04-05 18:50:13 -0400
commit     2ea0c1b15819d8a162c1a52d93d69776ea1e5265 (patch)
tree       fba07164bf90b36aa5fc51848e9087aac423df9a
parent     d7d35d919088ba501f1c8d00fdc5ba2485383366 (diff)
download   3.8-rt-patches-2ea0c1b15819d8a162c1a52d93d69776ea1e5265.tar.gz

patches-3.8.4-rt1.tar.xz (v3.8.4-rt1)
md5sum: fb2132c1466f1e2c3fb35a57d512a305  patches-3.8.4-rt1.tar.xz

Announce:
-----------------------------
Dear RT Folks,

I'm pleased to announce the 3.8.4-rt1 release. Again the credit for the
heavy lifting goes to Sebastian Siewior, AKA bigeasy, who took up most of
the work to get this out. He's on my company's engineering team and I hope
you trust him as much as I do.

Known issues:

  - SLUB behaves worse than SLAB on ARM
  - SLAB is broken on PowerPC

Still, we think that it's time to get the stuff out for broader testing.
It's -rt1 and we need your help to get this stabilized.

The RT patch against 3.8.4 can be found here:

  http://www.kernel.org/pub/linux/kernel/projects/rt/3.8/patch-3.8.4-rt1.patch.xz

The split quilt queue is available at:

  http://www.kernel.org/pub/linux/kernel/projects/rt/3.8/patches-3.8.4-rt1.tar.xz

Enjoy,

        tglx
-----------------------------
http://marc.info/?l=linux-kernel&m=136399022624459&w=2

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
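
As a minimal sketch (not part of the announcement above), one way to fetch, verify, and apply the split quilt queue on top of a vanilla 3.8.4 tree might look like the following; the extraction layout (a top-level patches/ directory) and the use of quilt are assumptions:

  wget http://www.kernel.org/pub/linux/kernel/projects/rt/3.8/patches-3.8.4-rt1.tar.xz
  md5sum patches-3.8.4-rt1.tar.xz                    # expect fb2132c1466f1e2c3fb35a57d512a305
  tar -xJf patches-3.8.4-rt1.tar.xz -C linux-3.8.4   # assumed to unpack patches/ next to the kernel sources
  cd linux-3.8.4
  quilt push -a                                      # apply every patch listed in patches/series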
-rw-r--r--patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch36
-rw-r--r--patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch83
-rw-r--r--patches/0002-kernel-SRCU-provide-a-static-initializer.patch100
-rw-r--r--patches/0002-x86-highmem-add-a-already-used-pte-check.patch23
-rw-r--r--patches/0003-arm-highmem-flush-tlb-on-unmap.patch28
-rw-r--r--patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch44
-rw-r--r--patches/HACK-printk-drop-the-logbuf_lock-more-often.patch77
-rw-r--r--patches/acpi-use-local-irq-nort.patch25
-rw-r--r--patches/arch-use-pagefault-disabled.patch286
-rw-r--r--patches/arm-allow-irq-threading.patch22
-rw-r--r--patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch54
-rw-r--r--patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch32
-rw-r--r--patches/arm-convert-boot-lock-to-raw.patch279
-rw-r--r--patches/arm-disable-highmem-on-rt.patch20
-rw-r--r--patches/arm-enable-highmem-for-rt.patch140
-rw-r--r--patches/arm-mark-pmu-interupt-no-thread.patch23
-rw-r--r--patches/arm-omap-make-wakeupgen_lock-raw.patch62
-rw-r--r--patches/arm-preempt-lazy-support.patch103
-rw-r--r--patches/ata-disable-interrupts-if-non-rt.patch64
-rw-r--r--patches/block-shorten-interrupt-disabled-regions.patch97
-rw-r--r--patches/block-use-cpu-chill.patch45
-rw-r--r--patches/bug-rt-dependend-variants.patch34
-rw-r--r--patches/clocksource-tclib-allow-higher-clockrates.patch159
-rw-r--r--patches/completion-use-simple-wait-queues.patch155
-rw-r--r--patches/cond-resched-lock-rt-tweak.patch20
-rw-r--r--patches/cond-resched-softirq-rt.patch47
-rw-r--r--patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch125
-rw-r--r--patches/cpu-rt-rework-cpu-down.patch548
-rw-r--r--patches/cpu-rt-variants.patch26
-rw-r--r--patches/cpumask-disable-offstack-on-rt.patch34
-rw-r--r--patches/debugobjects-rt.patch23
-rw-r--r--patches/dm-make-rt-aware.patch34
-rw-r--r--patches/drivers-net-8139-disable-irq-nosync.patch25
-rw-r--r--patches/drivers-net-fix-livelock-issues.patch126
-rw-r--r--patches/drivers-net-gianfar-make-rt-aware.patch55
-rw-r--r--patches/drivers-net-tulip-add-missing-pci-disable.patch23
-rw-r--r--patches/drivers-net-vortex-fix-locking-issues.patch48
-rw-r--r--patches/drivers-random-reduce-preempt-disabled-region.patch39
-rw-r--r--patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch28
-rw-r--r--patches/drivers-serial-cleanup-locking-for-rt.patch42
-rw-r--r--patches/drivers-tty-fix-omap-lock-crap.patch38
-rw-r--r--patches/drivers-tty-pl011-irq-disable-madness.patch44
-rw-r--r--patches/early-printk-consolidate.patch485
-rw-r--r--patches/epoll-use-get-cpu-light.patch26
-rw-r--r--patches/filemap-fix-up.patch22
-rw-r--r--patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch34
-rw-r--r--patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch65
-rw-r--r--patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch34
-rw-r--r--patches/fix-rt-int3-x86_32-3.2-rt.patch112
-rw-r--r--patches/fs-block-rt-support.patch40
-rw-r--r--patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch94
-rw-r--r--patches/fs-jbd-pull-plug-when-waiting-for-space.patch29
-rw-r--r--patches/fs-jbd-replace-bh_state-lock.patch100
-rw-r--r--patches/fs-namespace-preemption-fix.patch30
-rw-r--r--patches/fs-ntfs-disable-interrupt-non-rt.patch59
-rw-r--r--patches/fs-replace-bh_uptodate_lock-for-rt.patch161
-rw-r--r--patches/ftrace-migrate-disable-tracing.patch73
-rw-r--r--patches/futex-requeue-pi-fix.patch114
-rw-r--r--patches/generic-cmpxchg-use-raw-local-irq.patch47
-rw-r--r--patches/genirq-add-default-mask-cmdline-option.patch66
-rw-r--r--patches/genirq-disable-irqpoll-on-rt.patch39
-rw-r--r--patches/genirq-force-threading.patch46
-rw-r--r--patches/genirq-nodebug-shirq.patch20
-rw-r--r--patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch35
-rw-r--r--patches/hotplug-call-cpu_unplug_begin-a-little-early.patch59
-rw-r--r--patches/hotplug-light-get-online-cpus.patch208
-rw-r--r--patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch24
-rw-r--r--patches/hotplug-use-migrate-disable.patch36
-rw-r--r--patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch461
-rw-r--r--patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch37
-rw-r--r--patches/hrtimers-prepare-full-preemption.patch195
-rw-r--r--patches/hwlatdetect.patch1344
-rw-r--r--patches/i2c-omap-drop-the-lock-hard-irq-context.patch34
-rw-r--r--patches/ide-use-nort-local-irq-variants.patch169
-rw-r--r--patches/idle-state.patch19
-rw-r--r--patches/infiniband-mellanox-ib-use-nort-irq.patch40
-rw-r--r--patches/inpt-gameport-use-local-irq-nort.patch44
-rw-r--r--patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch67
-rw-r--r--patches/ipc-make-rt-aware.patch85
-rw-r--r--patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch64
-rw-r--r--patches/ipc-sem-rework-semaphore-wakeups.patch73
-rw-r--r--patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch144
-rw-r--r--patches/jump-label-rt.patch21
-rw-r--r--patches/kconfig-disable-a-few-options-rt.patch44
-rw-r--r--patches/kconfig-preempt-rt-full.patch56
-rw-r--r--patches/kgb-serial-hackaround.patch102
-rw-r--r--patches/latency-hist.patch1804
-rw-r--r--patches/lglocks-rt.patch173
-rw-r--r--patches/list-add-list-last-entry.patch29
-rw-r--r--patches/local-irq-rt-depending-variants.patch52
-rw-r--r--patches/local-var.patch23
-rw-r--r--patches/local-vars-migrate-disable.patch46
-rw-r--r--patches/localversion.patch15
-rw-r--r--patches/lockdep-no-softirq-accounting-on-rt.patch56
-rw-r--r--patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch90
-rw-r--r--patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch56
-rw-r--r--patches/locking-various-init-fixes.patch74
-rw-r--r--patches/md-raid5-percpu-handling-rt-aware.patch61
-rw-r--r--patches/might-sleep-check-for-idle.patch23
-rw-r--r--patches/migrate-disable-rt-variant.patch27
-rw-r--r--patches/mips-disable-highmem-on-rt.patch20
-rw-r--r--patches/mips-enable-interrupts-in-signal.patch19
-rw-r--r--patches/mm-allow-slab-rt.patch29
-rw-r--r--patches/mm-bounce-local-irq-save-nort.patch27
-rw-r--r--patches/mm-cgroup-page-bit-spinlock.patch91
-rw-r--r--patches/mm-convert-swap-to-percpu-locked.patch113
-rw-r--r--patches/mm-enable-slub.patch402
-rw-r--r--patches/mm-make-vmstat-rt-aware.patch84
-rw-r--r--patches/mm-page-alloc-fix.patch22
-rw-r--r--patches/mm-page-alloc-use-list-last-entry.patch20
-rw-r--r--patches/mm-page-alloc-use-local-lock-on-target-cpu.patch55
-rw-r--r--patches/mm-page_alloc-reduce-lock-sections-further.patch219
-rw-r--r--patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch213
-rw-r--r--patches/mm-prepare-pf-disable-discoupling.patch118
-rw-r--r--patches/mm-protect-activate-switch-mm.patch69
-rw-r--r--patches/mm-remove-preempt-count-from-pf.patch34
-rw-r--r--patches/mm-rt-kmap-atomic-scheduling.patch274
-rw-r--r--patches/mm-scatterlist-dont-disable-irqs-on-RT.patch38
-rw-r--r--patches/mm-shrink-the-page-frame-to-rt-size.patch140
-rw-r--r--patches/mm-slab-more-lock-breaks.patch229
-rw-r--r--patches/mm-slab-move-debug-out.patch37
-rw-r--r--patches/mm-slab-wrap-functions.patch458
-rw-r--r--patches/mm-vmalloc-use-get-cpu-light.patch64
-rw-r--r--patches/mmci-remove-bogus-irq-save.patch39
-rw-r--r--patches/mutex-no-spin-on-rt.patch17
-rw-r--r--patches/net-another-local-irq-disable-alloc-atomic-headache.patch47
-rw-r--r--patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch94
-rw-r--r--patches/net-flip-lock-dep-thingy.patch111
-rw-r--r--patches/net-make-devnet_rename_seq-a-mutex.patch150
-rw-r--r--patches/net-netif-rx-ni-use-local-bh-disable.patch31
-rw-r--r--patches/net-netif_rx_ni-migrate-disable.patch25
-rw-r--r--patches/net-tx-action-avoid-livelock-on-rt.patch92
-rw-r--r--patches/net-use-cpu-chill.patch62
-rw-r--r--patches/net-use-cpu-light-in-ip-send-unicast-reply.patch30
-rw-r--r--patches/net-wireless-warn-nort.patch20
-rw-r--r--patches/ntp-make-ntp-lock-raw-sigh.patch125
-rw-r--r--patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch449
-rw-r--r--patches/of-fixup-resursive-locking-code-paths.patch209
-rw-r--r--patches/oleg-signal-rt-fix.patch142
-rw-r--r--patches/panic-disable-random-on-rt.patch21
-rw-r--r--patches/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch173
-rw-r--r--patches/pci-access-use-__wake_up_all_locked.patch25
-rw-r--r--patches/perf-make-swevent-hrtimer-irqsafe.patch68
-rw-r--r--patches/perf-move-irq-work-to-softirq-in-rt.patch61
-rw-r--r--patches/peter_zijlstra-frob-migrate_disable-2.patch174
-rw-r--r--patches/peter_zijlstra-frob-migrate_disable.patch67
-rw-r--r--patches/peter_zijlstra-frob-pagefault_disable.patch342
-rw-r--r--patches/peter_zijlstra-frob-rcu.patch166
-rw-r--r--patches/peterz-raw_pagefault_disable.patch147
-rw-r--r--patches/peterz-srcu-crypto-chain.patch182
-rw-r--r--patches/pid-h-include-atomic-h.patch19
-rw-r--r--patches/ping-sysrq.patch121
-rw-r--r--patches/posix-timers-avoid-wakeups-when-no-timers-are-active.patch57
-rw-r--r--patches/posix-timers-no-broadcast.patch33
-rw-r--r--patches/posix-timers-shorten-cpu-timers-thread.patch26
-rw-r--r--patches/posix-timers-thread-posix-cpu-timers-on-rt.patch303
-rw-r--r--patches/power-disable-highmem-on-rt.patch20
-rw-r--r--patches/power-use-generic-rwsem-on-rt.patch23
-rw-r--r--patches/powerpc-fsl-msi-use-a-different-locklcass-for-the-ca.patch35
-rw-r--r--patches/powerpc-preempt-lazy-support.patch166
-rw-r--r--patches/ppc-mark-low-level-handlers-no-thread.patch35
-rw-r--r--patches/preempt-lazy-support.patch581
-rw-r--r--patches/preempt-nort-rt-variants.patch52
-rw-r--r--patches/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch35
-rw-r--r--patches/printk-kill.patch115
-rw-r--r--patches/printk-rt-aware.patch101
-rw-r--r--patches/radix-tree-rt-aware.patch68
-rw-r--r--patches/random-make-it-work-on-rt.patch114
-rw-r--r--patches/rcu-disable-rcu-fast-no-hz-on-rt.patch24
-rw-r--r--patches/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch258
-rw-r--r--patches/rcu-tiny-merge-bh.patch24
-rw-r--r--patches/rcu-tiny-solve-rt-mistery.patch42
-rw-r--r--patches/rcutiny-use-simple-waitqueue.patch79
-rw-r--r--patches/re-migrate_disable-race-with-cpu-hotplug-3f.patch34
-rw-r--r--patches/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch69
-rw-r--r--patches/relay-fix-timer-madness.patch52
-rw-r--r--patches/resource-counters-use-localirq-nort.patch86
-rw-r--r--patches/rfc-printk-don-27t-call-printk_tick-in-printk_needs_cpu.patch45
-rw-r--r--patches/rfc-sched-rt-fix-wait_task_interactive-to-test-rt_spin_lock-state.patch46
-rw-r--r--patches/rt-add-rt-locks.patch894
-rw-r--r--patches/rt-add-rt-spinlock-to-headers.patch118
-rw-r--r--patches/rt-add-rt-to-mutex-headers.patch141
-rw-r--r--patches/rt-introduce-cpu-chill.patch28
-rw-r--r--patches/rt-local-irq-lock.patch250
-rw-r--r--patches/rt-mutex-add-sleeping-spinlocks-support.patch619
-rw-r--r--patches/rt-preempt-base-config.patch49
-rw-r--r--patches/rt-rw-lockdep-annotations.patch133
-rw-r--r--patches/rt-sched-do-not-compare-cpu-masks-in-scheduler.patch38
-rw-r--r--patches/rt-sched-have-migrate_disable-ignore-bounded-threads.patch68
-rw-r--r--patches/rt-sched-postpone-actual-migration-disalbe-to-schedule.patch305
-rw-r--r--patches/rt-serial-warn-fix.patch37
-rw-r--r--patches/rt-tracing-show-padding-as-unsigned-short.patch46
-rw-r--r--patches/rtmutex-avoid-include-hell.patch20
-rw-r--r--patches/rtmutex-futex-prepare-rt.patch215
-rw-r--r--patches/rtmutex-lock-killable.patch80
-rw-r--r--patches/rwsem-add-rt-variant.patch170
-rw-r--r--patches/sched-adjust-reset-on-fork-always.patch31
-rw-r--r--patches/sched-better-debug-output-for-might-sleep.patch71
-rw-r--r--patches/sched-clear-pf-thread-bound-on-fallback-rq.patch24
-rw-r--r--patches/sched-cond-resched.patch32
-rw-r--r--patches/sched-consider-pi-boosting-in-setscheduler.patch161
-rw-r--r--patches/sched-delay-put-task.patch78
-rw-r--r--patches/sched-disable-rt-group-sched-on-rt.patch28
-rw-r--r--patches/sched-disable-ttwu-queue.patch27
-rw-r--r--patches/sched-enqueue-to-head.patch68
-rw-r--r--patches/sched-limit-nr-migrate.patch23
-rw-r--r--patches/sched-might-sleep-do-not-account-rcu-depth.patch45
-rw-r--r--patches/sched-migrate-disable.patch192
-rw-r--r--patches/sched-mmdrop-delayed.patch134
-rw-r--r--patches/sched-rt-fix-migrate_enable-thinko.patch63
-rw-r--r--patches/sched-rt-mutex-wakeup.patch84
-rw-r--r--patches/sched-teach-migrate_disable-about-atomic-contexts.patch88
-rw-r--r--patches/sched-ttwu-ensure-success-return-is-correct.patch34
-rw-r--r--patches/scsi-fcoe-rt-aware.patch111
-rw-r--r--patches/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch47
-rw-r--r--patches/seqlock-prevent-rt-starvation.patch185
-rw-r--r--patches/seqlock-remove-unused-functions.patch44
-rw-r--r--patches/seqlock-use-seqcount.patch218
-rw-r--r--patches/series603
-rw-r--r--patches/signal-fix-up-rcu-wreckage.patch35
-rw-r--r--patches/signal-revert-ptrace-preempt-magic.patch27
-rw-r--r--patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch205
-rw-r--r--patches/skbufhead-raw-lock.patch127
-rw-r--r--patches/slub-enable-irqs-for-no-wait.patch46
-rw-r--r--patches/softirq-adapt-nohz-pending-debug-code-to-new-scheme.patch174
-rw-r--r--patches/softirq-disable-softirq-stacks-for-rt.patch173
-rw-r--r--patches/softirq-export-in-serving-softirq.patch28
-rw-r--r--patches/softirq-init-softirq-local-lock-after-per-cpu-section-is-set-up.patch133
-rw-r--r--patches/softirq-local-lock.patch315
-rw-r--r--patches/softirq-make-fifo.patch50
-rw-r--r--patches/softirq-make-serving-softirqs-a-task-flag.patch74
-rw-r--r--patches/softirq-preempt-fix-3-re.patch145
-rw-r--r--patches/softirq-sanitize-softirq-pending.patch113
-rw-r--r--patches/softirq-split-handling-function.patch68
-rw-r--r--patches/softirq-split-locks.patch452
-rw-r--r--patches/softirq-split-out-code.patch101
-rw-r--r--patches/softirq-thread-do-softirq.patch31
-rw-r--r--patches/spi-omap-mcspi-check-condition-also-after-timeout.patch33
-rw-r--r--patches/spinlock-types-separate-raw.patch204
-rw-r--r--patches/stomp-machine-deal-clever-with-stopper-lock.patch58
-rw-r--r--patches/stomp-machine-mark-stomper-thread.patch30
-rw-r--r--patches/stomp-machine-raw-lock.patch174
-rw-r--r--patches/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch62
-rw-r--r--patches/suspend-prevernt-might-sleep-splats.patch106
-rw-r--r--patches/sysctl-include-atomic-h.patch19
-rw-r--r--patches/sysfs-realtime-entry.patch47
-rw-r--r--patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch402
-rw-r--r--patches/tasklist-lock-fix-section-conflict.patch57
-rw-r--r--patches/timekeeping-delay-clock-cycle-last-update.patch33
-rw-r--r--patches/timekeeping-do-not-calc-crap-over-and-over.patch34
-rw-r--r--patches/timekeeping-implement-shadow-timekeeper.patch156
-rw-r--r--patches/timekeeping-make-jiffies-lock-internal.patch44
-rw-r--r--patches/timekeeping-move-lock-out-of-timekeeper.patch402
-rw-r--r--patches/timekeeping-shorten-seq-count-region.patch44
-rw-r--r--patches/timekeeping-split-jiffies-lock.patch148
-rw-r--r--patches/timekeeping-split-timekeeper-lock.patch427
-rw-r--r--patches/timekeeping-store-cycle-last-in-timekeeper.patch47
-rw-r--r--patches/timer-delay-waking-softirqs-from-the-jiffy-tick.patch75
-rw-r--r--patches/timer-fd-avoid-live-lock.patch27
-rw-r--r--patches/timer-handle-idle-trylock-in-get-next-timer-irq.patch78
-rw-r--r--patches/timers-avoid-the-base-null-otptimization-on-rt.patch68
-rw-r--r--patches/timers-mov-printk_tick-to-soft-interrupt.patch29
-rw-r--r--patches/timers-preempt-rt-support.patch56
-rw-r--r--patches/timers-prepare-for-full-preemption.patch128
-rw-r--r--patches/tracing-account-for-preempt-off-in-preempt_schedule.patch46
-rw-r--r--patches/treercu-use-simple-waitqueue.patch73
-rw-r--r--patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch65
-rw-r--r--patches/usb-fix-mouse-problem-copying-large-data.patch36
-rw-r--r--patches/usb-hcd-use-local-irq-nort.patch34
-rw-r--r--patches/user-use-local-irq-nort.patch29
-rw-r--r--patches/wait-simple-implementation.patch337
-rw-r--r--patches/wait-simple-rework-for-completions.patch209
-rw-r--r--patches/workqueue-use-locallock.patch130
-rw-r--r--patches/x86-crypto-reduce-preempt-disabled-regions.patch112
-rw-r--r--patches/x86-disable-debug-stack.patch102
-rw-r--r--patches/x86-hpet-disable-msi-on-lenovo-w510.patch64
-rw-r--r--patches/x86-io-apic-migra-no-unmask.patch26
-rw-r--r--patches/x86-kvm-require-const-tsc-for-rt.patch25
-rw-r--r--patches/x86-mce-timer-hrtimer.patch176
-rw-r--r--patches/x86-perf-uncore-deal-with-kfree.patch68
-rw-r--r--patches/x86-preempt-lazy.patch177
-rw-r--r--patches/x86-stackprot-no-random-on-rt.patch47
-rw-r--r--patches/x86-use-gen-rwsem-spinlocks-rt.patch28
283 files changed, 32080 insertions, 0 deletions
diff --git a/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch b/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch
new file mode 100644
index 0000000..cb44c1a
--- /dev/null
+++ b/patches/0001-kernel-srcu-merge-common-code-into-a-macro.patch
@@ -0,0 +1,36 @@
+From db28051c97688cfceaa9a2cea0202af74bb64fdc Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 19 Mar 2013 14:41:04 +0100
+Subject: [PATCH 1/2] kernel/srcu: merge common code into a macro
+
+DEFINE_SRCU() and DEFINE_STATIC_SRCU() do the same thing except for
+the "static" attribute. This patch moves the common pieces into
+_DEFINE_SRCU(), which is used by the former macros, either adding the
+static attribute or not.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/srcu.h | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/include/linux/srcu.h
++++ b/include/linux/srcu.h
+@@ -102,13 +102,13 @@ void process_srcu(struct work_struct *wo
+ * define and init a srcu struct at build time.
+ * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ */
+-#define DEFINE_SRCU(name) \
++#define _DEFINE_SRCU(name, mod) \
+ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+- struct srcu_struct name = __SRCU_STRUCT_INIT(name);
++ mod struct srcu_struct name = \
++ __SRCU_STRUCT_INIT(name);
+
+-#define DEFINE_STATIC_SRCU(name) \
+- static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+- static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
++#define DEFINE_SRCU(name) _DEFINE_SRCU(name, )
++#define DEFINE_STATIC_SRCU(name) _DEFINE_SRCU(name, static)
+
+ /**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
diff --git a/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch b/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch
new file mode 100644
index 0000000..4a1597d
--- /dev/null
+++ b/patches/0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch
@@ -0,0 +1,83 @@
+From c31a0c052205e3ec24efc3fe18ef70c3e913f2d4 Mon Sep 17 00:00:00 2001
+From: Stephen Warren <swarren@nvidia.com>
+Date: Mon, 11 Feb 2013 14:15:32 -0700
+Subject: [PATCH] of: fix recursive locking in of_get_next_available_child()
+
+of_get_next_available_child() acquires devtree_lock, then calls
+of_device_is_available() which calls of_get_property() which calls
+of_find_property() which tries to re-acquire devtree_lock, thus causing
+deadlock.
+
+To avoid this, create a new __of_device_is_available() which calls
+__of_get_property() instead, which calls __of_find_property(), which
+does not take the lock. Update of_get_next_available_child() to call
+the new __of_device_is_available() since it already owns the lock.
+
+Signed-off-by: Stephen Warren <swarren@nvidia.com>
+Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
+---
+ drivers/of/base.c | 30 +++++++++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 5 deletions(-)
+
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -307,19 +307,19 @@ int of_machine_is_compatible(const char
+ EXPORT_SYMBOL(of_machine_is_compatible);
+
+ /**
+- * of_device_is_available - check if a device is available for use
++ * __of_device_is_available - check if a device is available for use
+ *
+- * @device: Node to check for availability
++ * @device: Node to check for availability, with locks already held
+ *
+ * Returns 1 if the status property is absent or set to "okay" or "ok",
+ * 0 otherwise
+ */
+-int of_device_is_available(const struct device_node *device)
++static int __of_device_is_available(const struct device_node *device)
+ {
+ const char *status;
+ int statlen;
+
+- status = of_get_property(device, "status", &statlen);
++ status = __of_get_property(device, "status", &statlen);
+ if (status == NULL)
+ return 1;
+
+@@ -330,6 +330,26 @@ int of_device_is_available(const struct
+
+ return 0;
+ }
++
++/**
++ * of_device_is_available - check if a device is available for use
++ *
++ * @device: Node to check for availability
++ *
++ * Returns 1 if the status property is absent or set to "okay" or "ok",
++ * 0 otherwise
++ */
++int of_device_is_available(const struct device_node *device)
++{
++ unsigned long flags;
++ int res;
++
++ raw_spin_lock_irqsave(&devtree_lock, flags);
++ res = __of_device_is_available(device);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
++ return res;
++
++}
+ EXPORT_SYMBOL(of_device_is_available);
+
+ /**
+@@ -421,7 +441,7 @@ struct device_node *of_get_next_availabl
+ raw_spin_lock(&devtree_lock);
+ next = prev ? prev->sibling : node->child;
+ for (; next; next = next->sibling) {
+- if (!of_device_is_available(next))
++ if (!__of_device_is_available(next))
+ continue;
+ if (of_node_get(next))
+ break;
diff --git a/patches/0002-kernel-SRCU-provide-a-static-initializer.patch b/patches/0002-kernel-SRCU-provide-a-static-initializer.patch
new file mode 100644
index 0000000..3914991
--- /dev/null
+++ b/patches/0002-kernel-SRCU-provide-a-static-initializer.patch
@@ -0,0 +1,100 @@
+From 3f09905a6a65ed4fcf8e664abf044c91b2ce7b27 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 19 Mar 2013 14:44:30 +0100
+Subject: [PATCH 2/2] kernel/SRCU: provide a static initializer
+
+There are macros providing a static initializer for three of the four
+possible notifier types, namely:
+ ATOMIC_NOTIFIER_HEAD()
+ BLOCKING_NOTIFIER_HEAD()
+ RAW_NOTIFIER_HEAD()
+
+This patch provides a static initializer for the fourth type to make it
+complete.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/notifier.h | 26 +++++++++++++++++++++-----
+ include/linux/srcu.h | 6 +++---
+ 2 files changed, 24 insertions(+), 8 deletions(-)
+
+--- a/include/linux/notifier.h
++++ b/include/linux/notifier.h
+@@ -42,9 +42,7 @@
+ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
+ * As compensation, srcu_notifier_chain_unregister() is rather expensive.
+ * SRCU notifier chains should be used when the chain will be called very
+- * often but notifier_blocks will seldom be removed. Also, SRCU notifier
+- * chains are slightly more difficult to use because they require special
+- * runtime initialization.
++ * often but notifier_blocks will seldom be removed.
+ */
+
+ struct notifier_block {
+@@ -85,7 +83,7 @@ struct srcu_notifier_head {
+ (name)->head = NULL; \
+ } while (0)
+
+-/* srcu_notifier_heads must be initialized and cleaned up dynamically */
++/* srcu_notifier_heads must be cleaned up dynamically */
+ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
+ #define srcu_cleanup_notifier_head(name) \
+ cleanup_srcu_struct(&(name)->srcu);
+@@ -98,7 +96,13 @@ extern void srcu_init_notifier_head(stru
+ .head = NULL }
+ #define RAW_NOTIFIER_INIT(name) { \
+ .head = NULL }
+-/* srcu_notifier_heads cannot be initialized statically */
++
++#define SRCU_NOTIFIER_INIT(name, pcpu) \
++ { \
++ .mutex = __MUTEX_INITIALIZER(name.mutex), \
++ .head = NULL, \
++ .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
++ }
+
+ #define ATOMIC_NOTIFIER_HEAD(name) \
+ struct atomic_notifier_head name = \
+@@ -110,6 +114,18 @@ extern void srcu_init_notifier_head(stru
+ struct raw_notifier_head name = \
+ RAW_NOTIFIER_INIT(name)
+
++#define _SRCU_NOTIFIER_HEAD(name, mod) \
++ static DEFINE_PER_CPU(struct srcu_struct_array, \
++ name##_head_srcu_array); \
++ mod struct srcu_notifier_head name = \
++ SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
++
++#define SRCU_NOTIFIER_HEAD(name) \
++ _SRCU_NOTIFIER_HEAD(name, )
++
++#define SRCU_NOTIFIER_HEAD_STATIC(name) \
++ _SRCU_NOTIFIER_HEAD(name, static)
++
+ #ifdef __KERNEL__
+
+ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+--- a/include/linux/srcu.h
++++ b/include/linux/srcu.h
+@@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct
+
+ void process_srcu(struct work_struct *work);
+
+-#define __SRCU_STRUCT_INIT(name) \
++#define __SRCU_STRUCT_INIT(name, pcpu_name) \
+ { \
+ .completed = -300, \
+- .per_cpu_ref = &name##_srcu_array, \
++ .per_cpu_ref = &pcpu_name, \
+ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
+ .running = false, \
+ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
+@@ -105,7 +105,7 @@ void process_srcu(struct work_struct *wo
+ #define _DEFINE_SRCU(name, mod) \
+ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+ mod struct srcu_struct name = \
+- __SRCU_STRUCT_INIT(name);
++ __SRCU_STRUCT_INIT(name, name##_srcu_array);
+
+ #define DEFINE_SRCU(name) _DEFINE_SRCU(name, )
+ #define DEFINE_STATIC_SRCU(name) _DEFINE_SRCU(name, static)
diff --git a/patches/0002-x86-highmem-add-a-already-used-pte-check.patch b/patches/0002-x86-highmem-add-a-already-used-pte-check.patch
new file mode 100644
index 0000000..c6d8735
--- /dev/null
+++ b/patches/0002-x86-highmem-add-a-already-used-pte-check.patch
@@ -0,0 +1,23 @@
+From 65513f34449eedb6b84c24a3583266534c1627e4 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 11 Mar 2013 17:09:55 +0100
+Subject: [PATCH 2/6] x86/highmem: add an "already used pte" check
+
+This is a copy from kmap_atomic_prot().
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/x86/mm/iomap_32.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/mm/iomap_32.c
++++ b/arch/x86/mm/iomap_32.c
+@@ -65,6 +65,8 @@ void *kmap_atomic_prot_pfn(unsigned long
+ type = kmap_atomic_idx_push();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ WARN_ON(!pte_none(*(kmap_pte - idx)));
++
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+ #endif
diff --git a/patches/0003-arm-highmem-flush-tlb-on-unmap.patch b/patches/0003-arm-highmem-flush-tlb-on-unmap.patch
new file mode 100644
index 0000000..71a8b20
--- /dev/null
+++ b/patches/0003-arm-highmem-flush-tlb-on-unmap.patch
@@ -0,0 +1,28 @@
+From e2ca4d092d9c6e6b07b465b4d81da207bbcc7437 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 11 Mar 2013 21:37:27 +0100
+Subject: [PATCH 3/6] arm/highmem: flush tlb on unmap
+
+The TLB should be flushed on unmap, thus invalidating the mapping
+entry. Currently this is only done in the non-debug case, which does
+not look right.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/arm/mm/highmem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/mm/highmem.c
++++ b/arch/arm/mm/highmem.c
+@@ -95,10 +95,10 @@ void __kunmap_atomic(void *kvaddr)
+ __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+- set_top_pte(vaddr, __pte(0));
+ #else
+ (void) idx; /* to kill a warning */
+ #endif
++ set_top_pte(vaddr, __pte(0));
+ kmap_atomic_idx_pop();
+ } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
+ /* this address was obtained through kmap_high_get() */
diff --git a/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch b/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
new file mode 100644
index 0000000..8f6323f
--- /dev/null
+++ b/patches/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
@@ -0,0 +1,44 @@
+From eef09918aff670a6162d2ae5fe87b393698ef57d Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 1 Mar 2013 11:17:42 +0100
+Subject: [PATCH 5/6] futex: Ensure lock/unlock symmetry versus pi_lock and
+ hash bucket lock
+
+In exit_pi_state_list() we have the following locking construct:
+
+ spin_lock(&hb->lock);
+ raw_spin_lock_irq(&curr->pi_lock);
+
+ ...
+ spin_unlock(&hb->lock);
+
+In !RT this works, but on RT the migrate_enable() function which is
+called from spin_unlock() sees atomic context due to the held pi_lock
+and just decrements the migrate_disable_atomic counter of the
+task. Now the next call to migrate_disable() sees the counter being
+negative and issues a warning. That check should be in
+migrate_enable() already.
+
+Fix this by dropping pi_lock before unlocking hb->lock and reacquiring
+pi_lock afterwards. This is safe as the loop code reevaluates
+head again under the pi_lock.
+
+Reported-by: Yong Zhang <yong.zhang@windriver.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/futex.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -568,7 +568,9 @@ void exit_pi_state_list(struct task_stru
+ * task still owns the PI-state:
+ */
+ if (head->next != next) {
++ raw_spin_unlock_irq(&curr->pi_lock);
+ spin_unlock(&hb->lock);
++ raw_spin_lock_irq(&curr->pi_lock);
+ continue;
+ }
+
diff --git a/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch b/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch
new file mode 100644
index 0000000..38da041
--- /dev/null
+++ b/patches/HACK-printk-drop-the-logbuf_lock-more-often.patch
@@ -0,0 +1,77 @@
+From b72b514282ffad0d665ea94932b968f388304079 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 19:01:05 +0100
+Subject: [PATCH] HACK: printk: drop the logbuf_lock more often
+
+The lock is held with irqs off. The latency drops by 500us+ on my ARM
+boxes with a "full" buffer after executing "dmesg" on the shell.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/printk.c | 27 ++++++++++++++++++++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1072,6 +1072,7 @@ static int syslog_print_all(char __user
+ {
+ char *text;
+ int len = 0;
++ int attempts = 0;
+
+ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ if (!text)
+@@ -1083,7 +1084,14 @@ static int syslog_print_all(char __user
+ u64 seq;
+ u32 idx;
+ enum log_flags prev;
+-
++ int num_msg;
++try_again:
++ attempts++;
++ if (attempts > 10) {
++ len = -EBUSY;
++ goto out;
++ }
++ num_msg = 0;
+ if (clear_seq < log_first_seq) {
+ /* messages are gone, move to first available one */
+ clear_seq = log_first_seq;
+@@ -1104,6 +1112,14 @@ static int syslog_print_all(char __user
+ prev = msg->flags;
+ idx = log_next(idx);
+ seq++;
++ num_msg++;
++ if (num_msg > 5) {
++ num_msg = 0;
++ raw_spin_unlock_irq(&logbuf_lock);
++ raw_spin_lock_irq(&logbuf_lock);
++ if (clear_seq < log_first_seq)
++ goto try_again;
++ }
+ }
+
+ /* move first record forward until length fits into the buffer */
+@@ -1117,6 +1133,14 @@ static int syslog_print_all(char __user
+ prev = msg->flags;
+ idx = log_next(idx);
+ seq++;
++ num_msg++;
++ if (num_msg > 5) {
++ num_msg = 0;
++ raw_spin_unlock_irq(&logbuf_lock);
++ raw_spin_lock_irq(&logbuf_lock);
++ if (clear_seq < log_first_seq)
++ goto try_again;
++ }
+ }
+
+ /* last message fitting into this dump */
+@@ -1158,6 +1182,7 @@ static int syslog_print_all(char __user
+ clear_seq = log_next_seq;
+ clear_idx = log_next_idx;
+ }
++out:
+ raw_spin_unlock_irq(&logbuf_lock);
+
+ kfree(text);
diff --git a/patches/acpi-use-local-irq-nort.patch b/patches/acpi-use-local-irq-nort.patch
new file mode 100644
index 0000000..f1dcb8c
--- /dev/null
+++ b/patches/acpi-use-local-irq-nort.patch
@@ -0,0 +1,25 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Jul 2009 22:54:51 +0200
+Subject: acpi: Do not disable interrupts on PREEMPT_RT
+
+Use the local_irq_*_nort() variants.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/include/asm/acpi.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/acpi.h
++++ b/arch/x86/include/asm/acpi.h
+@@ -51,8 +51,8 @@
+
+ #define ACPI_ASM_MACROS
+ #define BREAKPOINT3
+-#define ACPI_DISABLE_IRQS() local_irq_disable()
+-#define ACPI_ENABLE_IRQS() local_irq_enable()
++#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
++#define ACPI_ENABLE_IRQS() local_irq_enable_nort()
+ #define ACPI_FLUSH_CPU_CACHE() wbinvd()
+
+ int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/patches/arch-use-pagefault-disabled.patch b/patches/arch-use-pagefault-disabled.patch
new file mode 100644
index 0000000..961cc0c
--- /dev/null
+++ b/patches/arch-use-pagefault-disabled.patch
@@ -0,0 +1,286 @@
+Subject: mm: Fixup all fault handlers to check current->pagefault_disable
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 17 Mar 2011 11:32:28 +0100
+
+Necessary for decoupling pagefault disable from preempt count.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/alpha/mm/fault.c | 2 +-
+ arch/arm/mm/fault.c | 2 +-
+ arch/avr32/mm/fault.c | 3 ++-
+ arch/cris/mm/fault.c | 2 +-
+ arch/frv/mm/fault.c | 2 +-
+ arch/ia64/mm/fault.c | 2 +-
+ arch/m32r/mm/fault.c | 2 +-
+ arch/m68k/mm/fault.c | 2 +-
+ arch/microblaze/mm/fault.c | 2 +-
+ arch/mips/mm/fault.c | 2 +-
+ arch/mn10300/mm/fault.c | 2 +-
+ arch/parisc/mm/fault.c | 2 +-
+ arch/powerpc/mm/fault.c | 2 +-
+ arch/s390/mm/fault.c | 6 ++++--
+ arch/score/mm/fault.c | 2 +-
+ arch/sh/mm/fault.c | 2 +-
+ arch/sparc/mm/fault_32.c | 2 +-
+ arch/sparc/mm/fault_64.c | 2 +-
+ arch/tile/mm/fault.c | 2 +-
+ arch/um/kernel/trap.c | 2 +-
+ arch/x86/mm/fault.c | 2 +-
+ arch/xtensa/mm/fault.c | 2 +-
+ 22 files changed, 26 insertions(+), 23 deletions(-)
+
+--- a/arch/alpha/mm/fault.c
++++ b/arch/alpha/mm/fault.c
+@@ -108,7 +108,7 @@ do_page_fault(unsigned long address, uns
+
+ /* If we're in an interrupt context, or have no user context,
+ we must not take the fault. */
+- if (!mm || in_atomic())
++ if (!mm || in_atomic() || current->pagefault_disabled)
+ goto no_context;
+
+ #ifdef CONFIG_ALPHA_LARGE_VMALLOC
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -279,7 +279,7 @@ do_page_fault(unsigned long addr, unsign
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ /*
+--- a/arch/avr32/mm/fault.c
++++ b/arch/avr32/mm/fault.c
+@@ -81,7 +81,8 @@ asmlinkage void do_page_fault(unsigned l
+ * If we're in an interrupt or have no user context, we must
+ * not take the fault...
+ */
+- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
++ if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM) ||
++ current->pagefault_disabled)
+ goto no_context;
+
+ local_irq_enable();
+--- a/arch/cris/mm/fault.c
++++ b/arch/cris/mm/fault.c
+@@ -114,7 +114,7 @@ do_page_fault(unsigned long address, str
+ * user context, we must not take the fault.
+ */
+
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ retry:
+--- a/arch/frv/mm/fault.c
++++ b/arch/frv/mm/fault.c
+@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datamm
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -98,7 +98,7 @@ ia64_do_page_fault (unsigned long addres
+ /*
+ * If we're in an interrupt or have no user context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ #ifdef CONFIG_VIRTUAL_MEM_MAP
+--- a/arch/m32r/mm/fault.c
++++ b/arch/m32r/mm/fault.c
+@@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user context or are running in an
+ * atomic region then we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto bad_area_nosemaphore;
+
+ /* When running in the kernel we expect faults to occur only to
+--- a/arch/m68k/mm/fault.c
++++ b/arch/m68k/mm/fault.c
+@@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs,
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ retry:
+--- a/arch/microblaze/mm/fault.c
++++ b/arch/microblaze/mm/fault.c
+@@ -108,7 +108,7 @@ void do_page_fault(struct pt_regs *regs,
+ if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+ is_write = 0;
+
+- if (unlikely(in_atomic() || !mm)) {
++ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ if (kernel_mode(regs))
+ goto bad_area_nosemaphore;
+
+--- a/arch/mips/mm/fault.c
++++ b/arch/mips/mm/fault.c
+@@ -89,7 +89,7 @@ asmlinkage void __kprobes do_page_fault(
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto bad_area_nosemaphore;
+
+ retry:
+--- a/arch/mn10300/mm/fault.c
++++ b/arch/mn10300/mm/fault.c
+@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ retry:
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -176,7 +176,7 @@ void do_page_fault(struct pt_regs *regs,
+ unsigned long acc_type;
+ int fault;
+
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -259,7 +259,7 @@ int __kprobes do_page_fault(struct pt_re
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
+- if (in_atomic() || mm == NULL) {
++ if (in_atomic() || mm == NULL || current->pagefault_disabled) {
+ if (!user_mode(regs))
+ return SIGSEGV;
+ /* in_atomic() in user mode is really bad,
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -296,7 +296,8 @@ static inline int do_exception(struct pt
+ * user context.
+ */
+ fault = VM_FAULT_BADCONTEXT;
+- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
++ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
++ tsk->pagefault_disabled))
+ goto out;
+
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+@@ -435,7 +436,8 @@ void __kprobes do_asce_exception(struct
+ clear_tsk_thread_flag(current, TIF_PER_TRAP);
+
+ trans_exc_code = regs->int_parm_long;
+- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
++ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
++ current->pagefault_disabled))
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/score/mm/fault.c
++++ b/arch/score/mm/fault.c
+@@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto bad_area_nosemaphore;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/sh/mm/fault.c
++++ b/arch/sh/mm/fault.c
+@@ -440,7 +440,7 @@ asmlinkage void __kprobes do_page_fault(
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
+ */
+- if (unlikely(in_atomic() || !mm)) {
++ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+--- a/arch/sparc/mm/fault_32.c
++++ b/arch/sparc/mm/fault_32.c
+@@ -200,7 +200,7 @@ asmlinkage void do_sparc_fault(struct pt
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/sparc/mm/fault_64.c
++++ b/arch/sparc/mm/fault_64.c
+@@ -321,7 +321,7 @@ asmlinkage void __kprobes do_sparc64_fau
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm)
++ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto intr_or_no_mm;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/tile/mm/fault.c
++++ b/arch/tile/mm/fault.c
+@@ -360,7 +360,7 @@ static int handle_page_fault(struct pt_r
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+- if (in_atomic() || !mm) {
++ if (in_atomic() || !mm || current->pagefault_disabled) {
+ vma = NULL; /* happy compiler */
+ goto bad_area_nosemaphore;
+ }
+--- a/arch/um/kernel/trap.c
++++ b/arch/um/kernel/trap.c
+@@ -39,7 +39,7 @@ int handle_page_fault(unsigned long addr
+ * If the fault was during atomic operation, don't take the fault, just
+ * fail.
+ */
+- if (in_atomic())
++ if (in_atomic() || current->pagefault_disabled)
+ goto out_nosemaphore;
+
+ retry:
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -1108,7 +1108,7 @@ __do_page_fault(struct pt_regs *regs, un
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
+ */
+- if (unlikely(in_atomic() || !mm)) {
++ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+--- a/arch/xtensa/mm/fault.c
++++ b/arch/xtensa/mm/fault.c
+@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
+ /* If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm) {
++ if (in_atomic() || !mm || current->pagefault_disabled) {
+ bad_page_fault(regs, address, SIGSEGV);
+ return;
+ }
diff --git a/patches/arm-allow-irq-threading.patch b/patches/arm-allow-irq-threading.patch
new file mode 100644
index 0000000..ad9fa31
--- /dev/null
+++ b/patches/arm-allow-irq-threading.patch
@@ -0,0 +1,22 @@
+Subject: arm: Allow forced irq threading
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Jul 2011 13:15:20 +0200
+
+All timer interrupts and the perf interrupt are marked NO_THREAD, so
+it's safe to allow forced interrupt threading.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -17,6 +17,7 @@ config ARM
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
+ select HARDIRQS_SW_RESEND
++ select IRQ_FORCED_THREADING
+ select HAVE_AOUT
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+ select HAVE_ARCH_KGDB
diff --git a/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch b/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
new file mode 100644
index 0000000..38f059e
--- /dev/null
+++ b/patches/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
@@ -0,0 +1,54 @@
+From: Benedikt Spranger <b.spranger@linutronix.de>
+Date: Sat, 6 Mar 2010 17:47:10 +0100
+Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused
+
+Set up and remove the interrupt handler in clock event mode selection.
+This avoids calling the (shared) interrupt handler when the device is
+not used.
+
+Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/arm/mach-at91/at91rm9200_time.c | 1 +
+ arch/arm/mach-at91/at91sam926x_time.c | 5 ++++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/mach-at91/at91rm9200_time.c
++++ b/arch/arm/mach-at91/at91rm9200_time.c
+@@ -134,6 +134,7 @@ clkevt32k_mode(enum clock_event_mode mod
+ break;
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ case CLOCK_EVT_MODE_UNUSED:
++ remove_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
+ case CLOCK_EVT_MODE_RESUME:
+ irqmask = 0;
+ break;
+--- a/arch/arm/mach-at91/at91sam926x_time.c
++++ b/arch/arm/mach-at91/at91sam926x_time.c
+@@ -77,7 +77,7 @@ static struct clocksource pit_clk = {
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+
+-
++static struct irqaction at91sam926x_pit_irq;
+ /*
+ * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
+ */
+@@ -86,6 +86,8 @@ pit_clkevt_mode(enum clock_event_mode mo
+ {
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
++ /* Set up irq handler */
++ setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
+ /* update clocksource counter */
+ pit_cnt += pit_cycle * PIT_PICNT(pit_read(AT91_PIT_PIVR));
+ pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN
+@@ -98,6 +100,7 @@ pit_clkevt_mode(enum clock_event_mode mo
+ case CLOCK_EVT_MODE_UNUSED:
+ /* disable irq, leaving the clocksource active */
+ pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN);
++ remove_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
+ break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
diff --git a/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch b/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
new file mode 100644
index 0000000..b649cd4
--- /dev/null
+++ b/patches/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
@@ -0,0 +1,32 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 1 May 2010 18:29:35 +0200
+Subject: ARM: at91: tclib: Default to tclib timer for RT
+
+RT is not too happy about the shared timer interrupt in AT91
+devices. Default to tclib timer for RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/misc/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -63,6 +63,7 @@ config ATMEL_PWM
+ config ATMEL_TCLIB
+ bool "Atmel AT32/AT91 Timer/Counter Library"
+ depends on (AVR32 || ARCH_AT91)
++ default y if PREEMPT_RT_FULL
+ help
+ Select this if you want a library to allocate the Timer/Counter
+ blocks found on many Atmel processors. This facilitates using
+@@ -95,7 +96,7 @@ config ATMEL_TCB_CLKSRC_BLOCK
+ config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ bool "TC Block use 32 KiHz clock"
+ depends on ATMEL_TCB_CLKSRC
+- default y
++ default y if !PREEMPT_RT_FULL
+ help
+ Select this to use 32 KiHz base clock rate as TC block clock
+ source for clock events.
diff --git a/patches/arm-convert-boot-lock-to-raw.patch b/patches/arm-convert-boot-lock-to-raw.patch
new file mode 100644
index 0000000..91be8d9
--- /dev/null
+++ b/patches/arm-convert-boot-lock-to-raw.patch
@@ -0,0 +1,279 @@
+Subject: preempt-rt: Convert arm boot_lock to raw
+From: Frank Rowand <frank.rowand@am.sony.com>
+Date: Mon, 19 Sep 2011 14:51:14 -0700
+
+
+The arm boot_lock is used by the secondary processor startup code. The locking
+task is the idle thread, which has idle->sched_class == &idle_sched_class.
+idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
+lock, the attempt to wake it when the lock becomes available will fail:
+
+try_to_wake_up()
+ ...
+ activate_task()
+ enqueue_task()
+ p->sched_class->enqueue_task(rq, p, flags)
+
+Fix by converting boot_lock to a raw spin lock.
+
+Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
+Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/mach-exynos/platsmp.c | 12 ++++++------
+ arch/arm/mach-msm/platsmp.c | 10 +++++-----
+ arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
+ arch/arm/mach-spear13xx/platsmp.c | 10 +++++-----
+ arch/arm/mach-ux500/platsmp.c | 10 +++++-----
+ arch/arm/plat-versatile/platsmp.c | 10 +++++-----
+ 6 files changed, 31 insertions(+), 31 deletions(-)
+
+--- a/arch/arm/mach-exynos/platsmp.c
++++ b/arch/arm/mach-exynos/platsmp.c
+@@ -71,7 +71,7 @@ static void __iomem *scu_base_addr(void)
+ return (void __iomem *)(S5P_VA_SCU);
+ }
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ static void __cpuinit exynos_secondary_init(unsigned int cpu)
+ {
+@@ -91,8 +91,8 @@ static void __cpuinit exynos_secondary_i
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ static int __cpuinit exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -104,7 +104,7 @@ static int __cpuinit exynos_boot_seconda
+ * Set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * The secondary processor is waiting to be released from
+@@ -133,7 +133,7 @@ static int __cpuinit exynos_boot_seconda
+
+ if (timeout == 0) {
+ printk(KERN_ERR "cpu1 power enable failed");
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ return -ETIMEDOUT;
+ }
+ }
+@@ -161,7 +161,7 @@ static int __cpuinit exynos_boot_seconda
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-msm/platsmp.c
++++ b/arch/arm/mach-msm/platsmp.c
+@@ -31,7 +31,7 @@
+
+ extern void msm_secondary_startup(void);
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ static inline int get_core_count(void)
+ {
+@@ -58,8 +58,8 @@ static void __cpuinit msm_secondary_init
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ static __cpuinit void prepare_cold_cpu(unsigned int cpu)
+@@ -96,7 +96,7 @@ static int __cpuinit msm_boot_secondary(
+ * set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * The secondary processor is waiting to be released from
+@@ -130,7 +130,7 @@ static int __cpuinit msm_boot_secondary(
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-omap2/omap-smp.c
++++ b/arch/arm/mach-omap2/omap-smp.c
+@@ -45,7 +45,7 @@ u16 pm44xx_errata;
+ /* SCU base address */
+ static void __iomem *scu_base;
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ void __iomem *omap4_get_scu_base(void)
+ {
+@@ -76,8 +76,8 @@ static void __cpuinit omap4_secondary_in
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ static int __cpuinit omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -90,7 +90,7 @@ static int __cpuinit omap4_boot_secondar
+ * Set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * Update the AuxCoreBoot0 with boot state for secondary core.
+@@ -163,7 +163,7 @@ static int __cpuinit omap4_boot_secondar
+ * Now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return 0;
+ }
+--- a/arch/arm/mach-spear13xx/platsmp.c
++++ b/arch/arm/mach-spear13xx/platsmp.c
+@@ -21,7 +21,7 @@
+ #include <mach/spear.h>
+ #include <mach/generic.h>
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
+
+@@ -44,8 +44,8 @@ static void __cpuinit spear13xx_secondar
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ static int __cpuinit spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -56,7 +56,7 @@ static int __cpuinit spear13xx_boot_seco
+ * set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * The secondary processor is waiting to be released from
+@@ -83,7 +83,7 @@ static int __cpuinit spear13xx_boot_seco
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/mach-ux500/platsmp.c
++++ b/arch/arm/mach-ux500/platsmp.c
+@@ -50,7 +50,7 @@ static void __iomem *scu_base_addr(void)
+ return NULL;
+ }
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ static void __cpuinit ux500_secondary_init(unsigned int cpu)
+ {
+@@ -70,8 +70,8 @@ static void __cpuinit ux500_secondary_in
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ static int __cpuinit ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -82,7 +82,7 @@ static int __cpuinit ux500_boot_secondar
+ * set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * The secondary processor is waiting to be released from
+@@ -103,7 +103,7 @@ static int __cpuinit ux500_boot_secondar
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+ }
+--- a/arch/arm/plat-versatile/platsmp.c
++++ b/arch/arm/plat-versatile/platsmp.c
+@@ -32,7 +32,7 @@ static void __cpuinit write_pen_release(
+ outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+ }
+
+-static DEFINE_SPINLOCK(boot_lock);
++static DEFINE_RAW_SPINLOCK(boot_lock);
+
+ void __cpuinit versatile_secondary_init(unsigned int cpu)
+ {
+@@ -52,8 +52,8 @@ void __cpuinit versatile_secondary_init(
+ /*
+ * Synchronise with the boot thread.
+ */
+- spin_lock(&boot_lock);
+- spin_unlock(&boot_lock);
++ raw_spin_lock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+ }
+
+ int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
+@@ -64,7 +64,7 @@ int __cpuinit versatile_boot_secondary(u
+ * Set synchronisation state between this boot processor
+ * and the secondary one
+ */
+- spin_lock(&boot_lock);
++ raw_spin_lock(&boot_lock);
+
+ /*
+ * This is really belt and braces; we hold unintended secondary
+@@ -94,7 +94,7 @@ int __cpuinit versatile_boot_secondary(u
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+- spin_unlock(&boot_lock);
++ raw_spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+ }
diff --git a/patches/arm-disable-highmem-on-rt.patch b/patches/arm-disable-highmem-on-rt.patch
new file mode 100644
index 0000000..344b381
--- /dev/null
+++ b/patches/arm-disable-highmem-on-rt.patch
@@ -0,0 +1,20 @@
+Subject: arm-disable-highmem-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:09:28 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1752,7 +1752,7 @@ config HAVE_ARCH_PFN_VALID
+
+ config HIGHMEM
+ bool "High Memory Support"
+- depends on MMU
++ depends on MMU && !PREEMPT_RT_FULL
+ help
+ The address space of ARM processors is only 4 Gigabytes large
+ and it has to accommodate user address space, kernel address
diff --git a/patches/arm-enable-highmem-for-rt.patch b/patches/arm-enable-highmem-for-rt.patch
new file mode 100644
index 0000000..e803c17
--- /dev/null
+++ b/patches/arm-enable-highmem-for-rt.patch
@@ -0,0 +1,140 @@
+Subject: arm-enable-highmem-for-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 13 Feb 2013 11:03:11 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig | 2 -
+ arch/arm/include/asm/switch_to.h | 9 ++++++++
+ arch/arm/mm/highmem.c | 41 +++++++++++++++++++++++++++++++++++++--
+ include/linux/highmem.h | 1
+ 4 files changed, 50 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -1752,7 +1752,7 @@ config HAVE_ARCH_PFN_VALID
+
+ config HIGHMEM
+ bool "High Memory Support"
+- depends on MMU && !PREEMPT_RT_FULL
++ depends on MMU
+ help
+ The address space of ARM processors is only 4 Gigabytes large
+ and it has to accommodate user address space, kernel address
+--- a/arch/arm/include/asm/switch_to.h
++++ b/arch/arm/include/asm/switch_to.h
+@@ -3,6 +3,14 @@
+
+ #include <linux/thread_info.h>
+
++#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
++void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
++#else
++static inline void
++switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
++#endif
++
++
+ /*
+ * switch_to(prev, next) should switch from task `prev' to `next'
+ * `prev' will never be the same as `next'. schedule() itself
+@@ -12,6 +20,7 @@ extern struct task_struct *__switch_to(s
+
+ #define switch_to(prev,next,last) \
+ do { \
++ switch_kmaps(prev, next); \
+ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
+ } while (0)
+
+--- a/arch/arm/mm/highmem.c
++++ b/arch/arm/mm/highmem.c
+@@ -38,6 +38,7 @@ EXPORT_SYMBOL(kunmap);
+
+ void *kmap_atomic(struct page *page)
+ {
++ pte_t pte = mk_pte(page, kmap_prot);
+ unsigned int idx;
+ unsigned long vaddr;
+ void *kmap;
+@@ -76,7 +77,10 @@ void *kmap_atomic(struct page *page)
+ * in place, so the contained TLB flush ensures the TLB is updated
+ * with the new mapping.
+ */
+- set_top_pte(vaddr, mk_pte(page, kmap_prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = pte;
++#endif
++ set_top_pte(vaddr, pte);
+
+ return (void *)vaddr;
+ }
+@@ -93,6 +97,9 @@ void __kunmap_atomic(void *kvaddr)
+
+ if (cache_is_vivt())
+ __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = __pte(0);
++#endif
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ #else
+@@ -110,6 +117,7 @@ EXPORT_SYMBOL(__kunmap_atomic);
+
+ void *kmap_atomic_pfn(unsigned long pfn)
+ {
++ pte_t pte = pfn_pte(pfn, kmap_prot);
+ unsigned long vaddr;
+ int idx, type;
+
+@@ -121,7 +129,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
+ #ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(!pte_none(get_top_pte(vaddr)));
+ #endif
+- set_top_pte(vaddr, pfn_pte(pfn, kmap_prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = pte;
++#endif
++ set_top_pte(vaddr, pte);
+
+ return (void *)vaddr;
+ }
+@@ -135,3 +146,29 @@ struct page *kmap_atomic_to_page(const v
+
+ return pte_page(get_top_pte(vaddr));
+ }
++
++#if defined CONFIG_PREEMPT_RT_FULL
++void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
++{
++ int i;
++
++ /*
++ * Clear @prev's kmap_atomic mappings
++ */
++ for (i = 0; i < prev_p->kmap_idx; i++) {
++ int idx = i + KM_TYPE_NR * smp_processor_id();
++
++ set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), __pte(0));
++ }
++ /*
++ * Restore @next_p's kmap_atomic mappings
++ */
++ for (i = 0; i < next_p->kmap_idx; i++) {
++ int idx = i + KM_TYPE_NR * smp_processor_id();
++
++ if (!pte_none(next_p->kmap_pte[i]))
++ set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx),
++ next_p->kmap_pte[i]);
++ }
++}
++#endif
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -7,6 +7,7 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+ #include <linux/hardirq.h>
++#include <linux/sched.h>
+
+ #include <asm/cacheflush.h>
+
diff --git a/patches/arm-mark-pmu-interupt-no-thread.patch b/patches/arm-mark-pmu-interupt-no-thread.patch
new file mode 100644
index 0000000..6f97ee0
--- /dev/null
+++ b/patches/arm-mark-pmu-interupt-no-thread.patch
@@ -0,0 +1,23 @@
+Subject: arm: Mark pmu interrupt IRQF_NO_THREAD
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 16 Mar 2011 14:45:31 +0100
+
+PMU interrupts must not be threaded.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/kernel/perf_event_cpu.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/arm/kernel/perf_event_cpu.c
++++ b/arch/arm/kernel/perf_event_cpu.c
+@@ -118,7 +118,8 @@ static int cpu_pmu_request_irq(struct ar
+ continue;
+ }
+
+- err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
++ err = request_irq(irq, handler,
++ IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+ cpu_pmu);
+ if (err) {
+ pr_err("unable to request IRQ%d for ARM PMU counters\n",
diff --git a/patches/arm-omap-make-wakeupgen_lock-raw.patch b/patches/arm-omap-make-wakeupgen_lock-raw.patch
new file mode 100644
index 0000000..74a258a
--- /dev/null
+++ b/patches/arm-omap-make-wakeupgen_lock-raw.patch
@@ -0,0 +1,62 @@
+Subject: arm-omap-make-wakeupgen_lock-raw.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 11 Apr 2012 11:26:38 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/mach-omap2/omap-wakeupgen.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/arch/arm/mach-omap2/omap-wakeupgen.c
++++ b/arch/arm/mach-omap2/omap-wakeupgen.c
+@@ -46,7 +46,7 @@
+
+ static void __iomem *wakeupgen_base;
+ static void __iomem *sar_base;
+-static DEFINE_SPINLOCK(wakeupgen_lock);
++static DEFINE_RAW_SPINLOCK(wakeupgen_lock);
+ static unsigned int irq_target_cpu[MAX_IRQS];
+ static unsigned int irq_banks = MAX_NR_REG_BANKS;
+ static unsigned int max_irqs = MAX_IRQS;
+@@ -134,9 +134,9 @@ static void wakeupgen_mask(struct irq_da
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&wakeupgen_lock, flags);
++ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ _wakeupgen_clear(d->irq, irq_target_cpu[d->irq]);
+- spin_unlock_irqrestore(&wakeupgen_lock, flags);
++ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+
+ /*
+@@ -146,9 +146,9 @@ static void wakeupgen_unmask(struct irq_
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&wakeupgen_lock, flags);
++ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ _wakeupgen_set(d->irq, irq_target_cpu[d->irq]);
+- spin_unlock_irqrestore(&wakeupgen_lock, flags);
++ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -189,7 +189,7 @@ static void wakeupgen_irqmask_all(unsign
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&wakeupgen_lock, flags);
++ raw_spin_lock_irqsave(&wakeupgen_lock, flags);
+ if (set) {
+ _wakeupgen_save_masks(cpu);
+ _wakeupgen_set_all(cpu, WKG_MASK_ALL);
+@@ -197,7 +197,7 @@ static void wakeupgen_irqmask_all(unsign
+ _wakeupgen_set_all(cpu, WKG_UNMASK_ALL);
+ _wakeupgen_restore_masks(cpu);
+ }
+- spin_unlock_irqrestore(&wakeupgen_lock, flags);
++ raw_spin_unlock_irqrestore(&wakeupgen_lock, flags);
+ }
+ #endif
+
diff --git a/patches/arm-preempt-lazy-support.patch b/patches/arm-preempt-lazy-support.patch
new file mode 100644
index 0000000..d1e990b
--- /dev/null
+++ b/patches/arm-preempt-lazy-support.patch
@@ -0,0 +1,103 @@
+Subject: arm-preempt-lazy-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 31 Oct 2012 12:04:11 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/Kconfig | 1 +
+ arch/arm/include/asm/thread_info.h | 3 +++
+ arch/arm/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/entry-armv.S | 13 +++++++++++--
+ arch/arm/kernel/signal.c | 3 ++-
+ 5 files changed, 18 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -47,6 +47,7 @@ config ARM
+ select HAVE_MEMBLOCK
+ select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
+ select HAVE_PERF_EVENTS
++ select HAVE_PREEMPT_LAZY
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_UID16
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -50,6 +50,7 @@ struct cpu_context_save {
+ struct thread_info {
+ unsigned long flags; /* low level flags */
+ int preempt_count; /* 0 => preemptable, <0 => bug */
++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
+ mm_segment_t addr_limit; /* address limit */
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+@@ -148,6 +149,7 @@ extern int vfp_restore_user_hwstate(stru
+ #define TIF_SIGPENDING 0
+ #define TIF_NEED_RESCHED 1
+ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
++#define TIF_NEED_RESCHED_LAZY 3
+ #define TIF_SYSCALL_TRACE 8
+ #define TIF_SYSCALL_AUDIT 9
+ #define TIF_SYSCALL_TRACEPOINT 10
+@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+ #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -50,6 +50,7 @@ int main(void)
+ BLANK();
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
+ DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
+ DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -216,11 +216,18 @@ __irq_svc:
+ #ifdef CONFIG_PREEMPT
+ get_thread_info tsk
+ ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
+- ldr r0, [tsk, #TI_FLAGS] @ get flags
+ teq r8, #0 @ if preempt count != 0
++ bne 1f @ return from exeption
++ ldr r0, [tsk, #TI_FLAGS] @ get flags
++ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
++ blne svc_preempt @ preempt!
++
++ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
++ teq r8, #0 @ if preempt lazy count != 0
+ movne r0, #0 @ force flags to 0
+- tst r0, #_TIF_NEED_RESCHED
++ tst r0, #_TIF_NEED_RESCHED_LAZY
+ blne svc_preempt
++1:
+ #endif
+
+ #ifdef CONFIG_TRACE_IRQFLAGS
+@@ -240,6 +247,8 @@ svc_preempt:
+ 1: bl preempt_schedule_irq @ irq en/disable is done inside
+ ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
+ tst r0, #_TIF_NEED_RESCHED
++ bne 1b
++ tst r0, #_TIF_NEED_RESCHED_LAZY
+ moveq pc, r8 @ go again
+ b 1b
+ #endif
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -638,7 +638,8 @@ asmlinkage int
+ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+ {
+ do {
+- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
++ if (likely(thread_flags & (_TIF_NEED_RESCHED |
++ _TIF_NEED_RESCHED_LAZY))) {
+ schedule();
+ } else {
+ if (unlikely(!user_mode(regs)))
diff --git a/patches/ata-disable-interrupts-if-non-rt.patch b/patches/ata-disable-interrupts-if-non-rt.patch
new file mode 100644
index 0000000..6088cd8
--- /dev/null
+++ b/patches/ata-disable-interrupts-if-non-rt.patch
@@ -0,0 +1,64 @@
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Fri, 3 Jul 2009 08:44:29 -0500
+Subject: ata: Do not disable interrupts in ide code for preempt-rt
+
+Use the local_irq_*_nort variants.
+
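+For context, a minimal sketch of how the _nort helpers are assumed to be
+defined elsewhere in this queue (not part of this patch): they behave like
+the normal IRQ operations on !RT and degrade to mere flag handling on RT,
+so the section stays preemptible:
+
+   /* sketch only, assumed definitions */
+   #ifdef CONFIG_PREEMPT_RT_FULL
+   # define local_irq_save_nort(flags)     local_save_flags(flags)
+   # define local_irq_restore_nort(flags)  (void)(flags)
+   #else
+   # define local_irq_save_nort(flags)     local_irq_save(flags)
+   # define local_irq_restore_nort(flags)  local_irq_restore(flags)
+   #endif
+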
+Signed-off-by: Steven Rostedt <srostedt@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/ata/libata-sff.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/ata/libata-sff.c
++++ b/drivers/ata/libata-sff.c
+@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(str
+ unsigned long flags;
+ unsigned int consumed;
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ return consumed;
+ }
+@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_qu
+ unsigned long flags;
+
+ /* FIXME: use a bounce buffer */
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ buf = kmap_atomic(page);
+
+ /* do the actual data transfer */
+@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_qu
+ do_write);
+
+ kunmap_atomic(buf);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ } else {
+ buf = page_address(page);
+ ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
+@@ -864,7 +864,7 @@ next_sg:
+ unsigned long flags;
+
+ /* FIXME: use bounce buffer */
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ buf = kmap_atomic(page);
+
+ /* do the actual data transfer */
+@@ -872,7 +872,7 @@ next_sg:
+ count, rw);
+
+ kunmap_atomic(buf);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ } else {
+ buf = page_address(page);
+ consumed = ap->ops->sff_data_xfer(dev, buf + offset,
diff --git a/patches/block-shorten-interrupt-disabled-regions.patch b/patches/block-shorten-interrupt-disabled-regions.patch
new file mode 100644
index 0000000..910d5ac
--- /dev/null
+++ b/patches/block-shorten-interrupt-disabled-regions.patch
@@ -0,0 +1,97 @@
+Subject: block: Shorten interrupt disabled regions
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 22 Jun 2011 19:47:02 +0200
+
+Moving the blk_sched_flush_plug() call out of the interrupt/preempt
+disabled region in the scheduler allows us to replace
+local_irq_save/restore(flags) by local_irq_disable/enable() in
+blk_flush_plug().
+
+Now instead of doing this we disable interrupts explicitly when we
+lock the request_queue and reenable them when we drop the lock. That
+allows interrupts to be handled when the plug list contains requests
+for more than one queue.
+
+Aside from that, this change makes the scope of the irq disabled region
+more obvious. The current code confused the hell out of me when
+looking at:
+
+ local_irq_save(flags);
+ spin_lock(q->queue_lock);
+ ...
+ queue_unplugged(q...);
+ scsi_request_fn();
+ spin_unlock(q->queue_lock);
+ spin_lock(shost->host_lock);
+ spin_unlock_irq(shost->host_lock);
+
+-------------------^^^ ????
+
+ spin_lock_irq(q->queue_lock);
+ spin_unlock(q->lock);
+ local_irq_restore(flags);
+
+Also add a comment to __blk_run_queue() documenting that
+q->request_fn() can drop q->queue_lock and reenable interrupts, but
+must return with q->queue_lock held and interrupts disabled.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ block/blk-core.c | 12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -2929,7 +2929,7 @@ static void queue_unplugged(struct reque
+ blk_run_queue_async(q);
+ else
+ __blk_run_queue(q);
+- spin_unlock(q->queue_lock);
++ spin_unlock_irq(q->queue_lock);
+ }
+
+ static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
+@@ -2977,7 +2977,6 @@ EXPORT_SYMBOL(blk_check_plugged);
+ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+ {
+ struct request_queue *q;
+- unsigned long flags;
+ struct request *rq;
+ LIST_HEAD(list);
+ unsigned int depth;
+@@ -2998,11 +2997,6 @@ void blk_flush_plug_list(struct blk_plug
+ q = NULL;
+ depth = 0;
+
+- /*
+- * Save and disable interrupts here, to avoid doing it for every
+- * queue lock we have to take.
+- */
+- local_irq_save(flags);
+ while (!list_empty(&list)) {
+ rq = list_entry_rq(list.next);
+ list_del_init(&rq->queuelist);
+@@ -3015,7 +3009,7 @@ void blk_flush_plug_list(struct blk_plug
+ queue_unplugged(q, depth, from_schedule);
+ q = rq->q;
+ depth = 0;
+- spin_lock(q->queue_lock);
++ spin_lock_irq(q->queue_lock);
+ }
+
+ /*
+@@ -3042,8 +3036,6 @@ void blk_flush_plug_list(struct blk_plug
+ */
+ if (q)
+ queue_unplugged(q, depth, from_schedule);
+-
+- local_irq_restore(flags);
+ }
+
+ void blk_finish_plug(struct blk_plug *plug)
diff --git a/patches/block-use-cpu-chill.patch b/patches/block-use-cpu-chill.patch
new file mode 100644
index 0000000..9237f69
--- /dev/null
+++ b/patches/block-use-cpu-chill.patch
@@ -0,0 +1,45 @@
+Subject: block: Use cpu_chill() for retry loops
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 20 Dec 2012 18:28:26 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Steven also observed a live lock when there was a
+concurrent priority boosting going on.
+
+Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
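+A rough sketch of what cpu_chill() is assumed to expand to (it is provided
+by a separate patch in this queue, not by this one): sleep briefly on RT so
+the preempted updater can run, spin as before on !RT:
+
+   /* sketch only, assumed definition */
+   #ifdef CONFIG_PREEMPT_RT_FULL
+   # define cpu_chill()    msleep(1)
+   #else
+   # define cpu_chill()    cpu_relax()
+   #endif
+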
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ block/blk-ioc.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -8,6 +8,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
+ #include <linux/slab.h>
++#include <linux/delay.h>
+
+ #include "blk.h"
+
+@@ -110,7 +111,7 @@ static void ioc_release_fn(struct work_s
+ spin_unlock(q->queue_lock);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+- cpu_relax();
++ cpu_chill();
+ spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ }
+ }
+@@ -188,7 +189,7 @@ retry:
+ spin_unlock(icq->q->queue_lock);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+- cpu_relax();
++ cpu_chill();
+ goto retry;
+ }
+ }
diff --git a/patches/bug-rt-dependend-variants.patch b/patches/bug-rt-dependend-variants.patch
new file mode 100644
index 0000000..5f7a1e3
--- /dev/null
+++ b/patches/bug-rt-dependend-variants.patch
@@ -0,0 +1,34 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:58 -0500
+Subject: bug: BUG_ON/WARN_ON variants dependent on RT/!RT
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/asm-generic/bug.h | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/include/asm-generic/bug.h
++++ b/include/asm-generic/bug.h
+@@ -202,6 +202,20 @@ extern void warn_slowpath_null(const cha
+ # define WARN_ON_SMP(x) ({0;})
+ #endif
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define BUG_ON_RT(c) BUG_ON(c)
++# define BUG_ON_NONRT(c) do { } while (0)
++# define WARN_ON_RT(condition) WARN_ON(condition)
++# define WARN_ON_NONRT(condition) do { } while (0)
++# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
++#else
++# define BUG_ON_RT(c) do { } while (0)
++# define BUG_ON_NONRT(c) BUG_ON(c)
++# define WARN_ON_RT(condition) do { } while (0)
++# define WARN_ON_NONRT(condition) WARN_ON(condition)
++# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
++#endif
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif
diff --git a/patches/clocksource-tclib-allow-higher-clockrates.patch b/patches/clocksource-tclib-allow-higher-clockrates.patch
new file mode 100644
index 0000000..a4502c7
--- /dev/null
+++ b/patches/clocksource-tclib-allow-higher-clockrates.patch
@@ -0,0 +1,159 @@
+From: Benedikt Spranger <b.spranger@linutronix.de>
+Date: Mon, 8 Mar 2010 18:57:04 +0100
+Subject: clocksource: TCLIB: Allow higher clock rates for clock events
+
+By default the TCLIB uses the 32KiHz base clock rate for clock events.
+Add a compile-time selection to allow a higher clock resolution.
+
+Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/clocksource/tcb_clksrc.c | 44 +++++++++++++++++++++++----------------
+ drivers/misc/Kconfig | 11 +++++++--
+ 2 files changed, 35 insertions(+), 20 deletions(-)
+
+--- a/drivers/clocksource/tcb_clksrc.c
++++ b/drivers/clocksource/tcb_clksrc.c
+@@ -23,8 +23,7 @@
+ * this 32 bit free-running counter. the second channel is not used.
+ *
+ * - The third channel may be used to provide a 16-bit clockevent
+- * source, used in either periodic or oneshot mode. This runs
+- * at 32 KiHZ, and can handle delays of up to two seconds.
++ * source, used in either periodic or oneshot mode.
+ *
+ * A boot clocksource and clockevent source are also currently needed,
+ * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
+@@ -74,6 +73,7 @@ static struct clocksource clksrc = {
+ struct tc_clkevt_device {
+ struct clock_event_device clkevt;
+ struct clk *clk;
++ u32 freq;
+ void __iomem *regs;
+ };
+
+@@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_cl
+ return container_of(clkevt, struct tc_clkevt_device, clkevt);
+ }
+
+-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+- * because using one of the divided clocks would usually mean the
+- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+- *
+- * A divided clock could be good for high resolution timers, since
+- * 30.5 usec resolution can seem "low".
+- */
+ static u32 timer_clock;
+
+ static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
+@@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mod
+ case CLOCK_EVT_MODE_PERIODIC:
+ clk_enable(tcd->clk);
+
+- /* slow clock, count up to RC, then irq and restart */
++ /* count up to RC, then irq and restart */
+ __raw_writel(timer_clock
+ | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+ regs + ATMEL_TC_REG(2, CMR));
+- __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
++ __raw_writel((tcd->freq + HZ/2)/HZ,
++ tcaddr + ATMEL_TC_REG(2, RC));
+
+ /* Enable clock and interrupts on RC compare */
+ __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
+@@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mod
+ case CLOCK_EVT_MODE_ONESHOT:
+ clk_enable(tcd->clk);
+
+- /* slow clock, count up to RC, then irq and stop */
++ /* count up to RC, then irq and stop */
+ __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
+ | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
+ regs + ATMEL_TC_REG(2, CMR));
+@@ -158,8 +152,12 @@ static struct tc_clkevt_device clkevt =
+ .features = CLOCK_EVT_FEAT_PERIODIC
+ | CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 32,
++#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ /* Should be lower than at91rm9200's system timer */
+ .rating = 125,
++#else
++ .rating = 200,
++#endif
+ .set_next_event = tc_next_event,
+ .set_mode = tc_mode,
+ },
+@@ -185,8 +183,9 @@ static struct irqaction tc_irqaction = {
+ .handler = ch2_irq,
+ };
+
+-static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
++static void __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
+ {
++ unsigned divisor = atmel_tc_divisors[divisor_idx];
+ struct clk *t2_clk = tc->clk[2];
+ int irq = tc->irq[2];
+
+@@ -194,11 +193,17 @@ static void __init setup_clkevents(struc
+ clkevt.clk = t2_clk;
+ tc_irqaction.dev_id = &clkevt;
+
+- timer_clock = clk32k_divisor_idx;
++ timer_clock = divisor_idx;
+
+- clkevt.clkevt.mult = div_sc(32768, NSEC_PER_SEC, clkevt.clkevt.shift);
+- clkevt.clkevt.max_delta_ns
+- = clockevent_delta2ns(0xffff, &clkevt.clkevt);
++ if (!divisor)
++ clkevt.freq = 32768;
++ else
++ clkevt.freq = clk_get_rate(t2_clk)/divisor;
++
++ clkevt.clkevt.mult = div_sc(clkevt.freq, NSEC_PER_SEC,
++ clkevt.clkevt.shift);
++ clkevt.clkevt.max_delta_ns =
++ clockevent_delta2ns(0xffff, &clkevt.clkevt);
+ clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+ clkevt.clkevt.cpumask = cpumask_of(0);
+
+@@ -327,8 +332,11 @@ static int __init tcb_clksrc_init(void)
+ clocksource_register_hz(&clksrc, divided_rate);
+
+ /* channel 2: periodic and oneshot timer support */
++#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ setup_clkevents(tc, clk32k_divisor_idx);
+-
++#else
++ setup_clkevents(tc, best_divisor_idx);
++#endif
+ return 0;
+ }
+ arch_initcall(tcb_clksrc_init);
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -78,8 +78,7 @@ config ATMEL_TCB_CLKSRC
+ are combined to make a single 32-bit timer.
+
+ When GENERIC_CLOCKEVENTS is defined, the third timer channel
+- may be used as a clock event device supporting oneshot mode
+- (delays of up to two seconds) based on the 32 KiHz clock.
++ may be used as a clock event device supporting oneshot mode.
+
+ config ATMEL_TCB_CLKSRC_BLOCK
+ int
+@@ -93,6 +92,14 @@ config ATMEL_TCB_CLKSRC_BLOCK
+ TC can be used for other purposes, such as PWM generation and
+ interval timing.
+
++config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
++ bool "TC Block use 32 KiHz clock"
++ depends on ATMEL_TCB_CLKSRC
++ default y
++ help
++ Select this to use 32 KiHz base clock rate as TC block clock
++ source for clock events.
++
+ config IBM_ASM
+ tristate "Device driver for IBM RSA service processor"
+ depends on X86 && PCI && INPUT
diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch
new file mode 100644
index 0000000..f7d9d01
--- /dev/null
+++ b/patches/completion-use-simple-wait-queues.patch
@@ -0,0 +1,155 @@
+Subject: completion: Use simple wait queues
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 11 Jan 2013 11:23:51 +0100
+
+Completions have no long lasting callbacks and therefore do not need
+the complex waitqueue variant. Use simple waitqueues, which reduces the
+contention on the waitqueue lock.
+
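+For orientation, the simple-waitqueue types relied on here are assumed to
+look roughly like this (shape of wait-simple.h from this queue, not part of
+this patch): a raw-spinlock-protected list plus a per-waiter node, without
+the callback machinery of wait_queue_head_t:
+
+   /* sketch only */
+   struct swait_head {
+           raw_spinlock_t          lock;
+           struct list_head        list;
+   };
+
+   struct swaiter {
+           struct task_struct      *task;
+           struct list_head        node;
+   };
+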
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/completion.h | 8 ++++----
+ include/linux/uprobes.h | 1 +
+ kernel/sched/core.c | 34 +++++++++++++++++-----------------
+ 3 files changed, 22 insertions(+), 21 deletions(-)
+
+--- a/include/linux/completion.h
++++ b/include/linux/completion.h
+@@ -8,7 +8,7 @@
+ * See kernel/sched.c for details.
+ */
+
+-#include <linux/wait.h>
++#include <linux/wait-simple.h>
+
+ /*
+ * struct completion - structure used to maintain state for a "completion"
+@@ -24,11 +24,11 @@
+ */
+ struct completion {
+ unsigned int done;
+- wait_queue_head_t wait;
++ struct swait_head wait;
+ };
+
+ #define COMPLETION_INITIALIZER(work) \
+- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
++ { 0, SWAIT_HEAD_INITIALIZER((work).wait) }
+
+ #define COMPLETION_INITIALIZER_ONSTACK(work) \
+ ({ init_completion(&work); work; })
+@@ -73,7 +73,7 @@ struct completion {
+ static inline void init_completion(struct completion *x)
+ {
+ x->done = 0;
+- init_waitqueue_head(&x->wait);
++ init_swait_head(&x->wait);
+ }
+
+ extern void wait_for_completion(struct completion *);
+--- a/include/linux/uprobes.h
++++ b/include/linux/uprobes.h
+@@ -26,6 +26,7 @@
+
+ #include <linux/errno.h>
+ #include <linux/rbtree.h>
++#include <linux/wait.h>
+
+ struct vm_area_struct;
+ struct mm_struct;
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3487,10 +3487,10 @@ void complete(struct completion *x)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ x->done++;
+- __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
++ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ }
+ EXPORT_SYMBOL(complete);
+
+@@ -3507,10 +3507,10 @@ void complete_all(struct completion *x)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ x->done += UINT_MAX/2;
+- __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
++ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ }
+ EXPORT_SYMBOL(complete_all);
+
+@@ -3518,20 +3518,20 @@ static inline long __sched
+ do_wait_for_common(struct completion *x, long timeout, int state)
+ {
+ if (!x->done) {
+- DECLARE_WAITQUEUE(wait, current);
++ DEFINE_SWAITER(wait);
+
+- __add_wait_queue_tail_exclusive(&x->wait, &wait);
++ swait_prepare_locked(&x->wait, &wait);
+ do {
+ if (signal_pending_state(state, current)) {
+ timeout = -ERESTARTSYS;
+ break;
+ }
+ __set_current_state(state);
+- spin_unlock_irq(&x->wait.lock);
++ raw_spin_unlock_irq(&x->wait.lock);
+ timeout = schedule_timeout(timeout);
+- spin_lock_irq(&x->wait.lock);
++ raw_spin_lock_irq(&x->wait.lock);
+ } while (!x->done && timeout);
+- __remove_wait_queue(&x->wait, &wait);
++ swait_finish_locked(&x->wait, &wait);
+ if (!x->done)
+ return timeout;
+ }
+@@ -3544,9 +3544,9 @@ wait_for_common(struct completion *x, lo
+ {
+ might_sleep();
+
+- spin_lock_irq(&x->wait.lock);
++ raw_spin_lock_irq(&x->wait.lock);
+ timeout = do_wait_for_common(x, timeout, state);
+- spin_unlock_irq(&x->wait.lock);
++ raw_spin_unlock_irq(&x->wait.lock);
+ return timeout;
+ }
+
+@@ -3677,12 +3677,12 @@ bool try_wait_for_completion(struct comp
+ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -3700,10 +3700,10 @@ bool completion_done(struct completion *
+ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ raw_spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
diff --git a/patches/cond-resched-lock-rt-tweak.patch b/patches/cond-resched-lock-rt-tweak.patch
new file mode 100644
index 0000000..2dff484
--- /dev/null
+++ b/patches/cond-resched-lock-rt-tweak.patch
@@ -0,0 +1,20 @@
+Subject: cond-resched-lock-rt-tweak.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 22:51:33 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2719,7 +2719,7 @@ extern int _cond_resched(void);
+
+ extern int __cond_resched_lock(spinlock_t *lock);
+
+-#ifdef CONFIG_PREEMPT_COUNT
++#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
+ #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
+ #else
+ #define PREEMPT_LOCK_OFFSET 0
diff --git a/patches/cond-resched-softirq-rt.patch b/patches/cond-resched-softirq-rt.patch
new file mode 100644
index 0000000..dd146ee
--- /dev/null
+++ b/patches/cond-resched-softirq-rt.patch
@@ -0,0 +1,47 @@
+Subject: cond-resched-softirq-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Jul 2011 09:56:44 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 4 ++++
+ kernel/sched/core.c | 2 ++
+ 2 files changed, 6 insertions(+)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2730,12 +2730,16 @@ extern int __cond_resched_lock(spinlock_
+ __cond_resched_lock(lock); \
+ })
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ extern int __cond_resched_softirq(void);
+
+ #define cond_resched_softirq() ({ \
+ __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
+ __cond_resched_softirq(); \
+ })
++#else
++# define cond_resched_softirq() cond_resched()
++#endif
+
+ /*
+ * Does a critical section need to be broken due to another
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4366,6 +4366,7 @@ int __cond_resched_lock(spinlock_t *lock
+ }
+ EXPORT_SYMBOL(__cond_resched_lock);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ int __sched __cond_resched_softirq(void)
+ {
+ BUG_ON(!in_softirq());
+@@ -4379,6 +4380,7 @@ int __sched __cond_resched_softirq(void)
+ return 0;
+ }
+ EXPORT_SYMBOL(__cond_resched_softirq);
++#endif
+
+ /**
+ * yield - yield the current processor to other threads.
diff --git a/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch b/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
new file mode 100644
index 0000000..80ed692
--- /dev/null
+++ b/patches/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
@@ -0,0 +1,125 @@
+Subject: cpu: Make hotplug.lock a "sleeping" spinlock on RT
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 02 Mar 2012 10:36:57 -0500
+
+Tasks can block on hotplug.lock in pin_current_cpu(), but their state
+might be != RUNNING. So the mutex wakeup will set the state
+unconditionally to RUNNING. That might cause spurious unexpected
+wakeups. We could provide a state-preserving mutex_lock() function,
+but this is semantically backwards. So instead we convert
+hotplug.lock to a spinlock for RT, which already has the
+state-preserving semantics.
+
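+Roughly the interleaving being avoided (illustration only; assumes the
+migrate_disable() -> pin_current_cpu() path of this queue):
+
+   set_current_state(TASK_INTERRUPTIBLE);  /* task prepares to sleep      */
+   migrate_disable();                      /* -> pin_current_cpu()        */
+           /* blocks on hotplug.lock; the mutex wakeup sets the task      */
+           /* back to TASK_RUNNING, losing the state set above ...        */
+   schedule_timeout(timeout);              /* ... so this returns at once */
+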
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Carsten Emde <C.Emde@osadl.org>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <clark.williams@gmail.com>
+Cc: stable-rt@vger.kernel.org
+Link: http://lkml.kernel.org/r/1330702617.25686.265.camel@gandalf.stny.rr.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c | 35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -51,7 +51,12 @@ static int cpu_hotplug_disabled;
+
+ static struct {
+ struct task_struct *active_writer;
++#ifdef CONFIG_PREEMPT_RT_FULL
++ /* Makes the lock keep the task's state */
++ spinlock_t lock;
++#else
+ struct mutex lock; /* Synchronizes accesses to refcount, */
++#endif
+ /*
+ * Also blocks the new readers during
+ * an ongoing cpu hotplug operation.
+@@ -59,10 +64,22 @@ static struct {
+ int refcount;
+ } cpu_hotplug = {
+ .active_writer = NULL,
++#ifdef CONFIG_PREEMPT_RT_FULL
++ .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
++#else
+ .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
++#endif
+ .refcount = 0,
+ };
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
++# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
++#else
++# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
++# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
++#endif
++
+ struct hotplug_pcp {
+ struct task_struct *unplug;
+ int refcount;
+@@ -92,8 +109,8 @@ retry:
+ return;
+ }
+ preempt_enable();
+- mutex_lock(&cpu_hotplug.lock);
+- mutex_unlock(&cpu_hotplug.lock);
++ hotplug_lock();
++ hotplug_unlock();
+ preempt_disable();
+ goto retry;
+ }
+@@ -166,9 +183,9 @@ void get_online_cpus(void)
+ might_sleep();
+ if (cpu_hotplug.active_writer == current)
+ return;
+- mutex_lock(&cpu_hotplug.lock);
++ hotplug_lock();
+ cpu_hotplug.refcount++;
+- mutex_unlock(&cpu_hotplug.lock);
++ hotplug_unlock();
+
+ }
+ EXPORT_SYMBOL_GPL(get_online_cpus);
+@@ -177,14 +194,14 @@ void put_online_cpus(void)
+ {
+ if (cpu_hotplug.active_writer == current)
+ return;
+- mutex_lock(&cpu_hotplug.lock);
+
++ hotplug_lock();
+ if (WARN_ON(!cpu_hotplug.refcount))
+ cpu_hotplug.refcount++; /* try to fix things up */
+
+ if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
+ wake_up_process(cpu_hotplug.active_writer);
+- mutex_unlock(&cpu_hotplug.lock);
++ hotplug_unlock();
+
+ }
+ EXPORT_SYMBOL_GPL(put_online_cpus);
+@@ -216,11 +233,11 @@ static void cpu_hotplug_begin(void)
+ cpu_hotplug.active_writer = current;
+
+ for (;;) {
+- mutex_lock(&cpu_hotplug.lock);
++ hotplug_lock();
+ if (likely(!cpu_hotplug.refcount))
+ break;
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+- mutex_unlock(&cpu_hotplug.lock);
++ hotplug_unlock();
+ schedule();
+ }
+ }
+@@ -228,7 +245,7 @@ static void cpu_hotplug_begin(void)
+ static void cpu_hotplug_done(void)
+ {
+ cpu_hotplug.active_writer = NULL;
+- mutex_unlock(&cpu_hotplug.lock);
++ hotplug_unlock();
+ }
+
+ #else /* #if CONFIG_HOTPLUG_CPU */
diff --git a/patches/cpu-rt-rework-cpu-down.patch b/patches/cpu-rt-rework-cpu-down.patch
new file mode 100644
index 0000000..7a6496a
--- /dev/null
+++ b/patches/cpu-rt-rework-cpu-down.patch
@@ -0,0 +1,548 @@
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Mon, 16 Jul 2012 08:07:43 +0000
+Subject: cpu/rt: Rework cpu down for PREEMPT_RT
+
+Bringing a CPU down is a pain with the PREEMPT_RT kernel because
+tasks can be preempted in many more places than in non-RT. In
+order to handle per_cpu variables, tasks may be pinned to a CPU
+for a while, and even sleep. But these tasks need to be off the CPU
+if that CPU is going down.
+
+Several synchronization methods have been tried, but when stressed
+they failed. This is a new approach.
+
+A sync_tsk thread is still created and tasks may still block on a
+lock when the CPU is going down, but how that works is a bit different.
+When cpu_down() starts, it will create the sync_tsk and wait on it
+to report that the tasks currently pinned to the CPU are no longer
+pinned. But new tasks that are about to be pinned will still be allowed
+to do so at this time.
+
+Then the notifiers are called. Several notifiers will bring down tasks
+that will enter these locations. Some of these tasks will take locks
+of other tasks that are on the CPU. If we don't let those other tasks
+continue, but make them block until CPU down is done, the tasks that
+the notifiers are waiting on will never complete as they are waiting
+for the locks held by the tasks that are blocked.
+
+Thus we still let the task pin the CPU until the notifiers are done.
+After the notifiers run, we then make new tasks entering the pinned
+CPU sections grab a mutex and wait. This mutex is now a per CPU mutex
+in the hotplug_pcp descriptor.
+
+To help things along, a new function in the scheduler code is created
+called migrate_me(). This function will try to migrate the current task
+off the CPU that is going down, if possible. When the sync_tsk is created,
+all tasks will then try to migrate off the CPU going down. There are
+several cases where this won't work, but it helps in most cases.
+
+After the notifiers are called, if a task can't migrate off but enters
+a pinned CPU section, it will be forced to wait on the hotplug_pcp mutex
+until the CPU down is complete. Then the scheduler will force the migration
+anyway.
+
+Also, I found that THREAD_BOUND tasks need to be accounted for in the
+pinned CPU as well, and migrate_disable() no longer treats them specially.
+This helps fix issues with ksoftirqd and workqueue that unbind on CPU down.
+
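+The resulting cpu_down() sequencing, summarized from the code below:
+
+   /*
+    * cpu_unplug_begin():  create sync_unplug/<cpu>, wait until currently
+    *                      pinned tasks left their sections (new entries
+    *                      are still allowed)
+    * CPU_DOWN_PREPARE notifiers run, smpboot threads are parked
+    * cpu_unplug_sync():   set grab_lock so new pin_current_cpu() callers
+    *                      block on the per-cpu mutex, wait again, then
+    *                      stop the sync thread
+    * __stop_machine(take_cpu_down)
+    * cpu_unplug_done():   clear state, tell_sched_cpu_down_done(cpu)
+    */
+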
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/sched.h | 7 +
+ kernel/cpu.c | 241 +++++++++++++++++++++++++++++++++++++++++---------
+ kernel/sched/core.c | 82 ++++++++++++++++-
+ 3 files changed, 285 insertions(+), 45 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1977,6 +1977,10 @@ extern void do_set_cpus_allowed(struct t
+
+ extern int set_cpus_allowed_ptr(struct task_struct *p,
+ const struct cpumask *new_mask);
++int migrate_me(void);
++void tell_sched_cpu_down_begin(int cpu);
++void tell_sched_cpu_down_done(int cpu);
++
+ #else
+ static inline void do_set_cpus_allowed(struct task_struct *p,
+ const struct cpumask *new_mask)
+@@ -1989,6 +1993,9 @@ static inline int set_cpus_allowed_ptr(s
+ return -EINVAL;
+ return 0;
+ }
++static inline int migrate_me(void) { return 0; }
++static inline void tell_sched_cpu_down_begin(int cpu) { }
++static inline void tell_sched_cpu_down_done(int cpu) { }
+ #endif
+
+ #ifdef CONFIG_NO_HZ
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -51,12 +51,7 @@ static int cpu_hotplug_disabled;
+
+ static struct {
+ struct task_struct *active_writer;
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- /* Makes the lock keep the task's state */
+- spinlock_t lock;
+-#else
+ struct mutex lock; /* Synchronizes accesses to refcount, */
+-#endif
+ /*
+ * Also blocks the new readers during
+ * an ongoing cpu hotplug operation.
+@@ -64,28 +59,46 @@ static struct {
+ int refcount;
+ } cpu_hotplug = {
+ .active_writer = NULL,
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
+-#else
+ .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+-#endif
+ .refcount = 0,
+ };
+
+-#ifdef CONFIG_PREEMPT_RT_FULL
+-# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
+-# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
+-#else
+-# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
+-# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
+-#endif
+-
++/**
++ * hotplug_pcp - per cpu hotplug descriptor
++ * @unplug: set when pin_current_cpu() needs to sync tasks
++ * @sync_tsk: the task that waits for tasks to finish pinned sections
++ * @refcount: counter of tasks in pinned sections
++ * @grab_lock: set when the tasks entering pinned sections should wait
++ * @synced: notifier for @sync_tsk to tell cpu_down it's finished
++ * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
++ * @mutex_init: zero if the mutex hasn't been initialized yet.
++ *
++ * Although @unplug and @sync_tsk may point to the same task, the @unplug
++ * is used as a flag and still exists after @sync_tsk has exited and
++ * @sync_tsk set to NULL.
++ */
+ struct hotplug_pcp {
+ struct task_struct *unplug;
++ struct task_struct *sync_tsk;
+ int refcount;
++ int grab_lock;
+ struct completion synced;
++#ifdef CONFIG_PREEMPT_RT_FULL
++ spinlock_t lock;
++#else
++ struct mutex mutex;
++#endif
++ int mutex_init;
+ };
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
++# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
++#else
++# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
++# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
++#endif
++
+ static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
+
+ /**
+@@ -99,18 +112,40 @@ static DEFINE_PER_CPU(struct hotplug_pcp
+ void pin_current_cpu(void)
+ {
+ struct hotplug_pcp *hp;
++ int force = 0;
+
+ retry:
+ hp = &__get_cpu_var(hotplug_pcp);
+
+- if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
++ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
+ hp->unplug == current || (current->flags & PF_STOMPER)) {
+ hp->refcount++;
+ return;
+ }
+- preempt_enable();
+- hotplug_lock();
+- hotplug_unlock();
++
++ if (hp->grab_lock) {
++ preempt_enable();
++ hotplug_lock(hp);
++ hotplug_unlock(hp);
++ } else {
++ preempt_enable();
++ /*
++ * Try to push this task off of this CPU.
++ */
++ if (!migrate_me()) {
++ preempt_disable();
++ hp = &__get_cpu_var(hotplug_pcp);
++ if (!hp->grab_lock) {
++ /*
++ * Just let it continue it's already pinned
++ * or about to sleep.
++ */
++ force = 1;
++ goto retry;
++ }
++ preempt_enable();
++ }
++ }
+ preempt_disable();
+ goto retry;
+ }
+@@ -132,26 +167,84 @@ void unpin_current_cpu(void)
+ wake_up_process(hp->unplug);
+ }
+
+-/*
+- * FIXME: Is this really correct under all circumstances ?
+- */
++static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
++{
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ while (hp->refcount) {
++ schedule_preempt_disabled();
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ }
++}
++
+ static int sync_unplug_thread(void *data)
+ {
+ struct hotplug_pcp *hp = data;
+
+ preempt_disable();
+ hp->unplug = current;
++ wait_for_pinned_cpus(hp);
++
++ /*
++ * This thread will synchronize the cpu_down() with threads
++ * that have pinned the CPU. When the pinned CPU count reaches
++ * zero, we inform the cpu_down code to continue to the next step.
++ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+- while (hp->refcount) {
+- schedule_preempt_disabled();
++ preempt_enable();
++ complete(&hp->synced);
++
++ /*
++ * If all succeeds, the next step will need tasks to wait till
++ * the CPU is offline before continuing. To do this, the grab_lock
++ * is set and tasks going into pin_current_cpu() will block on the
++ * mutex. But we still need to wait for those that are already in
++ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
++ * will kick this thread out.
++ */
++ while (!hp->grab_lock && !kthread_should_stop()) {
++ schedule();
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ }
++
++ /* Make sure grab_lock is seen before we see a stale completion */
++ smp_mb();
++
++ /*
++ * Now just before cpu_down() enters stop machine, we need to make
++ * sure all tasks that are in pinned CPU sections are out, and new
++ * tasks will now grab the lock, keeping them from entering pinned
++ * CPU sections.
++ */
++ if (!kthread_should_stop()) {
++ preempt_disable();
++ wait_for_pinned_cpus(hp);
++ preempt_enable();
++ complete(&hp->synced);
++ }
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ while (!kthread_should_stop()) {
++ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+ set_current_state(TASK_RUNNING);
+- preempt_enable();
+- complete(&hp->synced);
++
++ /*
++ * Force this thread off this CPU as it's going down and
++ * we don't want any more work on this CPU.
++ */
++ current->flags &= ~PF_THREAD_BOUND;
++ do_set_cpus_allowed(current, cpu_present_mask);
++ migrate_me();
+ return 0;
+ }
+
++static void __cpu_unplug_sync(struct hotplug_pcp *hp)
++{
++ wake_up_process(hp->sync_tsk);
++ wait_for_completion(&hp->synced);
++}
++
+ /*
+ * Start the sync_unplug_thread on the target cpu and wait for it to
+ * complete.
+@@ -159,23 +252,83 @@ static int sync_unplug_thread(void *data
+ static int cpu_unplug_begin(unsigned int cpu)
+ {
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+- struct task_struct *tsk;
++ int err;
++
++ /* Protected by cpu_hotplug.lock */
++ if (!hp->mutex_init) {
++#ifdef CONFIG_PREEMPT_RT_FULL
++ spin_lock_init(&hp->lock);
++#else
++ mutex_init(&hp->mutex);
++#endif
++ hp->mutex_init = 1;
++ }
++
++ /* Inform the scheduler to migrate tasks off this CPU */
++ tell_sched_cpu_down_begin(cpu);
+
+ init_completion(&hp->synced);
+- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+- if (IS_ERR(tsk))
+- return (PTR_ERR(tsk));
+- kthread_bind(tsk, cpu);
+- wake_up_process(tsk);
+- wait_for_completion(&hp->synced);
++
++ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
++ if (IS_ERR(hp->sync_tsk)) {
++ err = PTR_ERR(hp->sync_tsk);
++ hp->sync_tsk = NULL;
++ return err;
++ }
++ kthread_bind(hp->sync_tsk, cpu);
++
++ /*
++ * Wait for tasks to get out of the pinned sections,
++ * it's still OK if new tasks enter. Some CPU notifiers will
++ * wait for tasks that are going to enter these sections and
++ * we must not have them block.
++ */
++ __cpu_unplug_sync(hp);
++
+ return 0;
+ }
+
++static void cpu_unplug_sync(unsigned int cpu)
++{
++ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++
++ init_completion(&hp->synced);
++ /* The completion needs to be initialzied before setting grab_lock */
++ smp_wmb();
++
++ /* Grab the mutex before setting grab_lock */
++ hotplug_lock(hp);
++ hp->grab_lock = 1;
++
++ /*
++ * The CPU notifiers have been completed.
++ * Wait for tasks to get out of pinned CPU sections and have new
++ * tasks block until the CPU is completely down.
++ */
++ __cpu_unplug_sync(hp);
++
++ /* All done with the sync thread */
++ kthread_stop(hp->sync_tsk);
++ hp->sync_tsk = NULL;
++}
++
+ static void cpu_unplug_done(unsigned int cpu)
+ {
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ hp->unplug = NULL;
++ /* Let all tasks know cpu unplug is finished before cleaning up */
++ smp_wmb();
++
++ if (hp->sync_tsk)
++ kthread_stop(hp->sync_tsk);
++
++ if (hp->grab_lock) {
++ hotplug_unlock(hp);
++ /* protected by cpu_hotplug.lock */
++ hp->grab_lock = 0;
++ }
++ tell_sched_cpu_down_done(cpu);
+ }
+
+ void get_online_cpus(void)
+@@ -183,9 +336,9 @@ void get_online_cpus(void)
+ might_sleep();
+ if (cpu_hotplug.active_writer == current)
+ return;
+- hotplug_lock();
++ mutex_lock(&cpu_hotplug.lock);
+ cpu_hotplug.refcount++;
+- hotplug_unlock();
++ mutex_unlock(&cpu_hotplug.lock);
+
+ }
+ EXPORT_SYMBOL_GPL(get_online_cpus);
+@@ -195,14 +348,13 @@ void put_online_cpus(void)
+ if (cpu_hotplug.active_writer == current)
+ return;
+
+- hotplug_lock();
++ mutex_lock(&cpu_hotplug.lock);
+ if (WARN_ON(!cpu_hotplug.refcount))
+ cpu_hotplug.refcount++; /* try to fix things up */
+
+ if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
+ wake_up_process(cpu_hotplug.active_writer);
+- hotplug_unlock();
+-
++ mutex_unlock(&cpu_hotplug.lock);
+ }
+ EXPORT_SYMBOL_GPL(put_online_cpus);
+
+@@ -233,11 +385,11 @@ static void cpu_hotplug_begin(void)
+ cpu_hotplug.active_writer = current;
+
+ for (;;) {
+- hotplug_lock();
++ mutex_lock(&cpu_hotplug.lock);
+ if (likely(!cpu_hotplug.refcount))
+ break;
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+- hotplug_unlock();
++ mutex_unlock(&cpu_hotplug.lock);
+ schedule();
+ }
+ }
+@@ -245,7 +397,7 @@ static void cpu_hotplug_begin(void)
+ static void cpu_hotplug_done(void)
+ {
+ cpu_hotplug.active_writer = NULL;
+- hotplug_unlock();
++ mutex_unlock(&cpu_hotplug.lock);
+ }
+
+ #else /* #if CONFIG_HOTPLUG_CPU */
+@@ -421,6 +573,9 @@ static int __ref _cpu_down(unsigned int
+ }
+ smpboot_park_threads(cpu);
+
++ /* Notifiers are done. Don't let any more tasks pin this CPU. */
++ cpu_unplug_sync(cpu);
++
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ /* CPU didn't die: tell everyone. Can't complain. */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2896,7 +2896,7 @@ void migrate_disable(void)
+ {
+ struct task_struct *p = current;
+
+- if (in_atomic() || p->flags & PF_THREAD_BOUND) {
++ if (in_atomic()) {
+ #ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+ #endif
+@@ -2927,7 +2927,7 @@ void migrate_enable(void)
+ unsigned long flags;
+ struct rq *rq;
+
+- if (in_atomic() || p->flags & PF_THREAD_BOUND) {
++ if (in_atomic()) {
+ #ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+ #endif
+@@ -4872,6 +4872,84 @@ void do_set_cpus_allowed(struct task_str
+ cpumask_copy(&p->cpus_allowed, new_mask);
+ }
+
++static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
++static DEFINE_MUTEX(sched_down_mutex);
++static cpumask_t sched_down_cpumask;
++
++void tell_sched_cpu_down_begin(int cpu)
++{
++ mutex_lock(&sched_down_mutex);
++ cpumask_set_cpu(cpu, &sched_down_cpumask);
++ mutex_unlock(&sched_down_mutex);
++}
++
++void tell_sched_cpu_down_done(int cpu)
++{
++ mutex_lock(&sched_down_mutex);
++ cpumask_clear_cpu(cpu, &sched_down_cpumask);
++ mutex_unlock(&sched_down_mutex);
++}
++
++/**
++ * migrate_me - try to move the current task off this cpu
++ *
++ * Used by the pin_current_cpu() code to try to get tasks
++ * to move off the current CPU as it is going down.
++ * It will only move the task if the task isn't pinned to
++ * the CPU (with migrate_disable, affinity or THREAD_BOUND)
++ * and the task has to be in a RUNNING state. Otherwise the
++ * movement of the task will wake it up (change its state
++ * to running) when the task did not expect it.
++ *
++ * Returns 1 if it succeeded in moving the current task
++ * 0 otherwise.
++ */
++int migrate_me(void)
++{
++ struct task_struct *p = current;
++ struct migration_arg arg;
++ struct cpumask *cpumask;
++ struct cpumask *mask;
++ unsigned long flags;
++ unsigned int dest_cpu;
++ struct rq *rq;
++
++ /*
++ * We can not migrate tasks bounded to a CPU or tasks not
++ * running. The movement of the task will wake it up.
++ */
++ if (p->flags & PF_THREAD_BOUND || p->state)
++ return 0;
++
++ mutex_lock(&sched_down_mutex);
++ rq = task_rq_lock(p, &flags);
++
++ cpumask = &__get_cpu_var(sched_cpumasks);
++ mask = &p->cpus_allowed;
++
++ cpumask_andnot(cpumask, mask, &sched_down_cpumask);
++
++ if (!cpumask_weight(cpumask)) {
++ /* It's only on this CPU? */
++ task_rq_unlock(rq, p, &flags);
++ mutex_unlock(&sched_down_mutex);
++ return 0;
++ }
++
++ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
++
++ arg.task = p;
++ arg.dest_cpu = dest_cpu;
++
++ task_rq_unlock(rq, p, &flags);
++
++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
++ tlb_migrate_finish(p->mm);
++ mutex_unlock(&sched_down_mutex);
++
++ return 1;
++}
++
+ /*
+ * This is how migration works:
+ *
diff --git a/patches/cpu-rt-variants.patch b/patches/cpu-rt-variants.patch
new file mode 100644
index 0000000..a344eda
--- /dev/null
+++ b/patches/cpu-rt-variants.patch
@@ -0,0 +1,26 @@
+Subject: cpu-rt-variants.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 Jun 2011 15:42:38 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/smp.h | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -218,6 +218,14 @@ static inline void kick_all_cpus_sync(vo
+ #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
+ #define put_cpu() preempt_enable()
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define get_cpu_light() get_cpu()
++# define put_cpu_light() put_cpu()
++#else
++# define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
++# define put_cpu_light() migrate_enable()
++#endif
++
+ /*
+ * Callback to arch code if there's nosmp or maxcpus=0 on the
+ * boot command line:
diff --git a/patches/cpumask-disable-offstack-on-rt.patch b/patches/cpumask-disable-offstack-on-rt.patch
new file mode 100644
index 0000000..c8728a6
--- /dev/null
+++ b/patches/cpumask-disable-offstack-on-rt.patch
@@ -0,0 +1,34 @@
+Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 14 Dec 2011 01:03:49 +0100
+
+We can't deal with the cpumask allocations which happen in atomic
+context (see arch/x86/kernel/apic/io_apic.c) on RT right now.
+
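+Background, for what it is worth (general mainline behaviour, not introduced
+by this patch): with CPUMASK_OFFSTACK a cpumask_var_t is a pointer and
+alloc_cpumask_var() performs a real allocation, which is what cannot be done
+from atomic context on RT:
+
+   cpumask_var_t mask;
+
+   if (!alloc_cpumask_var(&mask, GFP_ATOMIC))  /* kmalloc() under the hood */
+           return -ENOMEM;
+   /* ... use mask ... */
+   free_cpumask_var(mask);
+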
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/Kconfig | 2 +-
+ lib/Kconfig | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -775,7 +775,7 @@ config IOMMU_HELPER
+ config MAXSMP
+ bool "Enable Maximum number of SMP Processors and NUMA Nodes"
+ depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+- select CPUMASK_OFFSTACK
++ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
+ ---help---
+ Enable maximum number of CPUS and NUMA Nodes for this architecture.
+ If unsure, say N.
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -315,6 +315,7 @@ config CHECK_SIGNATURE
+
+ config CPUMASK_OFFSTACK
+ bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
++ depends on !PREEMPT_RT_FULL
+ help
+ Use dynamic allocation for cpumask_var_t, instead of putting
+ them on the stack. This is a bit more expensive, but avoids
diff --git a/patches/debugobjects-rt.patch b/patches/debugobjects-rt.patch
new file mode 100644
index 0000000..3e58f6f
--- /dev/null
+++ b/patches/debugobjects-rt.patch
@@ -0,0 +1,23 @@
+Subject: debugobjects-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:41:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/debugobjects.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct d
+ struct debug_obj *obj;
+ unsigned long flags;
+
+- fill_pool();
++#ifdef CONFIG_PREEMPT_RT_FULL
++ if (preempt_count() == 0 && !irqs_disabled())
++#endif
++ fill_pool();
+
+ db = get_bucket((unsigned long) addr);
+
diff --git a/patches/dm-make-rt-aware.patch b/patches/dm-make-rt-aware.patch
new file mode 100644
index 0000000..e71d485
--- /dev/null
+++ b/patches/dm-make-rt-aware.patch
@@ -0,0 +1,34 @@
+Subject: dm: Make rt aware
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 14 Nov 2011 23:06:09 +0100
+
+Use the BUG_ON_NONRT variant for the irqs_disabled() checks. RT has
+interrupts legitimately enabled here as we can't deadlock against the
+irq thread due to the "sleeping spinlocks" conversion.
+
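+The BUG_ON_NONRT() variant used below is introduced by
+bug-rt-dependend-variants.patch earlier in this series; roughly:
+
+   /* RT:  # define BUG_ON_NONRT(c)  do { } while (0) */
+   /* !RT: # define BUG_ON_NONRT(c)  BUG_ON(c)        */
+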
+Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/md/dm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1677,14 +1677,14 @@ static void dm_request_fn(struct request
+ if (map_request(ti, clone, md))
+ goto requeued;
+
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+ spin_lock(q->queue_lock);
+ }
+
+ goto out;
+
+ requeued:
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+ spin_lock(q->queue_lock);
+
+ delay_and_out:
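BUG_ON_NONRT() itself is not defined in this file; the RT-dependent assertion variants are provided elsewhere in the queue. A sketch of the shape such a helper would take, keyed on CONFIG_PREEMPT_RT_BASE as an assumption, so the irqs_disabled() check is only enforced on configurations where it still holds:

    /* Assumed shape, not the exact definition shipped in the queue. */
    #ifdef CONFIG_PREEMPT_RT_BASE
    # define BUG_ON_NONRT(c)        do { } while (0)
    #else
    # define BUG_ON_NONRT(c)        BUG_ON(c)
    #endif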
diff --git a/patches/drivers-net-8139-disable-irq-nosync.patch b/patches/drivers-net-8139-disable-irq-nosync.patch
new file mode 100644
index 0000000..32dc2b1
--- /dev/null
+++ b/patches/drivers-net-8139-disable-irq-nosync.patch
@@ -0,0 +1,25 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:24 -0500
+Subject: drivers/net: Use disable_irq_nosync() in 8139too
+
+Use disable_irq_nosync() instead of disable_irq() as this might be
+called in atomic context with netpoll.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/realtek/8139too.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/realtek/8139too.c
++++ b/drivers/net/ethernet/realtek/8139too.c
+@@ -2216,7 +2216,7 @@ static void rtl8139_poll_controller(stru
+ struct rtl8139_private *tp = netdev_priv(dev);
+ const int irq = tp->pci_dev->irq;
+
+- disable_irq(irq);
++ disable_irq_nosync(irq);
+ rtl8139_interrupt(irq, dev);
+ enable_irq(irq);
+ }
diff --git a/patches/drivers-net-fix-livelock-issues.patch b/patches/drivers-net-fix-livelock-issues.patch
new file mode 100644
index 0000000..0d9fc5c
--- /dev/null
+++ b/patches/drivers-net-fix-livelock-issues.patch
@@ -0,0 +1,126 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 20 Jun 2009 11:36:54 +0200
+Subject: drivers/net: fix livelock issues
+
+Preempt-RT runs into a livelock issue with the NETDEV_TX_LOCKED micro
+optimization. The reason is that the softirq thread reschedules
+itself on that return value. Depending on priorities it starts to
+monopolize the CPU and livelocks on UP systems.
+
+Remove it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 6 +-----
+ drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 3 +--
+ drivers/net/ethernet/chelsio/cxgb/sge.c | 3 +--
+ drivers/net/ethernet/neterion/s2io.c | 7 +------
+ drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 6 ++----
+ drivers/net/ethernet/tehuti/tehuti.c | 9 ++-------
+ drivers/net/rionet.c | 6 +-----
+ 7 files changed, 9 insertions(+), 31 deletions(-)
+
+--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
++++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+@@ -2171,11 +2171,7 @@ static netdev_tx_t atl1c_xmit_frame(stru
+ }
+
+ tpd_req = atl1c_cal_tpd_req(skb);
+- if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
+- if (netif_msg_pktdata(adapter))
+- dev_info(&adapter->pdev->dev, "tx locked\n");
+- return NETDEV_TX_LOCKED;
+- }
++ spin_lock_irqsave(&adapter->tx_lock, flags);
+
+ if (atl1c_tpd_avail(adapter, type) < tpd_req) {
+ /* no enough descriptor, just stop queue */
+--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
++++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+@@ -1803,8 +1803,7 @@ static netdev_tx_t atl1e_xmit_frame(stru
+ return NETDEV_TX_OK;
+ }
+ tpd_req = atl1e_cal_tdp_req(skb);
+- if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
+- return NETDEV_TX_LOCKED;
++ spin_lock_irqsave(&adapter->tx_lock, flags);
+
+ if (atl1e_tpd_avail(adapter) < tpd_req) {
+ /* no enough descriptor, just stop queue */
+--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
++++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
+@@ -1666,8 +1666,7 @@ static int t1_sge_tx(struct sk_buff *skb
+ struct cmdQ *q = &sge->cmdQ[qid];
+ unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
+
+- if (!spin_trylock(&q->lock))
+- return NETDEV_TX_LOCKED;
++ spin_lock(&q->lock);
+
+ reclaim_completed_tx(sge, q);
+
+--- a/drivers/net/ethernet/neterion/s2io.c
++++ b/drivers/net/ethernet/neterion/s2io.c
+@@ -4088,12 +4088,7 @@ static netdev_tx_t s2io_xmit(struct sk_b
+ [skb->priority & (MAX_TX_FIFOS - 1)];
+ fifo = &mac_control->fifos[queue];
+
+- if (do_spin_lock)
+- spin_lock_irqsave(&fifo->tx_lock, flags);
+- else {
+- if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
+- return NETDEV_TX_LOCKED;
+- }
++ spin_lock_irqsave(&fifo->tx_lock, flags);
+
+ if (sp->config.multiq) {
+ if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
+--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
++++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+@@ -2114,10 +2114,8 @@ static int pch_gbe_xmit_frame(struct sk_
+ struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
+ unsigned long flags;
+
+- if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
+- /* Collision - tell upper layer to requeue */
+- return NETDEV_TX_LOCKED;
+- }
++ spin_lock_irqsave(&tx_ring->tx_lock, flags);
++
+ if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
+ netif_stop_queue(netdev);
+ spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+--- a/drivers/net/ethernet/tehuti/tehuti.c
++++ b/drivers/net/ethernet/tehuti/tehuti.c
+@@ -1630,13 +1630,8 @@ static netdev_tx_t bdx_tx_transmit(struc
+ unsigned long flags;
+
+ ENTER;
+- local_irq_save(flags);
+- if (!spin_trylock(&priv->tx_lock)) {
+- local_irq_restore(flags);
+- DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
+- BDX_DRV_NAME, ndev->name);
+- return NETDEV_TX_LOCKED;
+- }
++
++ spin_lock_irqsave(&priv->tx_lock, flags);
+
+ /* build tx descriptor */
+ BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
+--- a/drivers/net/rionet.c
++++ b/drivers/net/rionet.c
+@@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_b
+ unsigned long flags;
+ int add_num = 1;
+
+- local_irq_save(flags);
+- if (!spin_trylock(&rnet->tx_lock)) {
+- local_irq_restore(flags);
+- return NETDEV_TX_LOCKED;
+- }
++ spin_lock_irqsave(&rnet->tx_lock, flags);
+
+ if (is_multicast_ether_addr(eth->h_dest))
+ add_num = nets[rnet->mport->id].nact;
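Every driver touched by this patch follows the same shape. A condensed sketch of the converted transmit path, with a hypothetical private struct standing in for the individual drivers and the hardware handling elided:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>
    #include <linux/spinlock.h>

    struct example_priv {
            spinlock_t tx_lock;
    };

    static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            struct example_priv *priv = netdev_priv(dev);
            unsigned long flags;

            /* was: if (!spin_trylock_irqsave(...)) return NETDEV_TX_LOCKED; */
            spin_lock_irqsave(&priv->tx_lock, flags);
            /* ... ring the hardware, queue or free the skb ... */
            spin_unlock_irqrestore(&priv->tx_lock, flags);
            return NETDEV_TX_OK;
    }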
diff --git a/patches/drivers-net-gianfar-make-rt-aware.patch b/patches/drivers-net-gianfar-make-rt-aware.patch
new file mode 100644
index 0000000..5303089
--- /dev/null
+++ b/patches/drivers-net-gianfar-make-rt-aware.patch
@@ -0,0 +1,55 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 1 Apr 2010 20:20:57 +0200
+Subject: drivers: net: gianfar: Make RT aware
+
+adjust_link() disables interrupts before taking the queue
+locks. On RT those locks are converted to "sleeping" locks and
+therefore the local_irq_save/restore calls must be converted to
+local_irq_save/restore_nort.
+
+Reported-by: Xianghua Xiao <xiaoxianghua@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Xianghua Xiao <xiaoxianghua@gmail.com>
+
+---
+ drivers/net/ethernet/freescale/gianfar.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -1663,7 +1663,7 @@ void stop_gfar(struct net_device *dev)
+
+
+ /* Lock it down */
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ lock_tx_qs(priv);
+ lock_rx_qs(priv);
+
+@@ -1671,7 +1671,7 @@ void stop_gfar(struct net_device *dev)
+
+ unlock_rx_qs(priv);
+ unlock_tx_qs(priv);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ /* Free the IRQs */
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+@@ -2951,7 +2951,7 @@ static void adjust_link(struct net_devic
+ struct phy_device *phydev = priv->phydev;
+ int new_state = 0;
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ lock_tx_qs(priv);
+
+ if (phydev->link) {
+@@ -3020,7 +3020,7 @@ static void adjust_link(struct net_devic
+ if (new_state && netif_msg_link(priv))
+ phy_print_status(phydev);
+ unlock_tx_qs(priv);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+
+ /* Update the hash table based on the current list of multicast
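The _nort variants come from the RT locking patches elsewhere in this queue; the conversion relies on them collapsing to the plain IRQ primitives on !RT and to (almost) no-ops on RT, where the protected locks sleep anyway. A sketch of the assumed shape:

    /* Assumed shape, not the exact definition shipped in the queue. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     local_save_flags(flags)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif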
diff --git a/patches/drivers-net-tulip-add-missing-pci-disable.patch b/patches/drivers-net-tulip-add-missing-pci-disable.patch
new file mode 100644
index 0000000..78beb2b
--- /dev/null
+++ b/patches/drivers-net-tulip-add-missing-pci-disable.patch
@@ -0,0 +1,23 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:18 -0500
+Subject: drivers/net: tulip_remove_one needs to call pci_disable_device()
+
+Otherwise the device is not completely shut down.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/net/ethernet/dec/tulip/tulip_core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
++++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
+@@ -1943,6 +1943,7 @@ static void tulip_remove_one(struct pci_
+ pci_iounmap(pdev, tp->base_addr);
+ free_netdev (dev);
+ pci_release_regions (pdev);
++ pci_disable_device (pdev);
+ pci_set_drvdata (pdev, NULL);
+
+ /* pci_power_off (pdev, -1); */
diff --git a/patches/drivers-net-vortex-fix-locking-issues.patch b/patches/drivers-net-vortex-fix-locking-issues.patch
new file mode 100644
index 0000000..4dd3898
--- /dev/null
+++ b/patches/drivers-net-vortex-fix-locking-issues.patch
@@ -0,0 +1,48 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 3 Jul 2009 08:30:00 -0500
+Subject: drivers/net: vortex fix locking issues
+
+Argh, cut and paste wasn't enough...
+
+Use this patch instead. It needs an irq disable. But, believe it or not,
+on SMP this is actually better. If the irq is shared (as it is in Mark's
+case), we don't stop the irq of other devices from being handled on
+another CPU (unfortunately for Mark, he pinned all interrupts to one CPU).
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ drivers/net/ethernet/3com/3c59x.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+--- a/drivers/net/ethernet/3com/3c59x.c
++++ b/drivers/net/ethernet/3com/3c59x.c
+@@ -843,9 +843,9 @@ static void poll_vortex(struct net_devic
+ {
+ struct vortex_private *vp = netdev_priv(dev);
+ unsigned long flags;
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+ #endif
+
+@@ -1919,12 +1919,12 @@ static void vortex_tx_timeout(struct net
+ * Block interrupts because vortex_interrupt does a bare spin_lock()
+ */
+ unsigned long flags;
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ if (vp->full_bus_master_tx)
+ boomerang_interrupt(dev->irq, dev);
+ else
+ vortex_interrupt(dev->irq, dev);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+ }
+
diff --git a/patches/drivers-random-reduce-preempt-disabled-region.patch b/patches/drivers-random-reduce-preempt-disabled-region.patch
new file mode 100644
index 0000000..7b85cec
--- /dev/null
+++ b/patches/drivers-random-reduce-preempt-disabled-region.patch
@@ -0,0 +1,39 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:30 -0500
+Subject: drivers: random: Reduce preempt disabled region
+
+No need to keep preemption disabled across the whole function.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/char/random.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -676,9 +676,12 @@ static void add_timer_randomness(struct
+ preempt_disable();
+ /* if over the trickle threshold, use only 1 in 4096 samples */
+ if (input_pool.entropy_count > trickle_thresh &&
+- ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff))
+- goto out;
++ ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff)) {
++ preempt_enable();
++ return;
++ }
+
++ preempt_enable();
+ sample.jiffies = jiffies;
+ sample.cycles = get_cycles();
+ sample.num = num;
+@@ -719,8 +722,6 @@ static void add_timer_randomness(struct
+ credit_entropy_bits(&input_pool,
+ min_t(int, fls(delta>>1), 11));
+ }
+-out:
+- preempt_enable();
+ }
+
+ void add_input_randomness(unsigned int type, unsigned int code,
diff --git a/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch b/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch
new file mode 100644
index 0000000..b8c297e
--- /dev/null
+++ b/patches/drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch
@@ -0,0 +1,28 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:01 -0500
+Subject: serial: 8250: Call flush_to_ldisc when the irq is threaded
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ drivers/tty/tty_buffer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/tty/tty_buffer.c
++++ b/drivers/tty/tty_buffer.c
+@@ -566,10 +566,15 @@ void tty_flip_buffer_push(struct tty_str
+ buf->tail->commit = buf->tail->used;
+ spin_unlock_irqrestore(&buf->lock, flags);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ if (tty->low_latency)
+ flush_to_ldisc(&buf->work);
+ else
+ schedule_work(&buf->work);
++#else
++ flush_to_ldisc(&buf->work);
++#endif
++
+ }
+ EXPORT_SYMBOL(tty_flip_buffer_push);
+
diff --git a/patches/drivers-serial-cleanup-locking-for-rt.patch b/patches/drivers-serial-cleanup-locking-for-rt.patch
new file mode 100644
index 0000000..0ad518e
--- /dev/null
+++ b/patches/drivers-serial-cleanup-locking-for-rt.patch
@@ -0,0 +1,42 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:01 -0500
+Subject: serial: 8250: Clean up the locking for -rt
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/tty/serial/8250/8250.c | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250.c
++++ b/drivers/tty/serial/8250/8250.c
+@@ -2900,14 +2900,10 @@ serial8250_console_write(struct console
+
+ touch_nmi_watchdog();
+
+- local_irq_save(flags);
+- if (port->sysrq) {
+- /* serial8250_handle_irq() already took the lock */
+- locked = 0;
+- } else if (oops_in_progress) {
+- locked = spin_trylock(&port->lock);
+- } else
+- spin_lock(&port->lock);
++ if (port->sysrq || oops_in_progress)
++ locked = spin_trylock_irqsave(&port->lock, flags);
++ else
++ spin_lock_irqsave(&port->lock, flags);
+
+ /*
+ * First save the IER then disable the interrupts
+@@ -2939,8 +2935,7 @@ serial8250_console_write(struct console
+ serial8250_modem_status(up);
+
+ if (locked)
+- spin_unlock(&port->lock);
+- local_irq_restore(flags);
++ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+ static int __init serial8250_console_setup(struct console *co, char *options)
diff --git a/patches/drivers-tty-fix-omap-lock-crap.patch b/patches/drivers-tty-fix-omap-lock-crap.patch
new file mode 100644
index 0000000..03b070d
--- /dev/null
+++ b/patches/drivers-tty-fix-omap-lock-crap.patch
@@ -0,0 +1,38 @@
+Subject: drivers-tty-fix-omap-lock-crap.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 28 Jul 2011 13:32:57 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/tty/serial/omap-serial.c | 12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/drivers/tty/serial/omap-serial.c
++++ b/drivers/tty/serial/omap-serial.c
+@@ -1166,13 +1166,10 @@ serial_omap_console_write(struct console
+
+ pm_runtime_get_sync(up->dev);
+
+- local_irq_save(flags);
+- if (up->port.sysrq)
+- locked = 0;
+- else if (oops_in_progress)
+- locked = spin_trylock(&up->port.lock);
++ if (up->port.sysrq || oops_in_progress)
++ locked = spin_trylock_irqsave(&up->port.lock, flags);
+ else
+- spin_lock(&up->port.lock);
++ spin_lock_irqsave(&up->port.lock, flags);
+
+ /*
+ * First save the IER then disable the interrupts
+@@ -1201,8 +1198,7 @@ serial_omap_console_write(struct console
+ pm_runtime_mark_last_busy(up->dev);
+ pm_runtime_put_autosuspend(up->dev);
+ if (locked)
+- spin_unlock(&up->port.lock);
+- local_irq_restore(flags);
++ spin_unlock_irqrestore(&up->port.lock, flags);
+ }
+
+ static int __init
diff --git a/patches/drivers-tty-pl011-irq-disable-madness.patch b/patches/drivers-tty-pl011-irq-disable-madness.patch
new file mode 100644
index 0000000..0e648b0
--- /dev/null
+++ b/patches/drivers-tty-pl011-irq-disable-madness.patch
@@ -0,0 +1,44 @@
+Subject: drivers-tty-pl011-irq-disable-madness.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 08 Jan 2013 21:36:51 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/tty/serial/amba-pl011.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/tty/serial/amba-pl011.c
++++ b/drivers/tty/serial/amba-pl011.c
+@@ -1779,13 +1779,19 @@ pl011_console_write(struct console *co,
+
+ clk_enable(uap->clk);
+
+- local_irq_save(flags);
++ /*
++ * local_irq_save(flags);
++ *
++ * This local_irq_save() is nonsense. If we come in via sysrq
++ * handling then interrupts are already disabled. Aside of
++ * that the port.sysrq check is racy on SMP regardless.
++ */
+ if (uap->port.sysrq)
+ locked = 0;
+ else if (oops_in_progress)
+- locked = spin_trylock(&uap->port.lock);
++ locked = spin_trylock_irqsave(&uap->port.lock, flags);
+ else
+- spin_lock(&uap->port.lock);
++ spin_lock_irqsave(&uap->port.lock, flags);
+
+ /*
+ * First save the CR then disable the interrupts
+@@ -1807,8 +1813,7 @@ pl011_console_write(struct console *co,
+ writew(old_cr, uap->port.membase + UART011_CR);
+
+ if (locked)
+- spin_unlock(&uap->port.lock);
+- local_irq_restore(flags);
++ spin_unlock_irqrestore(&uap->port.lock, flags);
+
+ clk_disable(uap->clk);
+ }
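The 8250, omap-serial and pl011 console conversions above all reduce to the same pattern: fold the bare local_irq_save() into the acquisition of the (on RT sleeping) port lock. A condensed sketch of the resulting write path, with the register save/restore and character output elided; real console callbacks take a struct console *, this simplified form is illustrative only:

    #include <linux/serial_core.h>

    static void example_console_write(struct uart_port *port, const char *s, unsigned int n)
    {
            unsigned long flags;
            int locked = 1;

            if (port->sysrq || oops_in_progress)
                    locked = spin_trylock_irqsave(&port->lock, flags);
            else
                    spin_lock_irqsave(&port->lock, flags);

            /* ... save IER/CR, emit the string, restore ... */

            if (locked)
                    spin_unlock_irqrestore(&port->lock, flags);
    }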
diff --git a/patches/early-printk-consolidate.patch b/patches/early-printk-consolidate.patch
new file mode 100644
index 0000000..29d5d9e
--- /dev/null
+++ b/patches/early-printk-consolidate.patch
@@ -0,0 +1,485 @@
+Subject: early-printk-consolidate.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 23 Jul 2011 11:04:08 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/kernel/early_printk.c | 17 +++--------------
+ arch/blackfin/kernel/early_printk.c | 2 --
+ arch/microblaze/kernel/early_printk.c | 26 ++++----------------------
+ arch/mips/kernel/early_printk.c | 11 +++++------
+ arch/powerpc/kernel/udbg.c | 6 ++----
+ arch/sh/kernel/sh_bios.c | 2 --
+ arch/sparc/kernel/setup_32.c | 1 +
+ arch/sparc/kernel/setup_64.c | 8 +++++++-
+ arch/tile/kernel/early_printk.c | 27 +++++----------------------
+ arch/um/kernel/early_printk.c | 8 +++++---
+ arch/unicore32/kernel/early_printk.c | 12 ++++--------
+ arch/x86/kernel/early_printk.c | 21 ++-------------------
+ include/linux/console.h | 1 +
+ include/linux/printk.h | 6 ++++++
+ kernel/printk.c | 30 +++++++++++++++++++++++-------
+ 15 files changed, 68 insertions(+), 110 deletions(-)
+
+--- a/arch/arm/kernel/early_printk.c
++++ b/arch/arm/kernel/early_printk.c
+@@ -29,28 +29,17 @@ static void early_console_write(struct c
+ early_write(s, n);
+ }
+
+-static struct console early_console = {
++static struct console early_console_dev = {
+ .name = "earlycon",
+ .write = early_console_write,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+ .index = -1,
+ };
+
+-asmlinkage void early_printk(const char *fmt, ...)
+-{
+- char buf[512];
+- int n;
+- va_list ap;
+-
+- va_start(ap, fmt);
+- n = vscnprintf(buf, sizeof(buf), fmt, ap);
+- early_write(buf, n);
+- va_end(ap);
+-}
+-
+ static int __init setup_early_printk(char *buf)
+ {
+- register_console(&early_console);
++ early_console = &early_console_dev;
++ register_console(&early_console_dev);
+ return 0;
+ }
+
+--- a/arch/blackfin/kernel/early_printk.c
++++ b/arch/blackfin/kernel/early_printk.c
+@@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_
+ extern struct console *bfin_jc_early_init(void);
+ #endif
+
+-static struct console *early_console;
+-
+ /* Default console */
+ #define DEFAULT_PORT 0
+ #define DEFAULT_CFLAG CS8|B57600
+--- a/arch/microblaze/kernel/early_printk.c
++++ b/arch/microblaze/kernel/early_printk.c
+@@ -21,7 +21,6 @@
+ #include <asm/setup.h>
+ #include <asm/prom.h>
+
+-static u32 early_console_initialized;
+ static u32 base_addr;
+
+ #ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
+@@ -109,27 +108,11 @@ static struct console early_serial_uart1
+ };
+ #endif /* CONFIG_SERIAL_8250_CONSOLE */
+
+-static struct console *early_console;
+-
+-void early_printk(const char *fmt, ...)
+-{
+- char buf[512];
+- int n;
+- va_list ap;
+-
+- if (early_console_initialized) {
+- va_start(ap, fmt);
+- n = vscnprintf(buf, 512, fmt, ap);
+- early_console->write(early_console, buf, n);
+- va_end(ap);
+- }
+-}
+-
+ int __init setup_early_printk(char *opt)
+ {
+ int version = 0;
+
+- if (early_console_initialized)
++ if (early_console)
+ return 1;
+
+ base_addr = of_early_console(&version);
+@@ -159,7 +142,6 @@ int __init setup_early_printk(char *opt)
+ }
+
+ register_console(early_console);
+- early_console_initialized = 1;
+ return 0;
+ }
+ return 1;
+@@ -169,7 +151,7 @@ int __init setup_early_printk(char *opt)
+ * only for early console because of performance degression */
+ void __init remap_early_printk(void)
+ {
+- if (!early_console_initialized || !early_console)
++ if (!early_console)
+ return;
+ printk(KERN_INFO "early_printk_console remapping from 0x%x to ",
+ base_addr);
+@@ -195,9 +177,9 @@ void __init remap_early_printk(void)
+
+ void __init disable_early_printk(void)
+ {
+- if (!early_console_initialized || !early_console)
++ if (!early_console)
+ return;
+ printk(KERN_WARNING "disabling early console\n");
+ unregister_console(early_console);
+- early_console_initialized = 0;
++ early_console = NULL;
+ }
+--- a/arch/mips/kernel/early_printk.c
++++ b/arch/mips/kernel/early_printk.c
+@@ -8,6 +8,7 @@
+ * written by Ralf Baechle (ralf@linux-mips.org)
+ */
+ #include <linux/console.h>
++#include <linux/printk.h>
+ #include <linux/init.h>
+
+ #include <asm/setup.h>
+@@ -25,20 +26,18 @@ early_console_write(struct console *con,
+ }
+ }
+
+-static struct console early_console __initdata = {
++static struct console early_console_prom = {
+ .name = "early",
+ .write = early_console_write,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+ .index = -1
+ };
+
+-static int early_console_initialized __initdata;
+-
+ void __init setup_early_printk(void)
+ {
+- if (early_console_initialized)
++ if (early_console)
+ return;
+- early_console_initialized = 1;
++ early_console = &early_console_prom;
+
+- register_console(&early_console);
++ register_console(&early_console_prom);
+ }
+--- a/arch/powerpc/kernel/udbg.c
++++ b/arch/powerpc/kernel/udbg.c
+@@ -156,15 +156,13 @@ static struct console udbg_console = {
+ .index = 0,
+ };
+
+-static int early_console_initialized;
+-
+ /*
+ * Called by setup_system after ppc_md->probe and ppc_md->early_init.
+ * Call it again after setting udbg_putc in ppc_md->setup_arch.
+ */
+ void __init register_early_udbg_console(void)
+ {
+- if (early_console_initialized)
++ if (early_console)
+ return;
+
+ if (!udbg_putc)
+@@ -174,7 +172,7 @@ void __init register_early_udbg_console(
+ printk(KERN_INFO "early console immortal !\n");
+ udbg_console.flags &= ~CON_BOOT;
+ }
+- early_console_initialized = 1;
++ early_console = &udbg_console;
+ register_console(&udbg_console);
+ }
+
+--- a/arch/sh/kernel/sh_bios.c
++++ b/arch/sh/kernel/sh_bios.c
+@@ -144,8 +144,6 @@ static struct console bios_console = {
+ .index = -1,
+ };
+
+-static struct console *early_console;
+-
+ static int __init setup_early_printk(char *buf)
+ {
+ int keep_early = 0;
+--- a/arch/sparc/kernel/setup_32.c
++++ b/arch/sparc/kernel/setup_32.c
+@@ -309,6 +309,7 @@ void __init setup_arch(char **cmdline_p)
+
+ boot_flags_init(*cmdline_p);
+
++ early_console = &prom_early_console;
+ register_console(&prom_early_console);
+
+ printk("ARCH: ");
+--- a/arch/sparc/kernel/setup_64.c
++++ b/arch/sparc/kernel/setup_64.c
+@@ -551,6 +551,12 @@ static void __init init_sparc64_elf_hwca
+ pause_patch();
+ }
+
++static inline void register_prom_console(void)
++{
++ early_console = &prom_early_console;
++ register_console(&prom_early_console);
++}
++
+ void __init setup_arch(char **cmdline_p)
+ {
+ /* Initialize PROM console and command line. */
+@@ -562,7 +568,7 @@ void __init setup_arch(char **cmdline_p)
+ #ifdef CONFIG_EARLYFB
+ if (btext_find_display())
+ #endif
+- register_console(&prom_early_console);
++ register_prom_console();
+
+ if (tlb_type == hypervisor)
+ printk("ARCH: SUN4V\n");
+--- a/arch/tile/kernel/early_printk.c
++++ b/arch/tile/kernel/early_printk.c
+@@ -17,6 +17,7 @@
+ #include <linux/init.h>
+ #include <linux/string.h>
+ #include <linux/irqflags.h>
++#include <linux/printk.h>
+ #include <asm/setup.h>
+ #include <hv/hypervisor.h>
+
+@@ -33,25 +34,8 @@ static struct console early_hv_console =
+ };
+
+ /* Direct interface for emergencies */
+-static struct console *early_console = &early_hv_console;
+-static int early_console_initialized;
+ static int early_console_complete;
+
+-static void early_vprintk(const char *fmt, va_list ap)
+-{
+- char buf[512];
+- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+- early_console->write(early_console, buf, n);
+-}
+-
+-void early_printk(const char *fmt, ...)
+-{
+- va_list ap;
+- va_start(ap, fmt);
+- early_vprintk(fmt, ap);
+- va_end(ap);
+-}
+-
+ void early_panic(const char *fmt, ...)
+ {
+ va_list ap;
+@@ -69,14 +53,13 @@ static int __initdata keep_early;
+
+ static int __init setup_early_printk(char *str)
+ {
+- if (early_console_initialized)
++ if (early_console)
+ return 1;
+
+ if (str != NULL && strncmp(str, "keep", 4) == 0)
+ keep_early = 1;
+
+ early_console = &early_hv_console;
+- early_console_initialized = 1;
+ register_console(early_console);
+
+ return 0;
+@@ -85,12 +68,12 @@ static int __init setup_early_printk(cha
+ void __init disable_early_printk(void)
+ {
+ early_console_complete = 1;
+- if (!early_console_initialized || !early_console)
++ if (!early_console)
+ return;
+ if (!keep_early) {
+ early_printk("disabling early console\n");
+ unregister_console(early_console);
+- early_console_initialized = 0;
++ early_console = NULL;
+ } else {
+ early_printk("keeping early console\n");
+ }
+@@ -98,7 +81,7 @@ void __init disable_early_printk(void)
+
+ void warn_early_printk(void)
+ {
+- if (early_console_complete || early_console_initialized)
++ if (early_console_complete || early_console)
+ return;
+ early_printk("\
+ Machine shutting down before console output is fully initialized.\n\
+--- a/arch/um/kernel/early_printk.c
++++ b/arch/um/kernel/early_printk.c
+@@ -16,7 +16,7 @@ static void early_console_write(struct c
+ um_early_printk(s, n);
+ }
+
+-static struct console early_console = {
++static struct console early_console_dev = {
+ .name = "earlycon",
+ .write = early_console_write,
+ .flags = CON_BOOT,
+@@ -25,8 +25,10 @@ static struct console early_console = {
+
+ static int __init setup_early_printk(char *buf)
+ {
+- register_console(&early_console);
+-
++ if (!early_console) {
++ early_console = &early_console_dev;
++ register_console(&early_console_dev);
++ }
+ return 0;
+ }
+
+--- a/arch/unicore32/kernel/early_printk.c
++++ b/arch/unicore32/kernel/early_printk.c
+@@ -33,21 +33,17 @@ static struct console early_ocd_console
+ .index = -1,
+ };
+
+-/* Direct interface for emergencies */
+-static struct console *early_console = &early_ocd_console;
+-
+-static int __initdata keep_early;
+-
+ static int __init setup_early_printk(char *buf)
+ {
+- if (!buf)
++ int keep_early;
++
++ if (!buf || early_console)
+ return 0;
+
+ if (strstr(buf, "keep"))
+ keep_early = 1;
+
+- if (!strncmp(buf, "ocd", 3))
+- early_console = &early_ocd_console;
++ early_console = &early_ocd_console;
+
+ if (keep_early)
+ early_console->flags &= ~CON_BOOT;
+--- a/arch/x86/kernel/early_printk.c
++++ b/arch/x86/kernel/early_printk.c
+@@ -169,25 +169,9 @@ static struct console early_serial_conso
+ .index = -1,
+ };
+
+-/* Direct interface for emergencies */
+-static struct console *early_console = &early_vga_console;
+-static int __initdata early_console_initialized;
+-
+-asmlinkage void early_printk(const char *fmt, ...)
+-{
+- char buf[512];
+- int n;
+- va_list ap;
+-
+- va_start(ap, fmt);
+- n = vscnprintf(buf, sizeof(buf), fmt, ap);
+- early_console->write(early_console, buf, n);
+- va_end(ap);
+-}
+-
+ static inline void early_console_register(struct console *con, int keep_early)
+ {
+- if (early_console->index != -1) {
++ if (con->index != -1) {
+ printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+ con->name);
+ return;
+@@ -207,9 +191,8 @@ static int __init setup_early_printk(cha
+ if (!buf)
+ return 0;
+
+- if (early_console_initialized)
++ if (early_console)
+ return 0;
+- early_console_initialized = 1;
+
+ keep = (strstr(buf, "keep") != NULL);
+
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -141,6 +141,7 @@ struct console {
+ for (con = console_drivers; con != NULL; con = con->next)
+
+ extern int console_set_on_cmdline;
++extern struct console *early_console;
+
+ extern int add_preferred_console(char *name, int idx, char *options);
+ extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -95,8 +95,14 @@ int no_printk(const char *fmt, ...)
+ return 0;
+ }
+
++#ifdef CONFIG_EARLY_PRINTK
+ extern asmlinkage __printf(1, 2)
+ void early_printk(const char *fmt, ...);
++void early_vprintk(const char *fmt, va_list ap);
++#else
++static inline __printf(1, 2) __cold
++void early_printk(const char *s, ...) { }
++#endif
+
+ extern int printk_needs_cpu(int cpu);
+ extern void printk_tick(void);
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -48,13 +48,6 @@
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/printk.h>
+
+-/*
+- * Architectures can override it:
+- */
+-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
+-{
+-}
+-
+ /* printk's without a loglevel use this.. */
+ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+
+@@ -756,6 +749,29 @@ module_param(ignore_loglevel, bool, S_IR
+ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
+ "print all kernel messages to the console.");
+
++#ifdef CONFIG_EARLY_PRINTK
++struct console *early_console;
++
++void early_vprintk(const char *fmt, va_list ap)
++{
++ if (early_console) {
++ char buf[512];
++ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
++
++ early_console->write(early_console, buf, n);
++ }
++}
++
++asmlinkage void early_printk(const char *fmt, ...)
++{
++ va_list ap;
++
++ va_start(ap, fmt);
++ early_vprintk(fmt, ap);
++ va_end(ap);
++}
++#endif
++
+ #ifdef CONFIG_BOOT_PRINTK_DELAY
+
+ static int boot_delay; /* msecs delay after each printk during bootup */
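After this consolidation an architecture only publishes its boot console and lets the generic early_vprintk()/early_printk() in kernel/printk.c do the buffering and formatting. An arch-side sketch modeled on the converted setup_early_printk() implementations above; the write hook and console name are assumptions:

    #include <linux/console.h>
    #include <linux/init.h>
    #include <linux/printk.h>

    static void my_arch_early_write(struct console *con, const char *s, unsigned int n)
    {
            /* arch-specific byte output, assumed to exist */
    }

    static struct console my_arch_early_console = {
            .name   = "earlycon",
            .write  = my_arch_early_write,
            .flags  = CON_PRINTBUFFER | CON_BOOT,
            .index  = -1,
    };

    static int __init setup_early_printk(char *buf)
    {
            if (!early_console) {
                    early_console = &my_arch_early_console;
                    register_console(&my_arch_early_console);
            }
            return 0;
    }
    early_param("earlyprintk", setup_early_printk);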
diff --git a/patches/epoll-use-get-cpu-light.patch b/patches/epoll-use-get-cpu-light.patch
new file mode 100644
index 0000000..e419acc
--- /dev/null
+++ b/patches/epoll-use-get-cpu-light.patch
@@ -0,0 +1,26 @@
+Subject: epoll.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 08 Jul 2011 16:35:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/eventpoll.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -497,12 +497,12 @@ static int ep_poll_wakeup_proc(void *pri
+ */
+ static void ep_poll_safewake(wait_queue_head_t *wq)
+ {
+- int this_cpu = get_cpu();
++ int this_cpu = get_cpu_light();
+
+ ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+
+- put_cpu();
++ put_cpu_light();
+ }
+
+ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
diff --git a/patches/filemap-fix-up.patch b/patches/filemap-fix-up.patch
new file mode 100644
index 0000000..26f78d7
--- /dev/null
+++ b/patches/filemap-fix-up.patch
@@ -0,0 +1,22 @@
+Subject: filemap-fix-up.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 Jun 2011 18:56:24 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Wrecked-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-m6yuzd6ul717hlnl2gj6p3ou@git.kernel.org
+---
+ mm/filemap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1955,7 +1955,7 @@ size_t iov_iter_copy_from_user_atomic(st
+ char *kaddr;
+ size_t copied;
+
+- BUG_ON(!in_atomic());
++ BUG_ON(!pagefault_disabled());
+ kaddr = kmap_atomic(page);
+ if (likely(i->nr_segs == 1)) {
+ int left;
diff --git a/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch b/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch
new file mode 100644
index 0000000..f4c3003
--- /dev/null
+++ b/patches/fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch
@@ -0,0 +1,34 @@
+Subject: FIX [1/2] slub: Do not dereference NULL pointer in node_match
+From: Christoph Lameter <cl@linux.com>
+Date: Wed, 23 Jan 2013 21:45:47 +0000
+
+The variables accessed in slab_alloc are volatile and therefore
+the page pointer passed to node_match can be NULL. The processing
+of data in slab_alloc is tentative until either the cmpxchg
+succeeds or the __slab_alloc slowpath is invoked. Both are
+able to perform the same allocation from the freelist.
+
+Check for the NULL pointer in node_match.
+
+A false positive will lead to a retry of the loop in __slab_alloc.
+
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slub.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2041,7 +2041,7 @@ static void flush_all(struct kmem_cache
+ static inline int node_match(struct page *page, int node)
+ {
+ #ifdef CONFIG_NUMA
+- if (node != NUMA_NO_NODE && page_to_nid(page) != node)
++ if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
+ return 0;
+ #endif
+ return 1;
diff --git a/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch b/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch
new file mode 100644
index 0000000..0915b2a
--- /dev/null
+++ b/patches/fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch
@@ -0,0 +1,65 @@
+Subject: FIX [2/2] slub: Tid must be retrieved from the percpu area of the current processor
+From: Christoph Lameter <cl@linux.com>
+Date: Wed, 23 Jan 2013 21:45:48 +0000
+
+As Steven Rostedt has pointed out: rescheduling could occur on a different processor
+after the determination of the per cpu pointer and before the tid is retrieved.
+This could result in allocation from the wrong node in slab_alloc.
+
+The effect is much more severe in slab_free() where we could free to the freelist
+of the wrong page.
+
+The window for something like that occurring is pretty small but it is possible.
+
+Signed-off-by: Christoph Lameter <cl@linux.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Pekka Enberg <penberg@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slub.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2331,13 +2331,13 @@ static __always_inline void *slab_alloc_
+
+ s = memcg_kmem_get_cache(s, gfpflags);
+ redo:
+-
+ /*
+- * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+- * enabled. We may switch back and forth between cpus while
+- * reading from one cpu area. That does not matter as long
+- * as we end up on the original cpu again when doing the cmpxchg.
++ * Preemption is disabled for the retrieval of the tid because that
++ * must occur from the current processor. We cannot allow rescheduling
++ * on a different processor between the determination of the pointer
++ * and the retrieval of the tid.
+ */
++ preempt_disable();
+ c = __this_cpu_ptr(s->cpu_slab);
+
+ /*
+@@ -2347,7 +2347,7 @@ redo:
+ * linked list in between.
+ */
+ tid = c->tid;
+- barrier();
++ preempt_enable();
+
+ object = c->freelist;
+ page = c->page;
+@@ -2594,10 +2594,11 @@ redo:
+ * data is retrieved via this pointer. If we are on the same cpu
+ * during the cmpxchg then the free will succedd.
+ */
++ preempt_disable();
+ c = __this_cpu_ptr(s->cpu_slab);
+
+ tid = c->tid;
+- barrier();
++ preempt_enable();
+
+ if (likely(page == c->page)) {
+ set_freepointer(s, object, c->freelist);
diff --git a/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch b/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch
new file mode 100644
index 0000000..1ed928e
--- /dev/null
+++ b/patches/fix-rq-3elock-vs-logbuf_lock-unlock-race.patch
@@ -0,0 +1,34 @@
+Subject: printk: Fix rq->lock vs logbuf_lock unlock lock inversion
+From: "Bu, Yitian" <ybu@qti.qualcomm.com>
+Date: Mon, 18 Feb 2013 12:53:37 +0000
+
+commit 07354eb1a74d1 ("locking printk: Annotate logbuf_lock as raw")
+reintroduced a lock inversion problem which was fixed in commit
+0b5e1c5255 ("printk: Release console_sem after logbuf_lock"). This
+probably happened when fixing up patch rejects.
+
+Restore the ordering and unlock logbuf_lock before releasing
+console_sem.
+
+Signed-off-by: ybu <ybu@qti.qualcomm.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/E807E903FE6CBE4D95E420FBFCC273B827413C@nasanexd01h.na.qualcomm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/printk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1358,9 +1358,9 @@ static int console_trylock_for_printk(un
+ }
+ }
+ logbuf_cpu = UINT_MAX;
++ raw_spin_unlock(&logbuf_lock);
+ if (wake)
+ up(&console_sem);
+- raw_spin_unlock(&logbuf_lock);
+ return retval;
+ }
+
diff --git a/patches/fix-rt-int3-x86_32-3.2-rt.patch b/patches/fix-rt-int3-x86_32-3.2-rt.patch
new file mode 100644
index 0000000..8f450e9
--- /dev/null
+++ b/patches/fix-rt-int3-x86_32-3.2-rt.patch
@@ -0,0 +1,112 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Subject: x86: Do not disable preemption in int3 on 32bit
+
+Preemption must be disabled before enabling interrupts in do_trap
+on x86_64 because the stack in use for int3 and debug is a per CPU
+stack set by the IST. But 32bit does not have an IST and the stack
+still belongs to the current task and there is no problem in scheduling
+out the task.
+
+Keep preemption enabled on X86_32 when enabling interrupts for
+do_trap().
+
+The name of the function is changed from preempt_conditional_sti/cli()
+to conditional_sti/cli_ist(), to annotate that this function is used
+when the stack is on the IST.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/kernel/traps.c | 32 +++++++++++++++++++++++---------
+ 1 file changed, 23 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -85,9 +85,21 @@ static inline void conditional_sti(struc
+ local_irq_enable();
+ }
+
+-static inline void preempt_conditional_sti(struct pt_regs *regs)
++static inline void conditional_sti_ist(struct pt_regs *regs)
+ {
++#ifdef CONFIG_X86_64
++ /*
++ * X86_64 uses a per CPU stack on the IST for certain traps
++ * like int3. The task can not be preempted when using one
++ * of these stacks, thus preemption must be disabled, otherwise
++ * the stack can be corrupted if the task is scheduled out,
++ * and another task comes in and uses this stack.
++ *
++ * On x86_32 the task keeps its own stack and it is OK if the
++ * task schedules out.
++ */
+ inc_preempt_count();
++#endif
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
+@@ -98,11 +110,13 @@ static inline void conditional_cli(struc
+ local_irq_disable();
+ }
+
+-static inline void preempt_conditional_cli(struct pt_regs *regs)
++static inline void conditional_cli_ist(struct pt_regs *regs)
+ {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
++#ifdef CONFIG_X86_64
+ dec_preempt_count();
++#endif
+ }
+
+ static int __kprobes
+@@ -229,9 +243,9 @@ dotraplinkage void do_stack_segment(stru
+ exception_enter(regs);
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
+ X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+- preempt_conditional_sti(regs);
++ conditional_sti_ist(regs);
+ do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+- preempt_conditional_cli(regs);
++ conditional_cli_ist(regs);
+ }
+ exception_exit(regs);
+ }
+@@ -331,9 +345,9 @@ dotraplinkage void __kprobes notrace do_
+ * as we may switch to the interrupt stack.
+ */
+ debug_stack_usage_inc();
+- preempt_conditional_sti(regs);
++ conditional_sti_ist(regs);
+ do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
+- preempt_conditional_cli(regs);
++ conditional_cli_ist(regs);
+ debug_stack_usage_dec();
+ exit:
+ exception_exit(regs);
+@@ -438,12 +452,12 @@ dotraplinkage void __kprobes do_debug(st
+ debug_stack_usage_inc();
+
+ /* It's safe to allow irq's after DR6 has been saved */
+- preempt_conditional_sti(regs);
++ conditional_sti_ist(regs);
+
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
+ X86_TRAP_DB);
+- preempt_conditional_cli(regs);
++ conditional_cli_ist(regs);
+ debug_stack_usage_dec();
+ goto exit;
+ }
+@@ -463,7 +477,7 @@ dotraplinkage void __kprobes do_debug(st
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+ send_sigtrap(tsk, regs, error_code, si_code);
+- preempt_conditional_cli(regs);
++ conditional_cli_ist(regs);
+ debug_stack_usage_dec();
+
+ exit:
diff --git a/patches/fs-block-rt-support.patch b/patches/fs-block-rt-support.patch
new file mode 100644
index 0000000..9246db6
--- /dev/null
+++ b/patches/fs-block-rt-support.patch
@@ -0,0 +1,40 @@
+Subject: fs-block-rt-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 14 Jun 2011 17:05:09 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ block/blk-core.c | 2 +-
+ fs/file.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -241,7 +241,7 @@ EXPORT_SYMBOL(blk_delay_queue);
+ **/
+ void blk_start_queue(struct request_queue *q)
+ {
+- WARN_ON(!irqs_disabled());
++ WARN_ON_NONRT(!irqs_disabled());
+
+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ __blk_run_queue(q);
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -98,14 +98,14 @@ static void free_fdtable_rcu(struct rcu_
+ kfree(fdt->open_fds);
+ kfree(fdt);
+ } else {
+- fddef = &get_cpu_var(fdtable_defer_list);
++ fddef = &per_cpu(fdtable_defer_list, get_cpu_light());
+ spin_lock(&fddef->lock);
+ fdt->next = fddef->next;
+ fddef->next = fdt;
+ /* vmallocs are handled from the workqueue context */
+ schedule_work(&fddef->wq);
+ spin_unlock(&fddef->lock);
+- put_cpu_var(fdtable_defer_list);
++ put_cpu_light();
+ }
+ }
+
diff --git a/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch b/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch
new file mode 100644
index 0000000..d6228dc
--- /dev/null
+++ b/patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch
@@ -0,0 +1,94 @@
+Subject: fs: dcache: Use cpu_chill() in trylock loops
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 07 Mar 2012 21:00:34 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ fs/autofs4/autofs_i.h | 1 +
+ fs/autofs4/expire.c | 2 +-
+ fs/dcache.c | 7 ++++---
+ fs/namespace.c | 3 ++-
+ 4 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/autofs4/autofs_i.h
++++ b/fs/autofs4/autofs_i.h
+@@ -34,6 +34,7 @@
+ #include <linux/sched.h>
+ #include <linux/mount.h>
+ #include <linux/namei.h>
++#include <linux/delay.h>
+ #include <asm/current.h>
+ #include <asm/uaccess.h>
+
+--- a/fs/autofs4/expire.c
++++ b/fs/autofs4/expire.c
+@@ -166,7 +166,7 @@ again:
+ parent = p->d_parent;
+ if (!spin_trylock(&parent->d_lock)) {
+ spin_unlock(&p->d_lock);
+- cpu_relax();
++ cpu_chill();
+ goto relock;
+ }
+ spin_unlock(&p->d_lock);
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -37,6 +37,7 @@
+ #include <linux/rculist_bl.h>
+ #include <linux/prefetch.h>
+ #include <linux/ratelimit.h>
++#include <linux/delay.h>
+ #include "internal.h"
+ #include "mount.h"
+
+@@ -470,7 +471,7 @@ static inline struct dentry *dentry_kill
+ if (inode && !spin_trylock(&inode->i_lock)) {
+ relock:
+ spin_unlock(&dentry->d_lock);
+- cpu_relax();
++ cpu_chill();
+ return dentry; /* try again with same dentry */
+ }
+ if (IS_ROOT(dentry))
+@@ -852,7 +853,7 @@ relock:
+
+ if (!spin_trylock(&dentry->d_lock)) {
+ spin_unlock(&dcache_lru_lock);
+- cpu_relax();
++ cpu_chill();
+ goto relock;
+ }
+
+@@ -2084,7 +2085,7 @@ again:
+ if (dentry->d_count == 1) {
+ if (!spin_trylock(&inode->i_lock)) {
+ spin_unlock(&dentry->d_lock);
+- cpu_relax();
++ cpu_chill();
+ goto again;
+ }
+ dentry->d_flags &= ~DCACHE_CANT_MOUNT;
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -22,6 +22,7 @@
+ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
+ #include <linux/uaccess.h>
+ #include <linux/proc_fs.h>
++#include <linux/delay.h>
+ #include "pnode.h"
+ #include "internal.h"
+
+@@ -315,7 +316,7 @@ int __mnt_want_write(struct vfsmount *m)
+ smp_mb();
+ while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ preempt_enable();
+- cpu_relax();
++ cpu_chill();
+ preempt_disable();
+ }
+ /*
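cpu_chill() comes in via the <linux/delay.h> includes added above and is defined by the RT core patches elsewhere in the queue; the point is that the retrying side must actually sleep on RT so a preempted lock holder can make progress. A sketch of the assumed shape:

    /* Assumed shape, not the exact definition shipped in the queue. */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define cpu_chill()    msleep(1)
    #else
    # define cpu_chill()    cpu_relax()
    #endif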
diff --git a/patches/fs-jbd-pull-plug-when-waiting-for-space.patch b/patches/fs-jbd-pull-plug-when-waiting-for-space.patch
new file mode 100644
index 0000000..0dd4c95
--- /dev/null
+++ b/patches/fs-jbd-pull-plug-when-waiting-for-space.patch
@@ -0,0 +1,29 @@
+From: Mike Galbraith <mgalbraith@suse.de>
+Date: Wed, 11 Jul 2012 22:05:20 +0000
+Subject: fs, jbd: pull your plug when waiting for space
+
+With an -rt kernel, and a heavy sync IO load, tasks can jam
+up on journal locks without unplugging, which can lead to
+terminal IO starvation. Unplug and schedule when waiting for space.
+
+Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Theodore Tso <tytso@mit.edu>
+Link: http://lkml.kernel.org/r/1341812414.7370.73.camel@marge.simpson.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/jbd/checkpoint.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/jbd/checkpoint.c
++++ b/fs/jbd/checkpoint.c
+@@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *jou
+ if (journal->j_flags & JFS_ABORT)
+ return;
+ spin_unlock(&journal->j_state_lock);
++ if (current->plug)
++ io_schedule();
+ mutex_lock(&journal->j_checkpoint_mutex);
+
+ /*
diff --git a/patches/fs-jbd-replace-bh_state-lock.patch b/patches/fs-jbd-replace-bh_state-lock.patch
new file mode 100644
index 0000000..b7b48ce
--- /dev/null
+++ b/patches/fs-jbd-replace-bh_state-lock.patch
@@ -0,0 +1,100 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Mar 2011 10:11:25 +0100
+Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
+
+bit_spin_locks break under RT.
+
+Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+--
+
+ include/linux/buffer_head.h | 10 ++++++++++
+ include/linux/jbd_common.h | 24 ++++++++++++++++++++++++
+ 2 files changed, 34 insertions(+)
+
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -74,6 +74,11 @@ struct buffer_head {
+ atomic_t b_count; /* users using this buffer_head */
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t b_uptodate_lock;
++#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
++ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
++ spinlock_t b_state_lock;
++ spinlock_t b_journal_head_lock;
++#endif
+ #endif
+ };
+
+@@ -105,6 +110,11 @@ static inline void buffer_head_init_lock
+ {
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ spin_lock_init(&bh->b_uptodate_lock);
++#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
++ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
++ spin_lock_init(&bh->b_state_lock);
++ spin_lock_init(&bh->b_journal_head_lock);
++#endif
+ #endif
+ }
+
+--- a/include/linux/jbd_common.h
++++ b/include/linux/jbd_common.h
+@@ -39,32 +39,56 @@ static inline struct journal_head *bh2jh
+
+ static inline void jbd_lock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_lock(BH_State, &bh->b_state);
++#else
++ spin_lock(&bh->b_state_lock);
++#endif
+ }
+
+ static inline int jbd_trylock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ return bit_spin_trylock(BH_State, &bh->b_state);
++#else
++ return spin_trylock(&bh->b_state_lock);
++#endif
+ }
+
+ static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ return bit_spin_is_locked(BH_State, &bh->b_state);
++#else
++ return spin_is_locked(&bh->b_state_lock);
++#endif
+ }
+
+ static inline void jbd_unlock_bh_state(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_unlock(BH_State, &bh->b_state);
++#else
++ spin_unlock(&bh->b_state_lock);
++#endif
+ }
+
+ static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_lock(BH_JournalHead, &bh->b_state);
++#else
++ spin_lock(&bh->b_journal_head_lock);
++#endif
+ }
+
+ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_unlock(BH_JournalHead, &bh->b_state);
++#else
++ spin_unlock(&bh->b_journal_head_lock);
++#endif
+ }
+
+ #endif
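Callers of these wrappers stay unchanged; only the backing lock differs per configuration. A minimal illustrative caller:

    /* Sketch only: a hypothetical helper, not code from the queue. */
    static void example_update_journal_head(struct buffer_head *bh)
    {
            jbd_lock_bh_state(bh);
            /* ... inspect or modify bh2jh(bh) under the state lock ... */
            jbd_unlock_bh_state(bh);
    }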
diff --git a/patches/fs-namespace-preemption-fix.patch b/patches/fs-namespace-preemption-fix.patch
new file mode 100644
index 0000000..04805b8
--- /dev/null
+++ b/patches/fs-namespace-preemption-fix.patch
@@ -0,0 +1,30 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 19 Jul 2009 08:44:27 -0500
+Subject: fs: namespace preemption fix
+
+On RT we cannot loop with preemption disabled here as
+mnt_make_readonly() might have been preempted. We can safely enable
+preemption while waiting for MNT_WRITE_HOLD to be cleared. Safe on !RT
+as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/namespace.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -313,8 +313,11 @@ int __mnt_want_write(struct vfsmount *m)
+ * incremented count after it has set MNT_WRITE_HOLD.
+ */
+ smp_mb();
+- while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
++ while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
++ preempt_enable();
+ cpu_relax();
++ preempt_disable();
++ }
+ /*
+ * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+ * be set to match its requirements. So we must not load that until
diff --git a/patches/fs-ntfs-disable-interrupt-non-rt.patch b/patches/fs-ntfs-disable-interrupt-non-rt.patch
new file mode 100644
index 0000000..f06ad29
--- /dev/null
+++ b/patches/fs-ntfs-disable-interrupt-non-rt.patch
@@ -0,0 +1,59 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Fri, 3 Jul 2009 08:44:12 -0500
+Subject: fs: ntfs: disable interrupt only on !RT
+
+On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote:
+> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
+>
+> > > [10138.175796] [<c0105de3>] show_trace+0x12/0x14
+> > > [10138.180291] [<c0105dfb>] dump_stack+0x16/0x18
+> > > [10138.184769] [<c011609f>] native_smp_call_function_mask+0x138/0x13d
+> > > [10138.191117] [<c0117606>] smp_call_function+0x1e/0x24
+> > > [10138.196210] [<c012f85c>] on_each_cpu+0x25/0x50
+> > > [10138.200807] [<c0115c74>] flush_tlb_all+0x1e/0x20
+> > > [10138.205553] [<c016caaf>] kmap_high+0x1b6/0x417
+> > > [10138.210118] [<c011ec88>] kmap+0x4d/0x4f
+> > > [10138.214102] [<c026a9d8>] ntfs_end_buffer_async_read+0x228/0x2f9
+> > > [10138.220163] [<c01a0e9e>] end_bio_bh_io_sync+0x26/0x3f
+> > > [10138.225352] [<c01a2b09>] bio_endio+0x42/0x6d
+> > > [10138.229769] [<c02c2a08>] __end_that_request_first+0x115/0x4ac
+> > > [10138.235682] [<c02c2da7>] end_that_request_chunk+0x8/0xa
+> > > [10138.241052] [<c0365943>] ide_end_request+0x55/0x10a
+> > > [10138.246058] [<c036dae3>] ide_dma_intr+0x6f/0xac
+> > > [10138.250727] [<c0366d83>] ide_intr+0x93/0x1e0
+> > > [10138.255125] [<c015afb4>] handle_IRQ_event+0x5c/0xc9
+> >
+> > Looks like ntfs is kmap()ing from interrupt context. Should be using
+> > kmap_atomic instead, I think.
+>
+> it's not atomic interrupt context but irq thread context - and -rt
+> remaps kmap_atomic() to kmap() internally.
+
+Hm. Looking at the change to mm/bounce.c, perhaps I should do this
+instead?
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ fs/ntfs/aops.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ntfs/aops.c
++++ b/fs/ntfs/aops.c
+@@ -144,13 +144,13 @@ static void ntfs_end_buffer_async_read(s
+ recs = PAGE_CACHE_SIZE / rec_size;
+ /* Should have been verified before we got here... */
+ BUG_ON(!recs);
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ kaddr = kmap_atomic(page);
+ for (i = 0; i < recs; i++)
+ post_read_mst_fixup((NTFS_RECORD*)(kaddr +
+ i * rec_size), rec_size);
+ kunmap_atomic(kaddr);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ flush_dcache_page(page);
+ if (likely(page_uptodate && !PageError(page)))
+ SetPageUptodate(page);
diff --git a/patches/fs-replace-bh_uptodate_lock-for-rt.patch b/patches/fs-replace-bh_uptodate_lock-for-rt.patch
new file mode 100644
index 0000000..466a0a3
--- /dev/null
+++ b/patches/fs-replace-bh_uptodate_lock-for-rt.patch
@@ -0,0 +1,161 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Mar 2011 09:18:52 +0100
+Subject: buffer_head: Replace bh_uptodate_lock for -rt
+
+Wrap the bit_spin_lock calls into a separate inline and add the RT
+replacements with a real spinlock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/buffer.c | 21 +++++++--------------
+ fs/ntfs/aops.c | 10 +++-------
+ include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
+ 3 files changed, 44 insertions(+), 21 deletions(-)
+
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -280,8 +280,7 @@ static void end_buffer_async_read(struct
+ * decide that the page is now completely done.
+ */
+ first = page_buffers(page);
+- local_irq_save(flags);
+- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++ flags = bh_uptodate_lock_irqsave(first);
+ clear_buffer_async_read(bh);
+ unlock_buffer(bh);
+ tmp = bh;
+@@ -294,8 +293,7 @@ static void end_buffer_async_read(struct
+ }
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
++ bh_uptodate_unlock_irqrestore(first, flags);
+
+ /*
+ * If none of the buffers had errors and they are all
+@@ -307,9 +305,7 @@ static void end_buffer_async_read(struct
+ return;
+
+ still_busy:
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
+- return;
++ bh_uptodate_unlock_irqrestore(first, flags);
+ }
+
+ /*
+@@ -343,8 +339,7 @@ void end_buffer_async_write(struct buffe
+ }
+
+ first = page_buffers(page);
+- local_irq_save(flags);
+- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++ flags = bh_uptodate_lock_irqsave(first);
+
+ clear_buffer_async_write(bh);
+ unlock_buffer(bh);
+@@ -356,15 +351,12 @@ void end_buffer_async_write(struct buffe
+ }
+ tmp = tmp->b_this_page;
+ }
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
++ bh_uptodate_unlock_irqrestore(first, flags);
+ end_page_writeback(page);
+ return;
+
+ still_busy:
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
+- return;
++ bh_uptodate_unlock_irqrestore(first, flags);
+ }
+ EXPORT_SYMBOL(end_buffer_async_write);
+
+@@ -3256,6 +3248,7 @@ struct buffer_head *alloc_buffer_head(gf
+ struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
+ if (ret) {
+ INIT_LIST_HEAD(&ret->b_assoc_buffers);
++ buffer_head_init_locks(ret);
+ preempt_disable();
+ __this_cpu_inc(bh_accounting.nr);
+ recalc_bh_state();
+--- a/fs/ntfs/aops.c
++++ b/fs/ntfs/aops.c
+@@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(s
+ "0x%llx.", (unsigned long long)bh->b_blocknr);
+ }
+ first = page_buffers(page);
+- local_irq_save(flags);
+- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
++ flags = bh_uptodate_lock_irqsave(first);
+ clear_buffer_async_read(bh);
+ unlock_buffer(bh);
+ tmp = bh;
+@@ -124,8 +123,7 @@ static void ntfs_end_buffer_async_read(s
+ }
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
++ bh_uptodate_unlock_irqrestore(first, flags);
+ /*
+ * If none of the buffers had errors then we can set the page uptodate,
+ * but we first have to perform the post read mst fixups, if the
+@@ -160,9 +158,7 @@ static void ntfs_end_buffer_async_read(s
+ unlock_page(page);
+ return;
+ still_busy:
+- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+- local_irq_restore(flags);
+- return;
++ bh_uptodate_unlock_irqrestore(first, flags);
+ }
+
+ /**
+--- a/include/linux/buffer_head.h
++++ b/include/linux/buffer_head.h
+@@ -72,8 +72,42 @@ struct buffer_head {
+ struct address_space *b_assoc_map; /* mapping this buffer is
+ associated with */
+ atomic_t b_count; /* users using this buffer_head */
++#ifdef CONFIG_PREEMPT_RT_BASE
++ spinlock_t b_uptodate_lock;
++#endif
+ };
+
++static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
++{
++ unsigned long flags;
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++ local_irq_save(flags);
++ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
++#else
++ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
++#endif
++ return flags;
++}
++
++static inline void
++bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
++{
++#ifndef CONFIG_PREEMPT_RT_BASE
++ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
++ local_irq_restore(flags);
++#else
++ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
++#endif
++}
++
++static inline void buffer_head_init_locks(struct buffer_head *bh)
++{
++#ifdef CONFIG_PREEMPT_RT_BASE
++ spin_lock_init(&bh->b_uptodate_lock);
++#endif
++}
++
+ /*
+ * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
+ * and buffer_foo() functions.
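
Usage sketch (not part of the patch series; example_end_io_walk() is a made-up
name): callers that used to open-code local_irq_save() plus
bit_spin_lock(BH_Uptodate_Lock, ...) now simply bracket the critical section
with the two inlines, and the RT/non-RT distinction stays hidden behind them:

    /* Mirrors the end_buffer_async_read() hunk above */
    static void example_end_io_walk(struct buffer_head *bh)
    {
            struct buffer_head *first = page_buffers(bh->b_page);
            struct buffer_head *tmp = bh;
            unsigned long flags;

            flags = bh_uptodate_lock_irqsave(first);  /* irqs off + bit lock, or RT spinlock */
            do {
                    /* ... inspect and clear per-buffer async state ... */
                    tmp = tmp->b_this_page;
            } while (tmp != bh);
            bh_uptodate_unlock_irqrestore(first, flags);
    }
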
diff --git a/patches/ftrace-migrate-disable-tracing.patch b/patches/ftrace-migrate-disable-tracing.patch
new file mode 100644
index 0000000..a85247d
--- /dev/null
+++ b/patches/ftrace-migrate-disable-tracing.patch
@@ -0,0 +1,73 @@
+Subject: ftrace-migrate-disable-tracing.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:56:42 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/ftrace_event.h | 3 ++-
+ kernel/trace/trace.c | 9 ++++++---
+ kernel/trace/trace_events.c | 1 +
+ kernel/trace/trace_output.c | 5 +++++
+ 4 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/include/linux/ftrace_event.h
++++ b/include/linux/ftrace_event.h
+@@ -49,7 +49,8 @@ struct trace_entry {
+ unsigned char flags;
+ unsigned char preempt_count;
+ int pid;
+- int padding;
++ unsigned short migrate_disable;
++ unsigned short padding;
+ };
+
+ #define FTRACE_MAX_EVENT \
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1177,6 +1177,8 @@ tracing_generic_entry_update(struct trac
+ ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+ ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
++
++ entry->migrate_disable = (tsk) ? tsk->migrate_disable & 0xFF : 0;
+ }
+ EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
+
+@@ -2034,9 +2036,10 @@ static void print_lat_help_header(struct
+ seq_puts(m, "# | / _----=> need-resched \n");
+ seq_puts(m, "# || / _---=> hardirq/softirq \n");
+ seq_puts(m, "# ||| / _--=> preempt-depth \n");
+- seq_puts(m, "# |||| / delay \n");
+- seq_puts(m, "# cmd pid ||||| time | caller \n");
+- seq_puts(m, "# \\ / ||||| \\ | / \n");
++ seq_puts(m, "# |||| / _--=> migrate-disable\n");
++ seq_puts(m, "# ||||| / delay \n");
++ seq_puts(m, "# cmd pid |||||| time | caller \n");
++ seq_puts(m, "# \\ / ||||| \\ | / \n");
+ }
+
+ static void print_event_info(struct trace_array *tr, struct seq_file *m)
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -116,6 +116,7 @@ static int trace_define_common_fields(vo
+ __common_field(unsigned char, flags);
+ __common_field(unsigned char, preempt_count);
+ __common_field(int, pid);
++ __common_field(unsigned short, migrate_disable);
+ __common_field(int, padding);
+
+ return ret;
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -593,6 +593,11 @@ int trace_print_lat_fmt(struct trace_seq
+ else
+ ret = trace_seq_putc(s, '.');
+
++ if (entry->migrate_disable)
++ ret = trace_seq_printf(s, "%x", entry->migrate_disable);
++ else
++ ret = trace_seq_putc(s, '.');
++
+ return ret;
+ }
+
diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch
new file mode 100644
index 0000000..9e8e808
--- /dev/null
+++ b/patches/futex-requeue-pi-fix.patch
@@ -0,0 +1,114 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Subject: futex: Fix bug on when a requeued RT task times out
+
+Requeue with timeout causes a bug with PREEMPT_RT_FULL.
+
+The bug comes from a timed out condition.
+
+
+ TASK 1 TASK 2
+ ------ ------
+ futex_wait_requeue_pi()
+ futex_wait_queue_me()
+ <timed out>
+
+ double_lock_hb();
+
+ raw_spin_lock(pi_lock);
+ if (current->pi_blocked_on) {
+ } else {
+ current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
+ raw_spin_unlock(pi_lock);
+ spin_lock(hb->lock); <-- blocked!
+
+
+ plist_for_each_entry_safe(this) {
+ rt_mutex_start_proxy_lock();
+ task_blocks_on_rt_mutex();
+ BUG_ON(task->pi_blocked_on)!!!!
+
+The BUG_ON() actually has a check for PI_WAKEUP_INPROGRESS, but the
+problem is that, after TASK 1 sets PI_WAKEUP_INPROGRESS, it then tries
+to grab the hb->lock, which it fails to do. As the hb->lock is a mutex
+on RT, it will block and set its "pi_blocked_on" to the hb->lock.
+
+When TASK 2 goes to requeue it, the check for PI_WAKEUP_INPROGRESS fails
+because TASK 1's pi_blocked_on is no longer set to that flag, but is
+instead set to the hb->lock.
+
+The fix:
+
+When calling rt_mutex_start_proxy_lock(), a check is made to see
+if the proxy task's pi_blocked_on is set. If so, exit early.
+Otherwise set it to a new flag, PI_REQUEUE_INPROGRESS, which notifies
+the proxy task that it is being requeued, so that it can handle things
+appropriately.
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+
+---
+ kernel/rtmutex.c | 32 +++++++++++++++++++++++++++++++-
+ kernel/rtmutex_common.h | 1 +
+ 2 files changed, 32 insertions(+), 1 deletion(-)
+
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -69,7 +69,8 @@ static void fixup_rt_mutex_waiters(struc
+
+ static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
+ {
+- return waiter && waiter != PI_WAKEUP_INPROGRESS;
++ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
++ waiter != PI_REQUEUE_INPROGRESS;
+ }
+
+ /*
+@@ -981,6 +982,35 @@ int rt_mutex_start_proxy_lock(struct rt_
+ return 1;
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ /*
++ * In PREEMPT_RT there's an added race.
++ * If the task, that we are about to requeue, times out,
++ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
++ * to skip this task. But right after the task sets
++ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
++ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
++ * This will replace the PI_WAKEUP_INPROGRESS with the actual
++ * lock that it blocks on. We *must not* place this task
++ * on this proxy lock in that case.
++ *
++ * To prevent this race, we first take the task's pi_lock
++ * and check if it has updated its pi_blocked_on. If it has,
++ * we assume that it woke up and we return -EAGAIN.
++ * Otherwise, we set the task's pi_blocked_on to
++ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
++ * it will know that we are in the process of requeuing it.
++ */
++ raw_spin_lock_irq(&task->pi_lock);
++ if (task->pi_blocked_on) {
++ raw_spin_unlock_irq(&task->pi_lock);
++ raw_spin_unlock(&lock->wait_lock);
++ return -EAGAIN;
++ }
++ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
++ raw_spin_unlock_irq(&task->pi_lock);
++#endif
++
+ ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
+
+ if (ret && !rt_mutex_owner(lock)) {
+--- a/kernel/rtmutex_common.h
++++ b/kernel/rtmutex_common.h
+@@ -104,6 +104,7 @@ static inline struct task_struct *rt_mut
+ * PI-futex support (proxy locking functions, etc.):
+ */
+ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
++#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
+
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
diff --git a/patches/generic-cmpxchg-use-raw-local-irq.patch b/patches/generic-cmpxchg-use-raw-local-irq.patch
new file mode 100644
index 0000000..12d82a2
--- /dev/null
+++ b/patches/generic-cmpxchg-use-raw-local-irq.patch
@@ -0,0 +1,47 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:30 -0500
+Subject: generic: Use raw local irq variant for generic cmpxchg
+
+No point in tracing those.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/asm-generic/cmpxchg-local.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/include/asm-generic/cmpxchg-local.h
++++ b/include/asm-generic/cmpxchg-local.h
+@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_lo
+ if (size == 8 && sizeof(unsigned long) != 8)
+ wrong_size_cmpxchg(ptr);
+
+- local_irq_save(flags);
++ raw_local_irq_save(flags);
+ switch (size) {
+ case 1: prev = *(u8 *)ptr;
+ if (prev == old)
+@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_lo
+ default:
+ wrong_size_cmpxchg(ptr);
+ }
+- local_irq_restore(flags);
++ raw_local_irq_restore(flags);
+ return prev;
+ }
+
+@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_gene
+ u64 prev;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ raw_local_irq_save(flags);
+ prev = *(u64 *)ptr;
+ if (prev == old)
+ *(u64 *)ptr = new;
+- local_irq_restore(flags);
++ raw_local_irq_restore(flags);
+ return prev;
+ }
+
diff --git a/patches/genirq-add-default-mask-cmdline-option.patch b/patches/genirq-add-default-mask-cmdline-option.patch
new file mode 100644
index 0000000..d452bf2
--- /dev/null
+++ b/patches/genirq-add-default-mask-cmdline-option.patch
@@ -0,0 +1,66 @@
+Subject: genirq: Add default affinity mask command line option
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 25 May 2012 16:59:47 +0200
+
+If we isolate CPUs, then we don't want random device interrupts on
+them. Even w/o the user space irq balancer enabled we can end up with
+irqs on non-boot cpus.
+
+Allow restricting the default irq affinity mask.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ Documentation/kernel-parameters.txt | 9 +++++++++
+ kernel/irq/irqdesc.c | 21 +++++++++++++++++++--
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1182,6 +1182,15 @@ bytes respectively. Such letter suffixes
+ See comment before ip2_setup() in
+ drivers/char/ip2/ip2base.c.
+
++ irqaffinity= [SMP] Set the default irq affinity mask
++ Format:
++ <cpu number>,...,<cpu number>
++ or
++ <cpu number>-<cpu number>
++ (must be a positive range in ascending order)
++ or a mixture
++ <cpu number>,...,<cpu number>-<cpu number>
++
+ irqfixup [HW]
+ When an interrupt is not handled search all handlers
+ for it. Intended to get systems with badly broken
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -23,10 +23,27 @@
+ static struct lock_class_key irq_desc_lock_class;
+
+ #if defined(CONFIG_SMP)
++static int __init irq_affinity_setup(char *str)
++{
++ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
++ cpulist_parse(str, irq_default_affinity);
++ /*
++ * Set at least the boot cpu. We don't want to end up with
++ * bug reports caused by random command line masks
++ */
++ cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
++ return 1;
++}
++__setup("irqaffinity=", irq_affinity_setup);
++
+ static void __init init_irq_default_affinity(void)
+ {
+- alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+- cpumask_setall(irq_default_affinity);
++#ifdef CONFIG_CPUMASK_OFFSTACK
++ if (!irq_default_affinity)
++ zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
++#endif
++ if (cpumask_empty(irq_default_affinity))
++ cpumask_setall(irq_default_affinity);
+ }
+ #else
+ static void __init init_irq_default_affinity(void)
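
Usage note (not part of the patch; the exact cpulist syntax is the one added to
Documentation/kernel-parameters.txt above): together with CPU isolation this
keeps default-affinity device interrupts off the isolated cores, e.g. a boot
command line that isolates CPUs 1-3 and pins the default irq affinity to CPU 0:

    isolcpus=1-3 irqaffinity=0
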
diff --git a/patches/genirq-disable-irqpoll-on-rt.patch b/patches/genirq-disable-irqpoll-on-rt.patch
new file mode 100644
index 0000000..b9072ea
--- /dev/null
+++ b/patches/genirq-disable-irqpoll-on-rt.patch
@@ -0,0 +1,39 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:57 -0500
+Subject: genirq: disable irqpoll on -rt
+
+Creates long latencies for no value
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/irq/spurious.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/kernel/irq/spurious.c
++++ b/kernel/irq/spurious.c
+@@ -340,6 +340,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
+
+ static int __init irqfixup_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++ printk(KERN_WARNING "irqfixup boot option not supported "
++ "w/ CONFIG_PREEMPT_RT_BASE\n");
++ return 1;
++#endif
+ irqfixup = 1;
+ printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+ printk(KERN_WARNING "This may impact system performance.\n");
+@@ -352,6 +357,11 @@ module_param(irqfixup, int, 0644);
+
+ static int __init irqpoll_setup(char *str)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++ printk(KERN_WARNING "irqpoll boot option not supported "
++ "w/ CONFIG_PREEMPT_RT_BASE\n");
++ return 1;
++#endif
+ irqfixup = 2;
+ printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+ "enabled\n");
diff --git a/patches/genirq-force-threading.patch b/patches/genirq-force-threading.patch
new file mode 100644
index 0000000..72fbf98
--- /dev/null
+++ b/patches/genirq-force-threading.patch
@@ -0,0 +1,46 @@
+Subject: genirq-force-threading.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 03 Apr 2011 11:57:29 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/interrupt.h | 8 ++++++--
+ kernel/irq/manage.c | 2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -383,9 +383,13 @@ static inline int disable_irq_wake(unsig
+
+
+ #ifdef CONFIG_IRQ_FORCED_THREADING
+-extern bool force_irqthreads;
++# ifndef CONFIG_PREEMPT_RT_BASE
++ extern bool force_irqthreads;
++# else
++# define force_irqthreads (true)
++# endif
+ #else
+-#define force_irqthreads (0)
++#define force_irqthreads (false)
+ #endif
+
+ #ifndef __ARCH_SET_SOFTIRQ_PENDING
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -21,6 +21,7 @@
+ #include "internals.h"
+
+ #ifdef CONFIG_IRQ_FORCED_THREADING
++# ifndef CONFIG_PREEMPT_RT_BASE
+ __read_mostly bool force_irqthreads;
+
+ static int __init setup_forced_irqthreads(char *arg)
+@@ -29,6 +30,7 @@ static int __init setup_forced_irqthread
+ return 0;
+ }
+ early_param("threadirqs", setup_forced_irqthreads);
++# endif
+ #endif
+
+ /**
diff --git a/patches/genirq-nodebug-shirq.patch b/patches/genirq-nodebug-shirq.patch
new file mode 100644
index 0000000..4885fb4
--- /dev/null
+++ b/patches/genirq-nodebug-shirq.patch
@@ -0,0 +1,20 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 18 Mar 2011 10:22:04 +0100
+Subject: genirq: Disable DEBUG_SHIRQ for rt
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/Kconfig.debug | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -164,7 +164,7 @@ config DEBUG_KERNEL
+
+ config DEBUG_SHIRQ
+ bool "Debug shared IRQ handlers"
+- depends on DEBUG_KERNEL && GENERIC_HARDIRQS
++ depends on DEBUG_KERNEL && GENERIC_HARDIRQS && !PREEMPT_RT_BASE
+ help
+ Enable this to generate a spurious interrupt as soon as a shared
+ interrupt handler is registered, and just before one is deregistered.
diff --git a/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch b/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch
new file mode 100644
index 0000000..8e94d15
--- /dev/null
+++ b/patches/harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch
@@ -0,0 +1,35 @@
+Subject: hardirq.h: Define softirq_count() as 0UL to kill build warning
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Thu, 13 Oct 2011 17:19:09 +0800
+
+kernel/lockdep.c: In function ‘print_bad_irq_dependency’:
+kernel/lockdep.c:1476:3: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 7 has type ‘unsigned int’
+kernel/lockdep.c: In function ‘print_usage_bug’:
+kernel/lockdep.c:2193:3: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 7 has type ‘unsigned int’
+
+kernel/lockdep.i shows this:
+ printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+ curr->comm, task_pid_nr(curr),
+ curr->hardirq_context, ((current_thread_info()->preempt_count) & (((1UL << (10))-1) << ((0 + 8) + 8))) >> ((0 + 8) + 8),
+ curr->softirq_context, (0U) >> (0 + 8),
+ curr->hardirqs_enabled,
+ curr->softirqs_enabled);
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/20111013091909.GA32739@zhy
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/hardirq.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/hardirq.h
++++ b/include/linux/hardirq.h
+@@ -85,7 +85,7 @@
+ # define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+ # define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+ #else
+-# define softirq_count() (0U)
++# define softirq_count() (0UL)
+ extern int in_serving_softirq(void);
+ #endif
+
diff --git a/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch b/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch
new file mode 100644
index 0000000..27f2ef3
--- /dev/null
+++ b/patches/hotplug-call-cpu_unplug_begin-a-little-early.patch
@@ -0,0 +1,59 @@
+Subject: hotplug: Call cpu_unplug_begin() before DOWN_PREPARE
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Sun, 16 Oct 2011 18:56:44 +0800
+
+cpu_unplug_begin() should be called before CPU_DOWN_PREPARE, because
+at CPU_DOWN_PREPARE cpu_active is cleared and sched_domain is
+rebuilt. Otherwise the 'sync_unplug' thread will be running on the cpu
+on which it's created and not bound to the cpu which is about to go
+down.
+
+I found this via an incorrect warning from smp_processor_id() called by
+sync_unplug/1; the trace is shown below:
+(echo 0 > /sys/devices/system/cpu/cpu1/online)
+ bash-1664 [000] 83.136620: _cpu_down: Bind sync_unplug to cpu 1
+ bash-1664 [000] 83.136623: sched_wait_task: comm=sync_unplug/1 pid=1724 prio=120
+ bash-1664 [000] 83.136624: _cpu_down: Wake sync_unplug
+ bash-1664 [000] 83.136629: sched_wakeup: comm=sync_unplug/1 pid=1724 prio=120 success=1 target_cpu=000
+
+Wants to be folded back....
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/1318762607-2261-3-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -387,22 +387,20 @@ static int __ref _cpu_down(unsigned int
+ return -EBUSY;
+ }
+
+- err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
++ cpu_hotplug_begin();
++ err = cpu_unplug_begin(cpu);
+ if (err) {
+- nr_calls--;
+- __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+- printk("%s: attempt to take down CPU %u failed\n",
+- __func__, cpu);
++ printk("cpu_unplug_begin(%d) failed\n", cpu);
+ goto out_cancel;
+ }
+
+- cpu_hotplug_begin();
+- err = cpu_unplug_begin(cpu);
++ err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ if (err) {
+ nr_calls--;
+ __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+- printk("cpu_unplug_begin(%d) failed\n", cpu);
+- goto out_cancel;
++ printk("%s: attempt to take down CPU %u failed\n",
++ __func__, cpu);
++ goto out_release;
+ }
+ smpboot_park_threads(cpu);
+
diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch
new file mode 100644
index 0000000..5b05139
--- /dev/null
+++ b/patches/hotplug-light-get-online-cpus.patch
@@ -0,0 +1,208 @@
+Subject: hotplug: Lightweight get online cpus
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 15 Jun 2011 12:36:06 +0200
+
+get_online_cpus() is a heavyweight function which involves a global
+mutex. migrate_disable() wants a simpler construct which only prevents
+a CPU from going down while a task is in a migrate disabled section.
+
+Implement a per cpu lockless mechanism, which serializes only in the
+real unplug case on a global mutex. That serialization affects only
+tasks on the cpu which should be brought down.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/cpu.h | 4 +
+ kernel/cpu.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 128 insertions(+), 3 deletions(-)
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys;
+
+ extern void get_online_cpus(void);
+ extern void put_online_cpus(void);
++extern void pin_current_cpu(void);
++extern void unpin_current_cpu(void);
+ #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
+ #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
+ #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
+@@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_un
+
+ #define get_online_cpus() do { } while (0)
+ #define put_online_cpus() do { } while (0)
++static inline void pin_current_cpu(void) { }
++static inline void unpin_current_cpu(void) { }
+ #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
+ /* These aren't inline functions due to a GCC bug. */
+ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -63,6 +63,102 @@ static struct {
+ .refcount = 0,
+ };
+
++struct hotplug_pcp {
++ struct task_struct *unplug;
++ int refcount;
++ struct completion synced;
++};
++
++static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
++
++/**
++ * pin_current_cpu - Prevent the current cpu from being unplugged
++ *
++ * Lightweight version of get_online_cpus() to prevent cpu from being
++ * unplugged when code runs in a migration disabled region.
++ *
++ * Must be called with preemption disabled (preempt_count = 1)!
++ */
++void pin_current_cpu(void)
++{
++ struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
++
++retry:
++ if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
++ hp->unplug == current || (current->flags & PF_STOMPER)) {
++ hp->refcount++;
++ return;
++ }
++ preempt_enable();
++ mutex_lock(&cpu_hotplug.lock);
++ mutex_unlock(&cpu_hotplug.lock);
++ preempt_disable();
++ goto retry;
++}
++
++/**
++ * unpin_current_cpu - Allow unplug of current cpu
++ *
++ * Must be called with preemption or interrupts disabled!
++ */
++void unpin_current_cpu(void)
++{
++ struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
++
++ WARN_ON(hp->refcount <= 0);
++
++ /* This is safe. sync_unplug_thread is pinned to this cpu */
++ if (!--hp->refcount && hp->unplug && hp->unplug != current &&
++ !(current->flags & PF_STOMPER))
++ wake_up_process(hp->unplug);
++}
++
++/*
++ * FIXME: Is this really correct under all circumstances ?
++ */
++static int sync_unplug_thread(void *data)
++{
++ struct hotplug_pcp *hp = data;
++
++ preempt_disable();
++ hp->unplug = current;
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ while (hp->refcount) {
++ schedule_preempt_disabled();
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ }
++ set_current_state(TASK_RUNNING);
++ preempt_enable();
++ complete(&hp->synced);
++ return 0;
++}
++
++/*
++ * Start the sync_unplug_thread on the target cpu and wait for it to
++ * complete.
++ */
++static int cpu_unplug_begin(unsigned int cpu)
++{
++ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++ struct task_struct *tsk;
++
++ init_completion(&hp->synced);
++ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
++ if (IS_ERR(tsk))
++ return (PTR_ERR(tsk));
++ kthread_bind(tsk, cpu);
++ wake_up_process(tsk);
++ wait_for_completion(&hp->synced);
++ return 0;
++}
++
++static void cpu_unplug_done(unsigned int cpu)
++{
++ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
++
++ hp->unplug = NULL;
++}
++
+ void get_online_cpus(void)
+ {
+ might_sleep();
+@@ -260,13 +356,14 @@ static int __ref take_cpu_down(void *_pa
+ /* Requires cpu_add_remove_lock to be held */
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+- int err, nr_calls = 0;
++ int mycpu, err, nr_calls = 0;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
+ .mod = mod,
+ .hcpu = hcpu,
+ };
++ cpumask_var_t cpumask;
+
+ if (num_online_cpus() == 1)
+ return -EBUSY;
+@@ -274,7 +371,20 @@ static int __ref _cpu_down(unsigned int
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
+- cpu_hotplug_begin();
++ /* Move the downtaker off the unplug cpu */
++ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
++ return -ENOMEM;
++ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
++ set_cpus_allowed_ptr(current, cpumask);
++ free_cpumask_var(cpumask);
++ preempt_disable();
++ mycpu = smp_processor_id();
++ if (mycpu == cpu) {
++ printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
++ preempt_enable();
++ return -EBUSY;
++ }
++ preempt_enable();
+
+ err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ if (err) {
+@@ -282,7 +392,16 @@ static int __ref _cpu_down(unsigned int
+ __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+ printk("%s: attempt to take down CPU %u failed\n",
+ __func__, cpu);
+- goto out_release;
++ goto out_cancel;
++ }
++
++ cpu_hotplug_begin();
++ err = cpu_unplug_begin(cpu);
++ if (err) {
++ nr_calls--;
++ __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
++ printk("cpu_unplug_begin(%d) failed\n", cpu);
++ goto out_cancel;
+ }
+ smpboot_park_threads(cpu);
+
+@@ -314,6 +433,8 @@ static int __ref _cpu_down(unsigned int
+ check_for_tasks(cpu);
+
+ out_release:
++ cpu_unplug_done(cpu);
++out_cancel:
+ cpu_hotplug_done();
+ if (!err)
+ cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
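
For context, an illustrative sketch only (the real migrate_disable()/
migrate_enable() implementation is part of the scheduler patches in this series
and additionally handles nesting and cpus_allowed bookkeeping): the intended
callers of pin_current_cpu()/unpin_current_cpu() look roughly like this, which
is why pin_current_cpu() is documented as requiring preemption to be disabled:

    void migrate_disable(void)
    {
            preempt_disable();
            pin_current_cpu();              /* holds off cpu_unplug_begin() for this cpu */
            current->migrate_disable++;
            preempt_enable();
    }

    void migrate_enable(void)
    {
            preempt_disable();
            current->migrate_disable--;
            unpin_current_cpu();            /* wakes the sync_unplug thread if it waits on us */
            preempt_enable();
    }
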
diff --git a/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch b/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
new file mode 100644
index 0000000..c224c7c
--- /dev/null
+++ b/patches/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
@@ -0,0 +1,24 @@
+Subject: hotplug: sync_unplug: No "\n" in task name
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Sun, 16 Oct 2011 18:56:43 +0800
+
+Otherwise the output will look a little odd.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -143,7 +143,7 @@ static int cpu_unplug_begin(unsigned int
+ struct task_struct *tsk;
+
+ init_completion(&hp->synced);
+- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
++ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+ if (IS_ERR(tsk))
+ return (PTR_ERR(tsk));
+ kthread_bind(tsk, cpu);
diff --git a/patches/hotplug-use-migrate-disable.patch b/patches/hotplug-use-migrate-disable.patch
new file mode 100644
index 0000000..876d123
--- /dev/null
+++ b/patches/hotplug-use-migrate-disable.patch
@@ -0,0 +1,36 @@
+Subject: hotplug-use-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 19:35:29 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -379,14 +379,13 @@ static int __ref _cpu_down(unsigned int
+ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
+ set_cpus_allowed_ptr(current, cpumask);
+ free_cpumask_var(cpumask);
+- preempt_disable();
++ migrate_disable();
+ mycpu = smp_processor_id();
+ if (mycpu == cpu) {
+ printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
+- preempt_enable();
++ migrate_enable();
+ return -EBUSY;
+ }
+- preempt_enable();
+
+ err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ if (err) {
+@@ -437,6 +436,7 @@ static int __ref _cpu_down(unsigned int
+ out_release:
+ cpu_unplug_done(cpu);
+ out_cancel:
++ migrate_enable();
+ cpu_hotplug_done();
+ if (!err)
+ cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
diff --git a/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch b/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
new file mode 100644
index 0000000..930553a
--- /dev/null
+++ b/patches/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
@@ -0,0 +1,461 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:31 -0500
+Subject: hrtimer: fixup hrtimer callback changes for preempt-rt
+
+In preempt-rt we cannot call the callbacks which take sleeping locks
+from the timer interrupt context.
+
+Bring back the softirq split for now, until we have fixed the signal
+delivery problem for real.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ include/linux/hrtimer.h | 3
+ kernel/hrtimer.c | 220 ++++++++++++++++++++++++++++++++++++++++-------
+ kernel/sched/core.c | 1
+ kernel/sched/rt.c | 1
+ kernel/time/tick-sched.c | 1
+ kernel/watchdog.c | 1
+ 6 files changed, 198 insertions(+), 29 deletions(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -111,6 +111,8 @@ struct hrtimer {
+ enum hrtimer_restart (*function)(struct hrtimer *);
+ struct hrtimer_clock_base *base;
+ unsigned long state;
++ struct list_head cb_entry;
++ int irqsafe;
+ #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+ ktime_t praecox;
+ #endif
+@@ -150,6 +152,7 @@ struct hrtimer_clock_base {
+ int index;
+ clockid_t clockid;
+ struct timerqueue_head active;
++ struct list_head expired;
+ ktime_t resolution;
+ ktime_t (*get_time)(void);
+ ktime_t softirq_time;
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -589,8 +589,7 @@ static int hrtimer_reprogram(struct hrti
+ * When the callback is running, we do not reprogram the clock event
+ * device. The timer callback is either running on a different CPU or
+ * the callback is executed in the hrtimer_interrupt context. The
+- * reprogramming is handled either by the softirq, which called the
+- * callback or at the end of the hrtimer_interrupt.
++ * reprogramming is handled at the end of the hrtimer_interrupt.
+ */
+ if (hrtimer_callback_running(timer))
+ return 0;
+@@ -625,6 +624,9 @@ static int hrtimer_reprogram(struct hrti
+ return res;
+ }
+
++static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
++static int hrtimer_rt_defer(struct hrtimer *timer);
++
+ /*
+ * Initialize the high resolution related parts of cpu_base
+ */
+@@ -641,9 +643,18 @@ static inline void hrtimer_init_hres(str
+ * and expiry check is done in the hrtimer_interrupt or in the softirq.
+ */
+ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+- struct hrtimer_clock_base *base)
++ struct hrtimer_clock_base *base,
++ int wakeup)
+ {
+- return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
++ if (!(base->cpu_base->hres_active && hrtimer_reprogram(timer, base)))
++ return 0;
++ if (!wakeup)
++ return -ETIME;
++#ifdef CONFIG_PREEMPT_RT_BASE
++ if (!hrtimer_rt_defer(timer))
++ return -ETIME;
++#endif
++ return 1;
+ }
+
+ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+@@ -724,12 +735,18 @@ static inline int hrtimer_switch_to_hres
+ static inline void
+ hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
+ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+- struct hrtimer_clock_base *base)
++ struct hrtimer_clock_base *base,
++ int wakeup)
+ {
+ return 0;
+ }
+ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
+ static inline void retrigger_next_event(void *arg) { }
++static inline int hrtimer_reprogram(struct hrtimer *timer,
++ struct hrtimer_clock_base *base)
++{
++ return 0;
++}
+
+ #endif /* CONFIG_HIGH_RES_TIMERS */
+
+@@ -861,9 +878,9 @@ void hrtimer_wait_for_timer(const struct
+ {
+ struct hrtimer_clock_base *base = timer->base;
+
+- if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
++ if (base && base->cpu_base && !timer->irqsafe)
+ wait_event(base->cpu_base->wait,
+- !(timer->state & HRTIMER_STATE_CALLBACK));
++ !(timer->state & HRTIMER_STATE_CALLBACK));
+ }
+
+ #else
+@@ -913,6 +930,11 @@ static void __remove_hrtimer(struct hrti
+ if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+ goto out;
+
++ if (unlikely(!list_empty(&timer->cb_entry))) {
++ list_del_init(&timer->cb_entry);
++ goto out;
++ }
++
+ next_timer = timerqueue_getnext(&base->active);
+ timerqueue_del(&base->active, &timer->node);
+ if (&timer->node == next_timer) {
+@@ -1020,9 +1042,19 @@ int __hrtimer_start_range_ns(struct hrti
+ *
+ * XXX send_remote_softirq() ?
+ */
+- if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
+- && hrtimer_enqueue_reprogram(timer, new_base)) {
+- if (wakeup) {
++ if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) {
++ ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
++ if (ret < 0) {
++ /*
++ * In case we failed to reprogram the timer (mostly
++ * because our current timer has already elapsed),
++ * remove it again and report a failure. This avoids
++ * stale base->first entries.
++ */
++ debug_deactivate(timer);
++ __remove_hrtimer(timer, new_base,
++ timer->state & HRTIMER_STATE_CALLBACK, 0);
++ } else if (ret > 0) {
+ /*
+ * We need to drop cpu_base->lock to avoid a
+ * lock ordering issue vs. rq->lock.
+@@ -1030,9 +1062,7 @@ int __hrtimer_start_range_ns(struct hrti
+ raw_spin_unlock(&new_base->cpu_base->lock);
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ local_irq_restore(flags);
+- return ret;
+- } else {
+- __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++ return 0;
+ }
+ }
+
+@@ -1199,6 +1229,7 @@ static void __hrtimer_init(struct hrtime
+
+ base = hrtimer_clockid_to_base(clock_id);
+ timer->base = &cpu_base->clock_base[base];
++ INIT_LIST_HEAD(&timer->cb_entry);
+ timerqueue_init(&timer->node);
+
+ #ifdef CONFIG_TIMER_STATS
+@@ -1282,10 +1313,128 @@ static void __run_hrtimer(struct hrtimer
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+ }
+
+-#ifdef CONFIG_HIGH_RES_TIMERS
+-
+ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
++ struct hrtimer_clock_base *base)
++{
++ /*
++ * Note, we clear the callback flag before we requeue the
++ * timer otherwise we trigger the callback_running() check
++ * in hrtimer_reprogram().
++ */
++ timer->state &= ~HRTIMER_STATE_CALLBACK;
++
++ if (restart != HRTIMER_NORESTART) {
++ BUG_ON(hrtimer_active(timer));
++ /*
++ * Enqueue the timer, if it's the leftmost timer then
++ * we need to reprogram it.
++ */
++ if (!enqueue_hrtimer(timer, base))
++ return;
++
++#ifndef CONFIG_HIGH_RES_TIMERS
++ }
++#else
++ if (base->cpu_base->hres_active &&
++ hrtimer_reprogram(timer, base))
++ goto requeue;
++
++ } else if (hrtimer_active(timer)) {
++ /*
++ * If the timer was rearmed on another CPU, reprogram
++ * the event device.
++ */
++ if (&timer->node == base->active.next &&
++ base->cpu_base->hres_active &&
++ hrtimer_reprogram(timer, base))
++ goto requeue;
++ }
++ return;
++
++requeue:
++ /*
++ * Timer is expired. Thus move it from tree to pending list
++ * again.
++ */
++ __remove_hrtimer(timer, base, timer->state, 0);
++ list_add_tail(&timer->cb_entry, &base->expired);
++#endif
++}
++
++/*
++ * The changes in mainline which removed the callback modes from
++ * hrtimer are not yet working with -rt. The non wakeup_process()
++ * based callbacks which involve sleeping locks need to be treated
++ * separately.
++ */
++static void hrtimer_rt_run_pending(void)
++{
++ enum hrtimer_restart (*fn)(struct hrtimer *);
++ struct hrtimer_cpu_base *cpu_base;
++ struct hrtimer_clock_base *base;
++ struct hrtimer *timer;
++ int index, restart;
++
++ local_irq_disable();
++ cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
++
++ raw_spin_lock(&cpu_base->lock);
++
++ for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
++ base = &cpu_base->clock_base[index];
++
++ while (!list_empty(&base->expired)) {
++ timer = list_first_entry(&base->expired,
++ struct hrtimer, cb_entry);
++
++ /*
++ * Same as the above __run_hrtimer function
++ * just we run with interrupts enabled.
++ */
++ debug_hrtimer_deactivate(timer);
++ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
++ timer_stats_account_hrtimer(timer);
++ fn = timer->function;
++
++ raw_spin_unlock_irq(&cpu_base->lock);
++ restart = fn(timer);
++ raw_spin_lock_irq(&cpu_base->lock);
++
++ hrtimer_rt_reprogram(restart, timer, base);
++ }
++ }
++
++ raw_spin_unlock_irq(&cpu_base->lock);
++
++ wake_up_timer_waiters(cpu_base);
++}
++
++static int hrtimer_rt_defer(struct hrtimer *timer)
++{
++ if (timer->irqsafe)
++ return 0;
++
++ __remove_hrtimer(timer, timer->base, timer->state, 0);
++ list_add_tail(&timer->cb_entry, &timer->base->expired);
++ return 1;
++}
++
++#else
++
++static inline void hrtimer_rt_run_pending(void)
++{
++ hrtimer_peek_ahead_timers();
++}
++
++static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
++
++#endif
++
++#ifdef CONFIG_HIGH_RES_TIMERS
++
+ /*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+@@ -1294,7 +1443,7 @@ void hrtimer_interrupt(struct clock_even
+ {
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ ktime_t expires_next, now, entry_time, delta;
+- int i, retries = 0;
++ int i, retries = 0, raise = 0;
+
+ BUG_ON(!cpu_base->hres_active);
+ cpu_base->nr_events++;
+@@ -1361,7 +1510,10 @@ retry:
+ break;
+ }
+
+- __run_hrtimer(timer, &basenow);
++ if (!hrtimer_rt_defer(timer))
++ __run_hrtimer(timer, &basenow);
++ else
++ raise = 1;
+ }
+ }
+
+@@ -1376,6 +1528,10 @@ retry:
+ if (expires_next.tv64 == KTIME_MAX ||
+ !tick_program_event(expires_next, 0)) {
+ cpu_base->hang_detected = 0;
++
++ if (raise)
++ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++
+ return;
+ }
+
+@@ -1456,24 +1612,26 @@ void hrtimer_peek_ahead_timers(void)
+ local_irq_restore(flags);
+ }
+
++#else /* CONFIG_HIGH_RES_TIMERS */
++
++static inline void __hrtimer_peek_ahead_timers(void) { }
++
++#endif /* !CONFIG_HIGH_RES_TIMERS */
++
+ static void run_hrtimer_softirq(struct softirq_action *h)
+ {
++#ifdef CONFIG_HIGH_RES_TIMERS
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+ if (cpu_base->clock_was_set) {
+ cpu_base->clock_was_set = 0;
+ clock_was_set();
+ }
++#endif
+
+- hrtimer_peek_ahead_timers();
++ hrtimer_rt_run_pending();
+ }
+
+-#else /* CONFIG_HIGH_RES_TIMERS */
+-
+-static inline void __hrtimer_peek_ahead_timers(void) { }
+-
+-#endif /* !CONFIG_HIGH_RES_TIMERS */
+-
+ /*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+@@ -1506,7 +1664,7 @@ void hrtimer_run_queues(void)
+ struct timerqueue_node *node;
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ struct hrtimer_clock_base *base;
+- int index, gettime = 1;
++ int index, gettime = 1, raise = 0;
+
+ if (hrtimer_hres_active())
+ return;
+@@ -1531,12 +1689,16 @@ void hrtimer_run_queues(void)
+ hrtimer_get_expires_tv64(timer))
+ break;
+
+- __run_hrtimer(timer, &base->softirq_time);
++ if (!hrtimer_rt_defer(timer))
++ __run_hrtimer(timer, &base->softirq_time);
++ else
++ raise = 1;
+ }
+ raw_spin_unlock(&cpu_base->lock);
+ }
+
+- wake_up_timer_waiters(cpu_base);
++ if (raise)
++ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+
+ /*
+@@ -1558,6 +1720,7 @@ static enum hrtimer_restart hrtimer_wake
+ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+ {
+ sl->timer.function = hrtimer_wakeup;
++ sl->timer.irqsafe = 1;
+ sl->task = task;
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+@@ -1696,6 +1859,7 @@ static void __cpuinit init_hrtimers_cpu(
+ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+ cpu_base->clock_base[i].cpu_base = cpu_base;
+ timerqueue_init_head(&cpu_base->clock_base[i].active);
++ INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
+ }
+
+ hrtimer_init_hres(cpu_base);
+@@ -1814,9 +1978,7 @@ void __init hrtimers_init(void)
+ hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
+ register_cpu_notifier(&hrtimers_nb);
+-#ifdef CONFIG_HIGH_RES_TIMERS
+ open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+-#endif
+ }
+
+ /**
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -489,6 +489,7 @@ static void init_rq_hrtick(struct rq *rq
+
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rq->hrtick_timer.function = hrtick;
++ rq->hrtick_timer.irqsafe = 1;
+ }
+ #else /* CONFIG_SCHED_HRTICK */
+ static inline void hrtick_clear(struct rq *rq)
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -41,6 +41,7 @@ void init_rt_bandwidth(struct rt_bandwid
+
+ hrtimer_init(&rt_b->rt_period_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ rt_b->rt_period_timer.irqsafe = 1;
+ rt_b->rt_period_timer.function = sched_rt_period_timer;
+ }
+
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -867,6 +867,7 @@ void tick_setup_sched_timer(void)
+ * Emulate tick processing via per-CPU hrtimers:
+ */
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
++ ts->sched_timer.irqsafe = 1;
+ ts->sched_timer.function = tick_sched_timer;
+
+ /* Get the next period (per cpu) */
+--- a/kernel/watchdog.c
++++ b/kernel/watchdog.c
+@@ -358,6 +358,7 @@ static void watchdog_enable(unsigned int
+ /* kick off the timer for the hardlockup detector */
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
++ hrtimer->irqsafe = 1;
+
+ if (!watchdog_enabled) {
+ kthread_park(current);
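
Usage note (illustrative only; my_timer and my_timer_fn are made-up names): with
the softirq split restored, a timer whose callback takes no sleeping locks can
opt out of the deferral by setting the new irqsafe flag, exactly as the
scheduler tick, rt_period and watchdog timers do in the hunks above:

    hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    my_timer.function = my_timer_fn;        /* must not take sleeping locks on RT */
    my_timer.irqsafe = 1;                   /* run directly from hrtimer_interrupt() */
    hrtimer_start(&my_timer, ns_to_ktime(1000000), HRTIMER_MODE_REL);
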
diff --git a/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch b/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
new file mode 100644
index 0000000..5c8bce5
--- /dev/null
+++ b/patches/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
@@ -0,0 +1,37 @@
+Subject: hrtimer: Raise softirq if hrtimer irq stalled
+From: Watanabe <shunsuke.watanabe@tel.com>
+Date: Sun, 28 Oct 2012 11:13:44 +0100
+
+When the hrtimer stall detection hits, the softirq is not raised.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/hrtimer.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -1528,11 +1528,7 @@ retry:
+ if (expires_next.tv64 == KTIME_MAX ||
+ !tick_program_event(expires_next, 0)) {
+ cpu_base->hang_detected = 0;
+-
+- if (raise)
+- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+-
+- return;
++ goto out;
+ }
+
+ /*
+@@ -1576,6 +1572,9 @@ retry:
+ tick_program_event(expires_next, 1);
+ printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+ ktime_to_ns(delta));
++out:
++ if (raise)
++ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+
+ /*
diff --git a/patches/hrtimers-prepare-full-preemption.patch b/patches/hrtimers-prepare-full-preemption.patch
new file mode 100644
index 0000000..be6ba1d
--- /dev/null
+++ b/patches/hrtimers-prepare-full-preemption.patch
@@ -0,0 +1,195 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:34 -0500
+Subject: hrtimers: prepare full preemption
+
+Make cancellation of a running callback in softirq context safe
+against preemption.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/hrtimer.h | 10 ++++++++++
+ kernel/hrtimer.c | 33 ++++++++++++++++++++++++++++++++-
+ kernel/itimer.c | 1 +
+ kernel/posix-timers.c | 33 +++++++++++++++++++++++++++++++++
+ 4 files changed, 76 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -192,6 +192,9 @@ struct hrtimer_cpu_base {
+ unsigned long nr_hangs;
+ ktime_t max_hang_time;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_BASE
++ wait_queue_head_t wait;
++#endif
+ struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ };
+
+@@ -385,6 +388,13 @@ static inline int hrtimer_restart(struct
+ return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ }
+
++/* Softirq preemption could deadlock timer removal */
++#ifdef CONFIG_PREEMPT_RT_BASE
++ extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
++#else
++# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
++#endif
++
+ /* Query timers: */
+ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -844,6 +844,32 @@ u64 hrtimer_forward(struct hrtimer *time
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_forward);
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
++
++/**
++ * hrtimer_wait_for_timer - Wait for a running timer
++ *
++ * @timer: timer to wait for
++ *
++ * If the timer's callback function is currently executing, the
++ * function waits on the waitqueue of the timer base. The
++ * waitqueue is woken up after the timer callback function has
++ * finished execution.
++ */
++void hrtimer_wait_for_timer(const struct hrtimer *timer)
++{
++ struct hrtimer_clock_base *base = timer->base;
++
++ if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
++ wait_event(base->cpu_base->wait,
++ !(timer->state & HRTIMER_STATE_CALLBACK));
++}
++
++#else
++# define wake_up_timer_waiters(b) do { } while (0)
++#endif
++
+ /*
+ * enqueue_hrtimer - internal function to (re)start a timer
+ *
+@@ -1094,7 +1120,7 @@ int hrtimer_cancel(struct hrtimer *timer
+
+ if (ret >= 0)
+ return ret;
+- cpu_relax();
++ hrtimer_wait_for_timer(timer);
+ }
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_cancel);
+@@ -1509,6 +1535,8 @@ void hrtimer_run_queues(void)
+ }
+ raw_spin_unlock(&cpu_base->lock);
+ }
++
++ wake_up_timer_waiters(cpu_base);
+ }
+
+ /*
+@@ -1671,6 +1699,9 @@ static void __cpuinit init_hrtimers_cpu(
+ }
+
+ hrtimer_init_hres(cpu_base);
++#ifdef CONFIG_PREEMPT_RT_BASE
++ init_waitqueue_head(&cpu_base->wait);
++#endif
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+--- a/kernel/itimer.c
++++ b/kernel/itimer.c
+@@ -213,6 +213,7 @@ again:
+ /* We are sharing ->siglock with it_real_fn() */
+ if (hrtimer_try_to_cancel(timer) < 0) {
+ spin_unlock_irq(&tsk->sighand->siglock);
++ hrtimer_wait_for_timer(&tsk->signal->real_timer);
+ goto again;
+ }
+ expires = timeval_to_ktime(value->it_value);
+--- a/kernel/posix-timers.c
++++ b/kernel/posix-timers.c
+@@ -773,6 +773,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
+ return overrun;
+ }
+
++/*
++ * Protected by RCU!
++ */
++static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
++{
++#ifdef CONFIG_PREEMPT_RT_FULL
++ if (kc->timer_set == common_timer_set)
++ hrtimer_wait_for_timer(&timr->it.real.timer);
++ else
++ /* FIXME: Whacky hack for posix-cpu-timers */
++ schedule_timeout(1);
++#endif
++}
++
+ /* Set a POSIX.1b interval timer. */
+ /* timr->it_lock is taken. */
+ static int
+@@ -850,6 +864,7 @@ retry:
+ if (!timr)
+ return -EINVAL;
+
++ rcu_read_lock();
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_set))
+ error = -EINVAL;
+@@ -858,9 +873,12 @@ retry:
+
+ unlock_timer(timr, flag);
+ if (error == TIMER_RETRY) {
++ timer_wait_for_callback(kc, timr);
+ rtn = NULL; // We already got the old time...
++ rcu_read_unlock();
+ goto retry;
+ }
++ rcu_read_unlock();
+
+ if (old_setting && !error &&
+ copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+@@ -898,10 +916,15 @@ retry_delete:
+ if (!timer)
+ return -EINVAL;
+
++ rcu_read_lock();
+ if (timer_delete_hook(timer) == TIMER_RETRY) {
+ unlock_timer(timer, flags);
++ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
++ timer);
++ rcu_read_unlock();
+ goto retry_delete;
+ }
++ rcu_read_unlock();
+
+ spin_lock(&current->sighand->siglock);
+ list_del(&timer->list);
+@@ -927,8 +950,18 @@ static void itimer_delete(struct k_itime
+ retry_delete:
+ spin_lock_irqsave(&timer->it_lock, flags);
+
++ /* On RT we can race with a deletion */
++ if (!timer->it_signal) {
++ unlock_timer(timer, flags);
++ return;
++ }
++
+ if (timer_delete_hook(timer) == TIMER_RETRY) {
++ rcu_read_lock();
+ unlock_timer(timer, flags);
++ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
++ timer);
++ rcu_read_unlock();
+ goto retry_delete;
+ }
+ list_del(&timer->list);
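
For reference, the hrtimer_cancel() hunk above only shows the changed line; the
full loop ends up with the following shape (reconstructed from context): instead
of busy-looping with cpu_relax() against a callback that now runs in a
preemptible softirq thread, the canceller sleeps on the base's waitqueue until
the callback has finished:

    int hrtimer_cancel(struct hrtimer *timer)
    {
            for (;;) {
                    int ret = hrtimer_try_to_cancel(timer);

                    if (ret >= 0)
                            return ret;
                    /* Callback is running: wait instead of spinning */
                    hrtimer_wait_for_timer(timer);
            }
    }
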
diff --git a/patches/hwlatdetect.patch b/patches/hwlatdetect.patch
new file mode 100644
index 0000000..08045b3
--- /dev/null
+++ b/patches/hwlatdetect.patch
@@ -0,0 +1,1344 @@
+Subject: hwlatdetect.patch
+From: Carsten Emde <C.Emde@osadl.org>
+Date: Tue, 19 Jul 2011 13:53:12 +0100
+
+Jon Masters developed this wonderful SMI detector. For details please
+consult Documentation/hwlat_detector.txt. It could be ported to Linux
+3.0 RT without any major change.
+
+Signed-off-by: Carsten Emde <C.Emde@osadl.org>
+
+---
+ Documentation/hwlat_detector.txt | 64 ++
+ drivers/misc/Kconfig | 29
+ drivers/misc/Makefile | 1
+ drivers/misc/hwlat_detector.c | 1212 +++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 1306 insertions(+)
+
+--- /dev/null
++++ b/Documentation/hwlat_detector.txt
+@@ -0,0 +1,64 @@
++Introduction:
++-------------
++
++The module hwlat_detector is a special purpose kernel module that is used to
++detect large system latencies induced by the behavior of certain underlying
++hardware or firmware, independent of Linux itself. The code was developed
++originally to detect SMIs (System Management Interrupts) on x86 systems,
++however there is nothing x86 specific about this patchset. It was
++originally written for use by the "RT" patch since the Real Time
++kernel is highly latency sensitive.
++
++SMIs are usually not serviced by the Linux kernel, which typically does not
++even know that they are occurring. SMIs are instead set up by BIOS code
++and are serviced by BIOS code, usually for "critical" events such as
++management of thermal sensors and fans. Sometimes though, SMIs are used for
++other tasks and those tasks can spend an inordinate amount of time in the
++handler (sometimes measured in milliseconds). Obviously this is a problem if
++you are trying to keep event service latencies down in the microsecond range.
++
++The hardware latency detector works by hogging all of the cpus for configurable
++amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
++for some period, then looking for gaps in the TSC data. Any gap indicates a
++time when the polling was interrupted and since the machine is stopped and
++interrupts turned off the only thing that could do that would be an SMI.
++
++Note that the SMI detector should *NEVER* be used in a production environment.
++It is intended to be run manually to determine if the hardware platform has a
++problem with long system firmware service routines.
++
++Usage:
++------
++
++Loading the module hwlat_detector with the parameter "enabled=1" (or by
++toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is the only
++step required to start the hwlat_detector. It is possible to redefine the
++threshold in microseconds (us) above which latency spikes will be taken
++into account (parameter "threshold=").
++
++Example:
++
++ # modprobe hwlat_detector enabled=1 threshold=100
++
++After the module is loaded, it creates a directory named "hwlat_detector" under
++the debugfs mountpoint; "/debug/hwlat_detector" is used in this text. It is necessary
++to have debugfs mounted, which might be on /sys/debug on your system.
++
++The /debug/hwlat_detector interface contains the following files:
++
++count - number of latency spikes observed since last reset
++enable - a global enable/disable toggle (0/1), resets count
++max - maximum hardware latency actually observed (usecs)
++sample - a pipe from which to read current raw sample data
++ in the format <timestamp> <latency observed usecs>
++ (can be opened O_NONBLOCK for a single sample)
++threshold - minimum latency value to be considered (usecs)
++width - time period to sample with CPUs held (usecs)
++ must be less than the total window size (enforced)
++window - total period of sampling, width being inside (usecs)
++
++By default we will set width to 500,000 and window to 1,000,000, meaning that
++we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
++observe any latencies that exceed the threshold (initially 100 usecs),
++then we write to a global sample ring buffer of 8K samples, which is
++consumed by reading from the "sample" (pipe) debugfs file interface.
+--- a/drivers/misc/Kconfig
++++ b/drivers/misc/Kconfig
+@@ -121,6 +121,35 @@ config IBM_ASM
+ for information on the specific driver level and support statement
+ for your IBM server.
+
++config HWLAT_DETECTOR
++ tristate "Testing module to detect hardware-induced latencies"
++ depends on DEBUG_FS
++ depends on RING_BUFFER
++ default m
++ ---help---
++ A simple hardware latency detector. Use this module to detect
++ large latencies introduced by the behavior of the underlying
++ system firmware external to Linux. We do this using periodic
++ use of stop_machine to grab all available CPUs and measure
++ for unexplainable gaps in the CPU timestamp counter(s). By
++ default, the module is not enabled until the "enable" file
++ within the "hwlat_detector" debugfs directory is toggled.
++
++ This module is often used to detect SMI (System Management
++ Interrupts) on x86 systems, though is not x86 specific. To
++ this end, we default to using a sample window of 1 second,
++ during which we will sample for 0.5 seconds. If an SMI or
++ similar event occurs during that time, it is recorded
++ into an 8K samples global ring buffer until retrieved.
++
++ WARNING: This software should never be enabled (it can be built
++ but should not be turned on after it is loaded) in a production
++ environment where high latencies are a concern since the
++ sampling mechanism actually introduces latencies for
++ regular tasks while the CPU(s) are being held.
++
++ If unsure, say N
++
+ config PHANTOM
+ tristate "Sensable PHANToM (PCI)"
+ depends on PCI
+--- a/drivers/misc/Makefile
++++ b/drivers/misc/Makefile
+@@ -49,3 +49,4 @@ obj-y += carma/
+ obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
+ obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/
+ obj-$(CONFIG_INTEL_MEI) += mei/
++obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
+--- /dev/null
++++ b/drivers/misc/hwlat_detector.c
+@@ -0,0 +1,1212 @@
++/*
++ * hwlat_detector.c - A simple Hardware Latency detector.
++ *
++ * Use this module to detect large system latencies induced by the behavior of
++ * certain underlying system hardware or firmware, independent of Linux itself.
++ * The code was developed originally to detect the presence of SMIs on Intel
++ * and AMD systems, although there is no dependency upon x86 herein.
++ *
++ * The classical example usage of this module is in detecting the presence of
++ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
++ * somewhat special form of hardware interrupt spawned from earlier CPU debug
++ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
++ * LPC (or other device) to generate a special interrupt under certain
++ * circumstances, for example, upon expiration of a special SMI timer device,
++ * due to certain external thermal readings, on certain I/O address accesses,
++ * and other situations. An SMI hits a special CPU pin, triggers a special
++ * SMI mode (complete with special memory map), and the OS is unaware.
++ *
++ * Although certain hardware-inducing latencies are necessary (for example,
++ * a modern system often requires an SMI handler for correct thermal control
++ * and remote management) they can wreak havoc upon any OS-level performance
++ * guarantees toward low-latency, especially when the OS is not even made
++ * aware of the presence of these interrupts. For this reason, we need a
++ * somewhat brute force mechanism to detect these interrupts. In this case,
++ * we do it by hogging all of the CPU(s) for configurable timer intervals,
++ * sampling the built-in CPU timer, looking for discontiguous readings.
++ *
++ * WARNING: This implementation necessarily introduces latencies. Therefore,
++ * you should NEVER use this module in a production environment
++ * requiring any kind of low-latency performance guarantee(s).
++ *
++ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
++ *
++ * Includes useful feedback from Clark Williams <clark@redhat.com>
++ *
++ * This file is licensed under the terms of the GNU General Public
++ * License version 2. This program is licensed "as is" without any
++ * warranty of any kind, whether express or implied.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/ring_buffer.h>
++#include <linux/stop_machine.h>
++#include <linux/time.h>
++#include <linux/hrtimer.h>
++#include <linux/kthread.h>
++#include <linux/debugfs.h>
++#include <linux/seq_file.h>
++#include <linux/uaccess.h>
++#include <linux/version.h>
++#include <linux/delay.h>
++#include <linux/slab.h>
++
++#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
++#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
++#define U64STR_SIZE 22 /* 20 digits max */
++
++#define VERSION "1.0.0"
++#define BANNER "hwlat_detector: "
++#define DRVNAME "hwlat_detector"
++#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
++#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
++#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
++
++/* Module metadata */
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
++MODULE_DESCRIPTION("A simple hardware latency detector");
++MODULE_VERSION(VERSION);
++
++/* Module parameters */
++
++static int debug;
++static int enabled;
++static int threshold;
++
++module_param(debug, int, 0); /* enable debug */
++module_param(enabled, int, 0); /* enable detector */
++module_param(threshold, int, 0); /* latency threshold */
++
++/* Buffering and sampling */
++
++static struct ring_buffer *ring_buffer; /* sample buffer */
++static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
++static unsigned long buf_size = BUF_SIZE_DEFAULT;
++static struct task_struct *kthread; /* sampling thread */
++
++/* DebugFS filesystem entries */
++
++static struct dentry *debug_dir; /* debugfs directory */
++static struct dentry *debug_max; /* maximum TSC delta */
++static struct dentry *debug_count; /* total detect count */
++static struct dentry *debug_sample_width; /* sample width us */
++static struct dentry *debug_sample_window; /* sample window us */
++static struct dentry *debug_sample; /* raw samples us */
++static struct dentry *debug_threshold; /* threshold us */
++static struct dentry *debug_enable; /* enable/disable */
++
++/* Individual samples and global state */
++
++struct sample; /* latency sample */
++struct data; /* Global state */
++
++/* Sampling functions */
++static int __buffer_add_sample(struct sample *sample);
++static struct sample *buffer_get_sample(struct sample *sample);
++static int get_sample(void *unused);
++
++/* Threading and state */
++static int kthread_fn(void *unused);
++static int start_kthread(void);
++static int stop_kthread(void);
++static void __reset_stats(void);
++static int init_stats(void);
++
++/* Debugfs interface */
++static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos, const u64 *entry);
++static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
++ size_t cnt, loff_t *ppos, u64 *entry);
++static int debug_sample_fopen(struct inode *inode, struct file *filp);
++static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos);
++static int debug_sample_release(struct inode *inode, struct file *filp);
++static int debug_enable_fopen(struct inode *inode, struct file *filp);
++static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos);
++static ssize_t debug_enable_fwrite(struct file *file,
++ const char __user *user_buffer,
++ size_t user_size, loff_t *offset);
++
++/* Initialization functions */
++static int init_debugfs(void);
++static void free_debugfs(void);
++static int detector_init(void);
++static void detector_exit(void);
++
++/* Individual latency samples are stored here when detected and packed into
++ * the ring_buffer circular buffer, where they are overwritten when
++ * more than buf_size/sizeof(sample) samples are received. */
++struct sample {
++ u64 seqnum; /* unique sequence */
++ u64 duration; /* ktime delta */
++ struct timespec timestamp; /* wall time */
++ unsigned long lost;
++};
++
++/* keep the global state somewhere. Mostly used under stop_machine. */
++static struct data {
++
++ struct mutex lock; /* protect changes */
++
++ u64 count; /* total since reset */
++ u64 max_sample; /* max hardware latency */
++ u64 threshold; /* sample threshold level */
++
++ u64 sample_window; /* total sampling window (on+off) */
++ u64 sample_width; /* active sampling portion of window */
++
++ atomic_t sample_open; /* whether the sample file is open */
++
++ wait_queue_head_t wq; /* waitqueue for new sample values */
++
++} data;
++
++/**
++ * __buffer_add_sample - add a new latency sample recording to the ring buffer
++ * @sample: The new latency sample value
++ *
++ * This receives a new latency sample and records it in a global ring buffer.
++ * No additional locking is used in this case - suited for stop_machine use.
++ */
++static int __buffer_add_sample(struct sample *sample)
++{
++ return ring_buffer_write(ring_buffer,
++ sizeof(struct sample), sample);
++}
++
++/**
++ * buffer_get_sample - remove a hardware latency sample from the ring buffer
++ * @sample: Pre-allocated storage for the sample
++ *
++ * This retrieves a hardware latency sample from the global circular buffer
++ */
++static struct sample *buffer_get_sample(struct sample *sample)
++{
++ struct ring_buffer_event *e = NULL;
++ struct sample *s = NULL;
++ unsigned int cpu = 0;
++
++ if (!sample)
++ return NULL;
++
++ mutex_lock(&ring_buffer_mutex);
++ for_each_online_cpu(cpu) {
++ e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
++ if (e)
++ break;
++ }
++
++ if (e) {
++ s = ring_buffer_event_data(e);
++ memcpy(sample, s, sizeof(struct sample));
++ } else
++ sample = NULL;
++ mutex_unlock(&ring_buffer_mutex);
++
++ return sample;
++}
++
++/**
++ * get_sample - sample the CPU TSC and look for likely hardware latencies
++ * @unused: This is not used but is a part of the stop_machine API
++ *
++ * Used to repeatedly capture the CPU TSC (or similar), looking for potential
++ * hardware-induced latency. Called under stop_machine, with data.lock held.
++ */
++static int get_sample(void *unused)
++{
++ ktime_t start, t1, t2;
++ s64 diff, total = 0;
++ u64 sample = 0;
++ int ret = 1;
++
++ start = ktime_get(); /* start timestamp */
++
++ do {
++
++ t1 = ktime_get(); /* we'll look for a discontinuity */
++ t2 = ktime_get();
++
++ total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
++ diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */
++
++ /* This shouldn't happen */
++ if (diff < 0) {
++ printk(KERN_ERR BANNER "time running backwards\n");
++ goto out;
++ }
++
++ if (diff > sample)
++ sample = diff; /* only want highest value */
++
++ } while (total <= data.sample_width);
++
++ /* If we exceed the threshold value, we have found a hardware latency */
++ if (sample > data.threshold) {
++ struct sample s;
++
++ data.count++;
++ s.seqnum = data.count;
++ s.duration = sample;
++ s.timestamp = CURRENT_TIME;
++ __buffer_add_sample(&s);
++
++ /* Keep a running maximum ever recorded hardware latency */
++ if (sample > data.max_sample)
++ data.max_sample = sample;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
++
++/*
++ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
++ * @unused: A required part of the kthread API.
++ *
++ * Used to periodically sample the CPU TSC via a call to get_sample. We
++ * use stop_machine, which does (intentionally) introduce latency since we
++ * need to ensure nothing else might be running (and thus pre-empting).
++ * Obviously this should never be used in production environments.
++ *
++ * stop_machine will schedule us typically only on CPU0 which is fine for
++ * almost every real-world hardware latency situation - but we might later
++ * generalize this if we find there are any actual systems with alternate
++ * SMI delivery or other non-CPU0 hardware latencies.
++ */
++static int kthread_fn(void *unused)
++{
++ int err = 0;
++ u64 interval = 0;
++
++ while (!kthread_should_stop()) {
++
++ mutex_lock(&data.lock);
++
++ err = stop_machine(get_sample, unused, 0);
++ if (err) {
++ /* Houston, we have a problem */
++ mutex_unlock(&data.lock);
++ goto err_out;
++ }
++
++ wake_up(&data.wq); /* wake up reader(s) */
++
++ interval = data.sample_window - data.sample_width;
++ do_div(interval, USEC_PER_MSEC); /* modifies interval value */
++
++ mutex_unlock(&data.lock);
++
++ if (msleep_interruptible(interval))
++ goto out;
++ }
++ goto out;
++err_out:
++ printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
++ enabled = 0;
++out:
++ return err;
++
++}
++
++/**
++ * start_kthread - Kick off the hardware latency sampling/detector kthread
++ *
++ * This starts a kernel thread that will sit and sample the CPU timestamp
++ * counter (TSC or similar) and look for potential hardware latencies.
++ */
++static int start_kthread(void)
++{
++ kthread = kthread_run(kthread_fn, NULL,
++ DRVNAME);
++ if (IS_ERR(kthread)) {
++ printk(KERN_ERR BANNER "could not start sampling thread\n");
++ enabled = 0;
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++/**
++ * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
++ *
++ * This kicks the running hardware latency sampling/detector kernel thread and
++ * tells it to stop sampling now. Use this on unload and at system shutdown.
++ */
++static int stop_kthread(void)
++{
++ int ret;
++
++ ret = kthread_stop(kthread);
++
++ return ret;
++}
++
++/**
++ * __reset_stats - Reset statistics for the hardware latency detector
++ *
++ * We use data to store various statistics and global state. We call this
++ * function in order to reset those when "enable" is toggled on or off, and
++ * also at initialization. Should be called with data.lock held.
++ */
++static void __reset_stats(void)
++{
++ data.count = 0;
++ data.max_sample = 0;
++ ring_buffer_reset(ring_buffer); /* flush out old sample entries */
++}
++
++/**
++ * init_stats - Setup global state statistics for the hardware latency detector
++ *
++ * We use data to store various statistics and global state. We also use
++ * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
++ * induced system latencies. This function initializes these structures and
++ * allocates the global ring buffer also.
++ */
++static int init_stats(void)
++{
++ int ret = -ENOMEM;
++
++ mutex_init(&data.lock);
++ init_waitqueue_head(&data.wq);
++ atomic_set(&data.sample_open, 0);
++
++ ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
++
++ if (WARN(!ring_buffer, KERN_ERR BANNER
++ "failed to allocate ring buffer!\n"))
++ goto out;
++
++ __reset_stats();
++ data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */
++ data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
++ data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
++
++ ret = 0;
++
++out:
++ return ret;
++
++}
++
++/*
++ * simple_data_read - Wrapper read function for global state debugfs entries
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ * @entry: The entry to read from
++ *
++ * This function provides a generic read implementation for the global state
++ * "data" structure debugfs filesystem entries. It would be nice to use
++ * simple_attr_read directly, but we need to make sure that the data.lock
++ * mutex is held during the actual read (even though we likely won't ever
++ * actually race here as the updater runs under a stop_machine context).
++ */
++static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos, const u64 *entry)
++{
++ char buf[U64STR_SIZE];
++ u64 val = 0;
++ int len = 0;
++
++ memset(buf, 0, sizeof(buf));
++
++ if (!entry)
++ return -EFAULT;
++
++ mutex_lock(&data.lock);
++ val = *entry;
++ mutex_unlock(&data.lock);
++
++ len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
++
++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
++
++}
++
++/*
++ * simple_data_write - Wrapper write function for global state debugfs entries
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to write value from
++ * @cnt: The maximum number of bytes to write
++ * @ppos: The current "file" position
++ * @entry: The entry to write to
++ *
++ * This function provides a generic write implementation for the global state
++ * "data" structure debugfs filesystem entries. It would be nice to use
++ * simple_attr_write directly, but we need to make sure that the data.lock
++ * mutex is held during the actual write (even though we likely won't ever
++ * actually race here as the updater runs under a stop_machine context).
++ */
++static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
++ size_t cnt, loff_t *ppos, u64 *entry)
++{
++ char buf[U64STR_SIZE];
++ int csize = min(cnt, sizeof(buf));
++ u64 val = 0;
++ int err = 0;
++
++ memset(buf, '\0', sizeof(buf));
++ if (copy_from_user(buf, ubuf, csize))
++ return -EFAULT;
++
++ buf[U64STR_SIZE-1] = '\0'; /* just in case */
++ err = strict_strtoull(buf, 10, &val);
++ if (err)
++ return -EINVAL;
++
++ mutex_lock(&data.lock);
++ *entry = val;
++ mutex_unlock(&data.lock);
++
++ return csize;
++}
++
++/**
++ * debug_count_fopen - Open function for "count" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "count" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_count_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_count_fread - Read function for "count" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "count" debugfs
++ * interface to the hardware latency detector. Can be used to read the
++ * number of latency readings exceeding the configured threshold since
++ * the detector was last reset (e.g. by writing a zero into "count").
++ */
++static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
++}
++
++/**
++ * debug_count_fwrite - Write function for "count" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "count" debugfs
++ * interface to the hardware latency detector. Can be used to write a
++ * desired value, especially to zero the total count.
++ */
++static ssize_t debug_count_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
++}
++
++/**
++ * debug_enable_fopen - Dummy open function for "enable" debugfs interface
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "enable" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_enable_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_enable_fread - Read function for "enable" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "enable" debugfs
++ * interface to the hardware latency detector. Can be used to determine
++ * whether the detector is currently enabled ("0\n" or "1\n" returned).
++ */
++static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ char buf[4];
++
++ if ((cnt < sizeof(buf)) || (*ppos))
++ return 0;
++
++ buf[0] = enabled ? '1' : '0';
++ buf[1] = '\n';
++ buf[2] = '\0';
++ if (copy_to_user(ubuf, buf, strlen(buf)))
++ return -EFAULT;
++ return *ppos = strlen(buf);
++}
++
++/**
++ * debug_enable_fwrite - Write function for "enable" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "enable" debugfs
++ * interface to the hardware latency detector. Can be used to enable or
++ * disable the detector, which will have the side-effect of possibly
++ * also resetting the global stats and kicking off the measuring
++ * kthread (on an enable) or the converse (upon a disable).
++ */
++static ssize_t debug_enable_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ char buf[4];
++ int csize = min(cnt, sizeof(buf));
++ long val = 0;
++ int err = 0;
++
++ memset(buf, '\0', sizeof(buf));
++ if (copy_from_user(buf, ubuf, csize))
++ return -EFAULT;
++
++ buf[sizeof(buf)-1] = '\0'; /* just in case */
++ err = strict_strtoul(buf, 10, &val);
++ if (0 != err)
++ return -EINVAL;
++
++ if (val) {
++ if (enabled)
++ goto unlock;
++ enabled = 1;
++ __reset_stats();
++ if (start_kthread())
++ return -EFAULT;
++ } else {
++ if (!enabled)
++ goto unlock;
++ enabled = 0;
++ err = stop_kthread();
++ if (err) {
++ printk(KERN_ERR BANNER "cannot stop kthread\n");
++ return -EFAULT;
++ }
++ wake_up(&data.wq); /* reader(s) should return */
++ }
++unlock:
++ return csize;
++}
++
++/**
++ * debug_max_fopen - Open function for "max" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "max" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_max_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_max_fread - Read function for "max" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "max" debugfs
++ * interface to the hardware latency detector. Can be used to determine
++ * the maximum latency value observed since it was last reset.
++ */
++static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
++}
++
++/**
++ * debug_max_fwrite - Write function for "max" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "max" debugfs
++ * interface to the hardware latency detector. Can be used to reset the
++ * maximum or set it to some other desired value - if, then, subsequent
++ * measurements exceed this value, the maximum will be updated.
++ */
++static ssize_t debug_max_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
++}
++
++
++/**
++ * debug_sample_fopen - An open function for "sample" debugfs interface
++ * @inode: The in-kernel inode representation of this debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function handles opening the "sample" file within the hardware
++ * latency detector debugfs directory interface. This file is used to read
++ * raw samples from the global ring_buffer and allows the user to see a
++ * running latency history. Can be opened blocking or non-blocking,
++ * which determines whether reads block for new samples or return at once.
++ * Implements simple locking to prevent multiple simultaneous use.
++ */
++static int debug_sample_fopen(struct inode *inode, struct file *filp)
++{
++ if (!atomic_add_unless(&data.sample_open, 1, 1))
++ return -EBUSY;
++ else
++ return 0;
++}
++
++/**
++ * debug_sample_fread - A read function for "sample" debugfs interface
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that will contain the samples read
++ * @cnt: The maximum bytes to read from the debugfs "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function handles reading from the "sample" file within the hardware
++ * latency detector debugfs directory interface. This file is used to read
++ * raw samples from the global ring_buffer and allows the user to see a
++ * running latency history. By default this will block pending a new
++ * value written into the sample buffer, unless there are already a
++ * number of value(s) waiting in the buffer, or the sample file was
++ * previously opened in a non-blocking mode of operation.
++ */
++static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ int len = 0;
++ char buf[64];
++ struct sample *sample = NULL;
++
++ if (!enabled)
++ return 0;
++
++ sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
++ if (!sample)
++ return -ENOMEM;
++
++ while (!buffer_get_sample(sample)) {
++
++ DEFINE_WAIT(wait);
++
++ if (filp->f_flags & O_NONBLOCK) {
++ len = -EAGAIN;
++ goto out;
++ }
++
++ prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
++ schedule();
++ finish_wait(&data.wq, &wait);
++
++ if (signal_pending(current)) {
++ len = -EINTR;
++ goto out;
++ }
++
++ if (!enabled) { /* enable was toggled */
++ len = 0;
++ goto out;
++ }
++ }
++
++ len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
++ sample->timestamp.tv_sec,
++ sample->timestamp.tv_nsec,
++ sample->duration);
++
++
++ /* handling partial reads is more trouble than it's worth */
++ if (len > cnt)
++ goto out;
++
++ if (copy_to_user(ubuf, buf, len))
++ len = -EFAULT;
++
++out:
++ kfree(sample);
++ return len;
++}
++
++/**
++ * debug_sample_release - Release function for "sample" debugfs interface
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function completes the close of the debugfs interface "sample" file.
++ * Frees the sample_open "lock" so that other users may open the interface.
++ */
++static int debug_sample_release(struct inode *inode, struct file *filp)
++{
++ atomic_dec(&data.sample_open);
++
++ return 0;
++}
++
++/**
++ * debug_threshold_fopen - Open function for "threshold" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "threshold" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_threshold_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_threshold_fread - Read function for "threshold" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "threshold" debugfs
++ * interface to the hardware latency detector. It can be used to determine
++ * the current threshold level at which a latency will be recorded in the
++ * global ring buffer, typically on the order of 10us.
++ */
++static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
++}
++
++/**
++ * debug_threshold_fwrite - Write function for "threshold" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "threshold" debugfs
++ * interface to the hardware latency detector. It can be used to configure
++ * the threshold level at which any subsequently detected latencies will
++ * be recorded into the global ring buffer.
++ */
++static ssize_t debug_threshold_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ int ret;
++
++ ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
++
++ if (enabled)
++ wake_up_process(kthread);
++
++ return ret;
++}
++
++/**
++ * debug_width_fopen - Open function for "width" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "width" debugfs
++ * interface to the hardware latency detector.
++ */
++static int debug_width_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_width_fread - Read function for "width" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "width" debugfs
++ * interface to the hardware latency detector. It can be used to determine
++ * for how many us of the total window we will actively sample for any
++ * hardware-induced latency periods. Obviously, it is not possible to
++ * sample constantly and still have the system respond to a sample reader,
++ * or, worse, avoid appearing to have gone out to lunch.
++ */
++static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
++}
++
++/**
++ * debug_width_fwrite - Write function for "width" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "width" debugfs
++ * interface to the hardware latency detector. It can be used to configure
++ * for how many us of the total window we will actively sample for any
++ * hardware-induced latency periods. Obviously, it is not possible to
++ * sample constantly and still have the system respond to a sample reader,
++ * or, worse, avoid appearing to have gone out to lunch. It
++ * is enforced that width is less than the total window size.
++ */
++static ssize_t debug_width_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ char buf[U64STR_SIZE];
++ int csize = min(cnt, sizeof(buf));
++ u64 val = 0;
++ int err = 0;
++
++ memset(buf, '\0', sizeof(buf));
++ if (copy_from_user(buf, ubuf, csize))
++ return -EFAULT;
++
++ buf[U64STR_SIZE-1] = '\0'; /* just in case */
++ err = strict_strtoull(buf, 10, &val);
++ if (0 != err)
++ return -EINVAL;
++
++ mutex_lock(&data.lock);
++ if (val < data.sample_window)
++ data.sample_width = val;
++ else {
++ mutex_unlock(&data.lock);
++ return -EINVAL;
++ }
++ mutex_unlock(&data.lock);
++
++ if (enabled)
++ wake_up_process(kthread);
++
++ return csize;
++}
++
++/**
++ * debug_window_fopen - Open function for "window" debugfs entry
++ * @inode: The in-kernel inode representation of the debugfs "file"
++ * @filp: The active open file structure for the debugfs "file"
++ *
++ * This function provides an open implementation for the "window" debugfs
++ * interface to the hardware latency detector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs.
++ */
++static int debug_window_fopen(struct inode *inode, struct file *filp)
++{
++ return 0;
++}
++
++/**
++ * debug_window_fread - Read function for "window" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The userspace provided buffer to read value into
++ * @cnt: The maximum number of bytes to read
++ * @ppos: The current "file" position
++ *
++ * This function provides a read implementation for the "window" debugfs
++ * interface to the hardware latency detector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs. Can be used to read the total window size.
++ */
++static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
++}
++
++/**
++ * debug_window_fwrite - Write function for "window" debugfs entry
++ * @filp: The active open file structure for the debugfs "file"
++ * @ubuf: The user buffer that contains the value to write
++ * @cnt: The maximum number of bytes to write to "file"
++ * @ppos: The current position in the debugfs "file"
++ *
++ * This function provides a write implementation for the "window" debugfs
++ * interface to the hardware latency detector. The window is the total time
++ * in us that will be considered one sample period. Conceptually, windows
++ * occur back-to-back and contain a sample width period during which
++ * actual sampling occurs. Can be used to write a new total window size. It
++ * is enforced that any value written must be greater than the sample width
++ * size, or an error results.
++ */
++static ssize_t debug_window_fwrite(struct file *filp,
++ const char __user *ubuf,
++ size_t cnt,
++ loff_t *ppos)
++{
++ char buf[U64STR_SIZE];
++ int csize = min(cnt, sizeof(buf));
++ u64 val = 0;
++ int err = 0;
++
++ memset(buf, '\0', sizeof(buf));
++ if (copy_from_user(buf, ubuf, csize))
++ return -EFAULT;
++
++ buf[U64STR_SIZE-1] = '\0'; /* just in case */
++ err = strict_strtoull(buf, 10, &val);
++ if (0 != err)
++ return -EINVAL;
++
++ mutex_lock(&data.lock);
++ if (data.sample_width < val)
++ data.sample_window = val;
++ else {
++ mutex_unlock(&data.lock);
++ return -EINVAL;
++ }
++ mutex_unlock(&data.lock);
++
++ return csize;
++}
++
++/*
++ * Function pointers for the "count" debugfs file operations
++ */
++static const struct file_operations count_fops = {
++ .open = debug_count_fopen,
++ .read = debug_count_fread,
++ .write = debug_count_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "enable" debugfs file operations
++ */
++static const struct file_operations enable_fops = {
++ .open = debug_enable_fopen,
++ .read = debug_enable_fread,
++ .write = debug_enable_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "max" debugfs file operations
++ */
++static const struct file_operations max_fops = {
++ .open = debug_max_fopen,
++ .read = debug_max_fread,
++ .write = debug_max_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "sample" debugfs file operations
++ */
++static const struct file_operations sample_fops = {
++ .open = debug_sample_fopen,
++ .read = debug_sample_fread,
++ .release = debug_sample_release,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "threshold" debugfs file operations
++ */
++static const struct file_operations threshold_fops = {
++ .open = debug_threshold_fopen,
++ .read = debug_threshold_fread,
++ .write = debug_threshold_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "width" debugfs file operations
++ */
++static const struct file_operations width_fops = {
++ .open = debug_width_fopen,
++ .read = debug_width_fread,
++ .write = debug_width_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/*
++ * Function pointers for the "window" debugfs file operations
++ */
++static const struct file_operations window_fops = {
++ .open = debug_window_fopen,
++ .read = debug_window_fread,
++ .write = debug_window_fwrite,
++ .owner = THIS_MODULE,
++};
++
++/**
++ * init_debugfs - A function to initialize the debugfs interface files
++ *
++ * This function creates entries in debugfs for "hwlat_detector", including
++ * files to read values from the detector, current samples, and the
++ * maximum sample that has been captured since the hardware latency
++ * detector was started.
++ */
++static int init_debugfs(void)
++{
++ int ret = -ENOMEM;
++
++ debug_dir = debugfs_create_dir(DRVNAME, NULL);
++ if (!debug_dir)
++ goto err_debug_dir;
++
++ debug_sample = debugfs_create_file("sample", 0444,
++ debug_dir, NULL,
++ &sample_fops);
++ if (!debug_sample)
++ goto err_sample;
++
++ debug_count = debugfs_create_file("count", 0444,
++ debug_dir, NULL,
++ &count_fops);
++ if (!debug_count)
++ goto err_count;
++
++ debug_max = debugfs_create_file("max", 0444,
++ debug_dir, NULL,
++ &max_fops);
++ if (!debug_max)
++ goto err_max;
++
++ debug_sample_window = debugfs_create_file("window", 0644,
++ debug_dir, NULL,
++ &window_fops);
++ if (!debug_sample_window)
++ goto err_window;
++
++ debug_sample_width = debugfs_create_file("width", 0644,
++ debug_dir, NULL,
++ &width_fops);
++ if (!debug_sample_width)
++ goto err_width;
++
++ debug_threshold = debugfs_create_file("threshold", 0644,
++ debug_dir, NULL,
++ &threshold_fops);
++ if (!debug_threshold)
++ goto err_threshold;
++
++ debug_enable = debugfs_create_file("enable", 0644,
++ debug_dir, &enabled,
++ &enable_fops);
++ if (!debug_enable)
++ goto err_enable;
++
++ else {
++ ret = 0;
++ goto out;
++ }
++
++err_enable:
++ debugfs_remove(debug_threshold);
++err_threshold:
++ debugfs_remove(debug_sample_width);
++err_width:
++ debugfs_remove(debug_sample_window);
++err_window:
++ debugfs_remove(debug_max);
++err_max:
++ debugfs_remove(debug_count);
++err_count:
++ debugfs_remove(debug_sample);
++err_sample:
++ debugfs_remove(debug_dir);
++err_debug_dir:
++out:
++ return ret;
++}
++
++/**
++ * free_debugfs - A function to cleanup the debugfs file interface
++ */
++static void free_debugfs(void)
++{
++ /* could also use a debugfs_remove_recursive */
++ debugfs_remove(debug_enable);
++ debugfs_remove(debug_threshold);
++ debugfs_remove(debug_sample_width);
++ debugfs_remove(debug_sample_window);
++ debugfs_remove(debug_max);
++ debugfs_remove(debug_count);
++ debugfs_remove(debug_sample);
++ debugfs_remove(debug_dir);
++}
++
++/**
++ * detector_init - Standard module initialization code
++ */
++static int detector_init(void)
++{
++ int ret = -ENOMEM;
++
++ printk(KERN_INFO BANNER "version %s\n", VERSION);
++
++ ret = init_stats();
++ if (0 != ret)
++ goto out;
++
++ ret = init_debugfs();
++ if (0 != ret)
++ goto err_stats;
++
++ if (enabled)
++ ret = start_kthread();
++
++ goto out;
++
++err_stats:
++ ring_buffer_free(ring_buffer);
++out:
++ return ret;
++
++}
++
++/**
++ * detector_exit - Standard module cleanup code
++ */
++static void detector_exit(void)
++{
++ int err;
++
++ if (enabled) {
++ enabled = 0;
++ err = stop_kthread();
++ if (err)
++ printk(KERN_ERR BANNER "cannot stop kthread\n");
++ }
++
++ free_debugfs();
++ ring_buffer_free(ring_buffer); /* free up the ring buffer */
++
++}
++
++module_init(detector_init);
++module_exit(detector_exit);
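
The debugfs files created above are the module's only control surface. As a rough
user-space sketch (not part of the patch queue), a reader might drive it as below;
the mount point /sys/kernel/debug and a loaded hwlat_detector module are assumptions,
while the file names and sample format come from the code above.

/* hwlat_sample_reader.c - illustrative only */
#include <stdio.h>

#define HWLAT "/sys/kernel/debug/hwlat_detector/"

int main(void)
{
	FILE *f;
	char line[128];

	f = fopen(HWLAT "enable", "w");	/* toggling "enable" starts the sampling kthread */
	if (!f) {
		perror("enable");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);

	f = fopen(HWLAT "sample", "r");	/* blocking reads return "sec.nsec\tusecs" lines */
	if (!f) {
		perror("sample");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* print each detected latency as it arrives */
	fclose(f);
	return 0;
}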
diff --git a/patches/i2c-omap-drop-the-lock-hard-irq-context.patch b/patches/i2c-omap-drop-the-lock-hard-irq-context.patch
new file mode 100644
index 0000000..fe35c0a
--- /dev/null
+++ b/patches/i2c-omap-drop-the-lock-hard-irq-context.patch
@@ -0,0 +1,34 @@
+From 5145351047b216cca13aaca99f939a9a594c6c4d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 11:35:49 +0100
+Subject: [PATCH 2/3] i2c/omap: drop the lock hard irq context
+
+The lock is taken while reading two registers. On RT the lock is first
+taken in hard irq context, where it might sleep, and then again in the
+threaded irq. The threaded irq runs in oneshot mode, so the hard irq
+does not run again until the thread completes; there is no reason to
+grab the lock.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/i2c/busses/i2c-omap.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-omap.c
++++ b/drivers/i2c/busses/i2c-omap.c
+@@ -881,15 +881,12 @@ omap_i2c_isr(int irq, void *dev_id)
+ u16 mask;
+ u16 stat;
+
+- spin_lock(&dev->lock);
+- mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
+ stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
++ mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
+
+ if (stat & mask)
+ ret = IRQ_WAKE_THREAD;
+
+- spin_unlock(&dev->lock);
+-
+ return ret;
+ }
+
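
The reasoning above hinges on the oneshot threaded-IRQ model: the interrupt source
stays masked until the thread function returns, so the hard-irq handler cannot be
re-entered for that source while the thread is still working. A minimal sketch of
that registration pattern, with hypothetical names rather than the real i2c-omap ones:

#include <linux/interrupt.h>

static irqreturn_t foo_hardirq(int irq, void *dev_id)
{
	/* do only a quick check here; defer the real work to the thread */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t foo_thread_fn(int irq, void *dev_id)
{
	/* with IRQF_ONESHOT the line stays masked until we return,
	 * so foo_hardirq() cannot run again for this source meanwhile */
	return IRQ_HANDLED;
}

static int foo_setup_irq(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, foo_hardirq, foo_thread_fn,
				    IRQF_ONESHOT, "foo", dev);
}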
diff --git a/patches/ide-use-nort-local-irq-variants.patch b/patches/ide-use-nort-local-irq-variants.patch
new file mode 100644
index 0000000..21d4299
--- /dev/null
+++ b/patches/ide-use-nort-local-irq-variants.patch
@@ -0,0 +1,169 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:16 -0500
+Subject: ide: Do not disable interrupts for PREEMPT-RT
+
+Use the local_irq_*_nort variants.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/ide/alim15x3.c | 4 ++--
+ drivers/ide/hpt366.c | 4 ++--
+ drivers/ide/ide-io-std.c | 8 ++++----
+ drivers/ide/ide-io.c | 2 +-
+ drivers/ide/ide-iops.c | 4 ++--
+ drivers/ide/ide-probe.c | 4 ++--
+ drivers/ide/ide-taskfile.c | 6 +++---
+ 7 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/drivers/ide/alim15x3.c
++++ b/drivers/ide/alim15x3.c
+@@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct p
+
+ isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+
+ if (m5229_revision < 0xC2) {
+ /*
+@@ -325,7 +325,7 @@ out:
+ }
+ pci_dev_put(north);
+ pci_dev_put(isa_dev);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ return 0;
+ }
+
+--- a/drivers/ide/hpt366.c
++++ b/drivers/ide/hpt366.c
+@@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *h
+
+ dma_old = inb(base + 2);
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+
+ dma_new = dma_old;
+ pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
+@@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *h
+ if (dma_new != dma_old)
+ outb(dma_new, base + 2);
+
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
+ hwif->name, base, base + 7);
+--- a/drivers/ide/ide-io-std.c
++++ b/drivers/ide/ide-io-std.c
+@@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive,
+ unsigned long uninitialized_var(flags);
+
+ if ((io_32bit & 2) && !mmio) {
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ ata_vlb_sync(io_ports->nsect_addr);
+ }
+
+@@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive,
+ insl(data_addr, buf, words);
+
+ if ((io_32bit & 2) && !mmio)
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ if (((len + 1) & 3) < 2)
+ return;
+@@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive,
+ unsigned long uninitialized_var(flags);
+
+ if ((io_32bit & 2) && !mmio) {
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ ata_vlb_sync(io_ports->nsect_addr);
+ }
+
+@@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive,
+ outsl(data_addr, buf, words);
+
+ if ((io_32bit & 2) && !mmio)
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ if (((len + 1) & 3) < 2)
+ return;
+--- a/drivers/ide/ide-io.c
++++ b/drivers/ide/ide-io.c
+@@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long dat
+ /* disable_irq_nosync ?? */
+ disable_irq(hwif->irq);
+ /* local CPU only, as if we were handling an interrupt */
+- local_irq_disable();
++ local_irq_disable_nort();
+ if (hwif->polling) {
+ startstop = handler(drive);
+ } else if (drive_is_ready(drive)) {
+--- a/drivers/ide/ide-iops.c
++++ b/drivers/ide/ide-iops.c
+@@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive,
+ if ((stat & ATA_BUSY) == 0)
+ break;
+
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ *rstat = stat;
+ return -EBUSY;
+ }
+ }
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+ /*
+ * Allow status to settle, then read it again.
+--- a/drivers/ide/ide-probe.c
++++ b/drivers/ide/ide-probe.c
+@@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *dri
+ int bswap = 1;
+
+ /* local CPU only; some systems need this */
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ /* read 512 bytes of id info */
+ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ drive->dev_flags |= IDE_DFLAG_ID_READ;
+ #ifdef DEBUG
+--- a/drivers/ide/ide-taskfile.c
++++ b/drivers/ide/ide-taskfile.c
+@@ -251,7 +251,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
+
+ page_is_high = PageHighMem(page);
+ if (page_is_high)
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+
+ buf = kmap_atomic(page) + offset;
+
+@@ -272,7 +272,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
+ kunmap_atomic(buf);
+
+ if (page_is_high)
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ len -= nr_bytes;
+ }
+@@ -415,7 +415,7 @@ static ide_startstop_t pre_task_out_intr
+ }
+
+ if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
+- local_irq_disable();
++ local_irq_disable_nort();
+
+ ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
+
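
The _nort() primitives used throughout this patch are provided by a separate patch
in the series; roughly, they behave like the plain local_irq_* operations on a non-RT
kernel and become near no-ops on PREEMPT_RT_FULL, where the protected sections only
need protection from preemption, not from hard interrupts. A sketch of that shape
(treat it as illustrative, the authoritative definitions are in the queue itself):

#ifndef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
# define local_irq_disable_nort()	barrier()
# define local_irq_enable_nort()	barrier()
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#endif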
diff --git a/patches/idle-state.patch b/patches/idle-state.patch
new file mode 100644
index 0000000..4f1255c
--- /dev/null
+++ b/patches/idle-state.patch
@@ -0,0 +1,19 @@
+Subject: sched: Init idle->on_rq in init_idle()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 23:03:29 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4941,6 +4941,7 @@ void __cpuinit init_idle(struct task_str
+ rcu_read_unlock();
+
+ rq->curr = rq->idle = idle;
++ idle->on_rq = 1;
+ #if defined(CONFIG_SMP)
+ idle->on_cpu = 1;
+ #endif
diff --git a/patches/infiniband-mellanox-ib-use-nort-irq.patch b/patches/infiniband-mellanox-ib-use-nort-irq.patch
new file mode 100644
index 0000000..8b2c7fd
--- /dev/null
+++ b/patches/infiniband-mellanox-ib-use-nort-irq.patch
@@ -0,0 +1,40 @@
+From: Sven-Thorsten Dietrich <sdietrich@novell.com>
+Date: Fri, 3 Jul 2009 08:30:35 -0500
+Subject: infiniband: Mellanox IB driver patch use _nort() primitives
+
+Fixes in_atomic stack-dump, when Mellanox module is loaded into the RT
+Kernel.
+
+Michael S. Tsirkin <mst@dev.mellanox.co.il> sayeth:
+"Basically, if you just make spin_lock_irqsave (and spin_lock_irq) not disable
+interrupts for non-raw spinlocks, I think all of infiniband will be fine without
+changes."
+
+Signed-off-by: Sven-Thorsten Dietrich <sven@thebigcorporation.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+@@ -783,7 +783,7 @@ void ipoib_mcast_restart_task(struct wor
+
+ ipoib_mcast_stop_thread(dev, 0);
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ netif_addr_lock(dev);
+ spin_lock(&priv->lock);
+
+@@ -865,7 +865,7 @@ void ipoib_mcast_restart_task(struct wor
+
+ spin_unlock(&priv->lock);
+ netif_addr_unlock(dev);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ /* We have to cancel outside of the spinlock */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
diff --git a/patches/inpt-gameport-use-local-irq-nort.patch b/patches/inpt-gameport-use-local-irq-nort.patch
new file mode 100644
index 0000000..fc11de2
--- /dev/null
+++ b/patches/inpt-gameport-use-local-irq-nort.patch
@@ -0,0 +1,44 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:16 -0500
+Subject: input: gameport: Do not disable interrupts on PREEMPT_RT
+
+Use the _nort() primitives.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/input/gameport/gameport.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/input/gameport/gameport.c
++++ b/drivers/input/gameport/gameport.c
+@@ -87,12 +87,12 @@ static int gameport_measure_speed(struct
+ tx = 1 << 30;
+
+ for(i = 0; i < 50; i++) {
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ GET_TIME(t1);
+ for (t = 0; t < 50; t++) gameport_read(gameport);
+ GET_TIME(t2);
+ GET_TIME(t3);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ udelay(i * 10);
+ if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
+ }
+@@ -111,11 +111,11 @@ static int gameport_measure_speed(struct
+ tx = 1 << 30;
+
+ for(i = 0; i < 50; i++) {
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ rdtscl(t1);
+ for (t = 0; t < 50; t++) gameport_read(gameport);
+ rdtscl(t2);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ udelay(i * 10);
+ if (t2 - t1 < tx) tx = t2 - t1;
+ }
diff --git a/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch b/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch
new file mode 100644
index 0000000..2d46f5d
--- /dev/null
+++ b/patches/intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch
@@ -0,0 +1,67 @@
+Subject: intel_idle: Convert i7300_idle_lock to raw spinlock
+From: Mike Galbraith <efault@gmx.de>
+Date: Wed, 07 Dec 2011 12:48:42 +0100
+
+24 core Intel box's first exposure to 3.0.12-rt30-rc3 didn't go well.
+
+[ 27.104159] i7300_idle: loaded v1.55
+[ 27.104192] BUG: scheduling while atomic: swapper/2/0/0x00000002
+[ 27.104309] Pid: 0, comm: swapper/2 Tainted: G N 3.0.12-rt30-rc3-rt #1
+[ 27.104317] Call Trace:
+[ 27.104338] [<ffffffff810046a5>] dump_trace+0x85/0x2e0
+[ 27.104372] [<ffffffff8144eb00>] thread_return+0x12b/0x30b
+[ 27.104381] [<ffffffff8144f1b9>] schedule+0x29/0xb0
+[ 27.104389] [<ffffffff814506e5>] rt_spin_lock_slowlock+0xc5/0x240
+[ 27.104401] [<ffffffffa01f818f>] i7300_idle_notifier+0x3f/0x360 [i7300_idle]
+[ 27.104415] [<ffffffff814546c7>] notifier_call_chain+0x37/0x70
+[ 27.104426] [<ffffffff81454748>] __atomic_notifier_call_chain+0x48/0x70
+[ 27.104439] [<ffffffff81001a39>] cpu_idle+0x89/0xb0
+[ 27.104449] bad: scheduling from the idle thread!
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1323258522.5057.73.camel@marge.simson.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/idle/i7300_idle.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/idle/i7300_idle.c
++++ b/drivers/idle/i7300_idle.c
+@@ -75,7 +75,7 @@ static unsigned long past_skip;
+
+ static struct pci_dev *fbd_dev;
+
+-static spinlock_t i7300_idle_lock;
++static raw_spinlock_t i7300_idle_lock;
+ static int i7300_idle_active;
+
+ static u8 i7300_idle_thrtctl_saved;
+@@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct no
+ idle_begin_time = ktime_get();
+ }
+
+- spin_lock_irqsave(&i7300_idle_lock, flags);
++ raw_spin_lock_irqsave(&i7300_idle_lock, flags);
+ if (val == IDLE_START) {
+
+ cpumask_set_cpu(smp_processor_id(), idle_cpumask);
+@@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct no
+ }
+ }
+ end:
+- spin_unlock_irqrestore(&i7300_idle_lock, flags);
++ raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
+ return 0;
+ }
+
+@@ -548,7 +548,7 @@ struct debugfs_file_info {
+
+ static int __init i7300_idle_init(void)
+ {
+- spin_lock_init(&i7300_idle_lock);
++ raw_spin_lock_init(&i7300_idle_lock);
+ total_us = 0;
+
+ if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
diff --git a/patches/ipc-make-rt-aware.patch b/patches/ipc-make-rt-aware.patch
new file mode 100644
index 0000000..8b081cf
--- /dev/null
+++ b/patches/ipc-make-rt-aware.patch
@@ -0,0 +1,85 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:12 -0500
+Subject: ipc: Make the ipc code -rt aware
+
+RT serializes the code with the (rt)spinlock but keeps preemption
+enabled. Some parts of the code need to be atomic nevertheless.
+
+Protect it with preempt_disable/enable_rt pairs.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ ipc/mqueue.c | 5 +++++
+ ipc/msg.c | 16 ++++++++++++++++
+ 2 files changed, 21 insertions(+)
+
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -912,12 +912,17 @@ static inline void pipelined_send(struct
+ struct msg_msg *message,
+ struct ext_wait_queue *receiver)
+ {
++ /*
++ * Keep them in one critical section for PREEMPT_RT:
++ */
++ preempt_disable_rt();
+ receiver->msg = message;
+ list_del(&receiver->list);
+ receiver->state = STATE_PENDING;
+ wake_up_process(receiver->task);
+ smp_wmb();
+ receiver->state = STATE_READY;
++ preempt_enable_rt();
+ }
+
+ /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -259,12 +259,20 @@ static void expunge_all(struct msg_queue
+ while (tmp != &msq->q_receivers) {
+ struct msg_receiver *msr;
+
++ /*
++ * Make sure that the wakeup doesn't preempt
++ * this CPU prematurely. (on PREEMPT_RT)
++ */
++ preempt_disable_rt();
++
+ msr = list_entry(tmp, struct msg_receiver, r_list);
+ tmp = tmp->next;
+ msr->r_msg = NULL;
+ wake_up_process(msr->r_tsk);
+ smp_mb();
+ msr->r_msg = ERR_PTR(res);
++
++ preempt_enable_rt();
+ }
+ }
+
+@@ -614,6 +622,12 @@ static inline int pipelined_send(struct
+ !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
+ msr->r_msgtype, msr->r_mode)) {
+
++ /*
++ * Make sure that the wakeup doesn't preempt
++ * this CPU prematurely. (on PREEMPT_RT)
++ */
++ preempt_disable_rt();
++
+ list_del(&msr->r_list);
+ if (msr->r_maxsize < msg->m_ts) {
+ msr->r_msg = NULL;
+@@ -627,9 +641,11 @@ static inline int pipelined_send(struct
+ wake_up_process(msr->r_tsk);
+ smp_mb();
+ msr->r_msg = msg;
++ preempt_enable_rt();
+
+ return 1;
+ }
++ preempt_enable_rt();
+ }
+ }
+ return 0;
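
The preempt_disable_rt()/preempt_enable_rt() pair used here is the mirror image of
the _nort() helpers: it compiles away on a non-RT kernel and only turns into a real
preempt_disable()/preempt_enable() when the RT configuration is active, keeping the
wakeup and the state update in one non-preemptible section. Roughly, as a sketch and
not the authoritative definition from the series:

#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()		preempt_disable()
# define preempt_enable_rt()		preempt_enable()
#else
# define preempt_disable_rt()		barrier()
# define preempt_enable_rt()		barrier()
#endif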
diff --git a/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch b/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch
new file mode 100644
index 0000000..4d65d5b
--- /dev/null
+++ b/patches/ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch
@@ -0,0 +1,64 @@
+Subject: ipc/mqueue: Add a critical section to avoid a deadlock
+From: KOBAYASHI Yoshitake <yoshitake.kobayashi@toshiba.co.jp>
+Date: Sat, 23 Jul 2011 11:57:36 +0900
+
+(Repost for v3.0-rt1 and changed the destination addresses)
+I have tested the following patch on v3.0-rt1 with PREEMPT_RT_FULL.
+In POSIX message queue, if a sender process uses SCHED_FIFO and
+has a higher priority than a receiver process, the sender will
+be stuck at ipc/mqueue.c:452
+
+ 452 while (ewp->state == STATE_PENDING)
+ 453 cpu_relax();
+
+Description of the problem
+ (receiver process)
+ 1. receiver changes sender's state to STATE_PENDING (mqueue.c:846)
+ 2. wake up sender process and "switch to sender" (mqueue.c:847)
+ Note: This context switch only happens in PREEMPT_RT_FULL kernel.
+ (sender process)
+ 3. sender check the own state in above loop (mqueue.c:452-453)
+ *. receiver will never wake up and cannot change sender's state to
+ STATE_READY because sender has higher priority
+
+
+Signed-off-by: Yoshitake Kobayashi <yoshitake.kobayashi@toshiba.co.jp>
+Cc: viro@zeniv.linux.org.uk
+Cc: dchinner@redhat.com
+Cc: npiggin@kernel.dk
+Cc: hch@lst.de
+Cc: arnd@arndb.de
+Link: http://lkml.kernel.org/r/4E2A38A0.1090601@toshiba.co.jp
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ ipc/mqueue.c | 19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -936,13 +936,18 @@ static inline void pipelined_receive(str
+ wake_up_interruptible(&info->wait_q);
+ return;
+ }
+- if (msg_insert(sender->msg, info))
+- return;
+- list_del(&sender->list);
+- sender->state = STATE_PENDING;
+- wake_up_process(sender->task);
+- smp_wmb();
+- sender->state = STATE_READY;
++ /*
++ * Keep them in one critical section for PREEMPT_RT:
++ */
++ preempt_disable_rt();
++ if (!msg_insert(sender->msg, info)) {
++ list_del(&sender->list);
++ sender->state = STATE_PENDING;
++ wake_up_process(sender->task);
++ smp_wmb();
++ sender->state = STATE_READY;
++ }
++ preempt_enable_rt();
+ }
+
+ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
diff --git a/patches/ipc-sem-rework-semaphore-wakeups.patch b/patches/ipc-sem-rework-semaphore-wakeups.patch
new file mode 100644
index 0000000..a51262a
--- /dev/null
+++ b/patches/ipc-sem-rework-semaphore-wakeups.patch
@@ -0,0 +1,73 @@
+Subject: ipc/sem: Rework semaphore wakeups
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 14 Sep 2011 11:57:04 +0200
+
+Subject: ipc/sem: Rework semaphore wakeups
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Tue Sep 13 15:09:40 CEST 2011
+
+Current sysv sems have a weird ass wakeup scheme that involves keeping
+preemption disabled over a potential O(n^2) loop and busy waiting on
+that on other CPUs.
+
+Kill this and simply wake the task directly from under the sem_lock.
+
+This was discovered by a migrate_disable() debug feature that
+disallows:
+
+ spin_lock();
+ preempt_disable();
+ spin_unlock()
+ preempt_enable();
+
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Link: http://lkml.kernel.org/r/1315994224.5040.1.camel@twins
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ ipc/sem.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -461,6 +461,13 @@ undo:
+ static void wake_up_sem_queue_prepare(struct list_head *pt,
+ struct sem_queue *q, int error)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++ struct task_struct *p = q->sleeper;
++ get_task_struct(p);
++ q->status = error;
++ wake_up_process(p);
++ put_task_struct(p);
++#else
+ if (list_empty(pt)) {
+ /*
+ * Hold preempt off so that we don't get preempted and have the
+@@ -472,6 +479,7 @@ static void wake_up_sem_queue_prepare(st
+ q->pid = error;
+
+ list_add_tail(&q->simple_list, pt);
++#endif
+ }
+
+ /**
+@@ -485,6 +493,7 @@ static void wake_up_sem_queue_prepare(st
+ */
+ static void wake_up_sem_queue_do(struct list_head *pt)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ struct sem_queue *q, *t;
+ int did_something;
+
+@@ -497,6 +506,7 @@ static void wake_up_sem_queue_do(struct
+ }
+ if (did_something)
+ preempt_enable();
++#endif
+ }
+
+ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
diff --git a/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch b/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
new file mode 100644
index 0000000..be85497
--- /dev/null
+++ b/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
@@ -0,0 +1,144 @@
+Subject: genirq: Allow disabling of softirq processing in irq thread context
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 31 Jan 2012 13:01:27 +0100
+
+The processing of softirqs in irq thread context is a performance gain
+for the non-rt workloads of a system, but it's counterproductive for
+interrupts which are explicitly related to the realtime
+workload. Allow such interrupts to prevent softirq processing in their
+thread context.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/interrupt.h | 2 ++
+ include/linux/irq.h | 5 ++++-
+ kernel/irq/manage.c | 13 ++++++++++++-
+ kernel/irq/settings.h | 12 ++++++++++++
+ kernel/softirq.c | 7 +++++++
+ 5 files changed, 37 insertions(+), 2 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -58,6 +58,7 @@
+ * IRQF_NO_THREAD - Interrupt cannot be threaded
+ * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
+ * resume time.
++ * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
+ */
+ #define IRQF_DISABLED 0x00000020
+ #define IRQF_SHARED 0x00000080
+@@ -71,6 +72,7 @@
+ #define IRQF_FORCE_RESUME 0x00008000
+ #define IRQF_NO_THREAD 0x00010000
+ #define IRQF_EARLY_RESUME 0x00020000
++#define IRQF_NO_SOFTIRQ_CALL 0x00040000
+
+ #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
+
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -70,6 +70,7 @@ typedef void (*irq_preflow_handler_t)(st
+ * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
+ * IRQ_NESTED_THREAD - Interrupt nests into another thread
+ * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable
++ * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
+ */
+ enum {
+ IRQ_TYPE_NONE = 0x00000000,
+@@ -94,12 +95,14 @@ enum {
+ IRQ_NESTED_THREAD = (1 << 15),
+ IRQ_NOTHREAD = (1 << 16),
+ IRQ_PER_CPU_DEVID = (1 << 17),
++ IRQ_NO_SOFTIRQ_CALL = (1 << 18),
+ };
+
+ #define IRQF_MODIFY_MASK \
+ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
+ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
+- IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID)
++ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
++ IRQ_NO_SOFTIRQ_CALL)
+
+ #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
+
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -782,7 +782,15 @@ irq_forced_thread_fn(struct irq_desc *de
+ local_bh_disable();
+ ret = action->thread_fn(action->irq, action->dev_id);
+ irq_finalize_oneshot(desc, action);
+- local_bh_enable();
++ /*
++ * Interrupts which have real time requirements can be set up
++ * to avoid softirq processing in the thread handler. This is
++ * safe as these interrupts do not raise soft interrupts.
++ */
++ if (irq_settings_no_softirq_call(desc))
++ _local_bh_enable();
++ else
++ local_bh_enable();
+ return ret;
+ }
+
+@@ -1127,6 +1135,9 @@ __setup_irq(unsigned int irq, struct irq
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ }
+
++ if (new->flags & IRQF_NO_SOFTIRQ_CALL)
++ irq_settings_set_no_softirq_call(desc);
++
+ /* Set default affinity mask once everything is setup */
+ setup_affinity(irq, desc, mask);
+
+--- a/kernel/irq/settings.h
++++ b/kernel/irq/settings.h
+@@ -14,6 +14,7 @@ enum {
+ _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
+ _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
+ _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
++ _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
+ _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
+ };
+
+@@ -26,6 +27,7 @@ enum {
+ #define IRQ_NOAUTOEN GOT_YOU_MORON
+ #define IRQ_NESTED_THREAD GOT_YOU_MORON
+ #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
++#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
+ #undef IRQF_MODIFY_MASK
+ #define IRQF_MODIFY_MASK GOT_YOU_MORON
+
+@@ -36,6 +38,16 @@ irq_settings_clr_and_set(struct irq_desc
+ desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
+ }
+
++static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
++{
++ return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
++}
++
++static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
++{
++ desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
++}
++
+ static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
+ {
+ return desc->status_use_accessors & _IRQ_PER_CPU;
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -417,6 +417,13 @@ void local_bh_enable_ip(unsigned long ip
+ }
+ EXPORT_SYMBOL(local_bh_enable_ip);
+
++void _local_bh_enable(void)
++{
++ current->softirq_nestcnt--;
++ migrate_enable();
++}
++EXPORT_SYMBOL(_local_bh_enable);
++
+ /* For tracing */
+ int notrace __in_softirq(void)
+ {
diff --git a/patches/jump-label-rt.patch b/patches/jump-label-rt.patch
new file mode 100644
index 0000000..b3c1ead
--- /dev/null
+++ b/patches/jump-label-rt.patch
@@ -0,0 +1,21 @@
+Subject: jump-label-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 13 Jul 2011 11:03:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/jump_label.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -50,7 +50,8 @@
+ #include <linux/compiler.h>
+ #include <linux/workqueue.h>
+
+-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
++#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && \
++ !defined(CONFIG_PREEMPT_BASE)
+
+ struct static_key {
+ atomic_t enabled;
diff --git a/patches/kconfig-disable-a-few-options-rt.patch b/patches/kconfig-disable-a-few-options-rt.patch
new file mode 100644
index 0000000..17aee87
--- /dev/null
+++ b/patches/kconfig-disable-a-few-options-rt.patch
@@ -0,0 +1,44 @@
+Subject: kconfig-disable-a-few-options-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 24 Jul 2011 12:11:43 +0200
+
+Disable stuff which is known to have issues on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/Kconfig | 1 +
+ drivers/net/Kconfig | 1 +
+ mm/Kconfig | 2 +-
+ 3 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -6,6 +6,7 @@ config OPROFILE
+ tristate "OProfile system profiling"
+ depends on PROFILING
+ depends on HAVE_OPROFILE
++ depends on !PREEMPT_RT_FULL
+ select RING_BUFFER
+ select RING_BUFFER_ALLOW_SWAP
+ help
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -164,6 +164,7 @@ config VXLAN
+
+ config NETCONSOLE
+ tristate "Network console logging support"
++ depends on !PREEMPT_RT_FULL
+ ---help---
+ If you want to log kernel messages over the network, enable this.
+ See <file:Documentation/networking/netconsole.txt> for details.
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -353,7 +353,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
+
+ config TRANSPARENT_HUGEPAGE
+ bool "Transparent Hugepage Support"
+- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
++ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
+ select COMPACTION
+ help
+ Transparent Hugepages allows the kernel to use huge pages and
diff --git a/patches/kconfig-preempt-rt-full.patch b/patches/kconfig-preempt-rt-full.patch
new file mode 100644
index 0000000..62da6d7
--- /dev/null
+++ b/patches/kconfig-preempt-rt-full.patch
@@ -0,0 +1,56 @@
+Subject: kconfig-preempt-rt-full.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 14:58:57 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ init/Makefile | 2 +-
+ kernel/Kconfig.preempt | 8 ++++++++
+ scripts/mkcompile_h | 4 +++-
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/init/Makefile
++++ b/init/Makefile
+@@ -33,4 +33,4 @@ silent_chk_compile.h = :
+ include/generated/compile.h: FORCE
+ @$($(quiet)chk_compile.h)
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
+- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
++ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -73,6 +73,14 @@ config PREEMPT_RTB
+ enables changes which are preliminary for the full preemptible
+ RT kernel.
+
++config PREEMPT_RT_FULL
++ bool "Fully Preemptible Kernel (RT)"
++ depends on IRQ_FORCED_THREADING
++ select PREEMPT_RT_BASE
++ select PREEMPT_RCU
++ help
++ All and everything
++
+ endchoice
+
+ config PREEMPT_COUNT
+--- a/scripts/mkcompile_h
++++ b/scripts/mkcompile_h
+@@ -4,7 +4,8 @@ TARGET=$1
+ ARCH=$2
+ SMP=$3
+ PREEMPT=$4
+-CC=$5
++RT=$5
++CC=$6
+
+ vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
+
+@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
+ CONFIG_FLAGS=""
+ if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
+ if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
++if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
+ UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
+
+ # Truncate to maximum length
diff --git a/patches/kgb-serial-hackaround.patch b/patches/kgb-serial-hackaround.patch
new file mode 100644
index 0000000..1a13d29
--- /dev/null
+++ b/patches/kgb-serial-hackaround.patch
@@ -0,0 +1,102 @@
+From: Jason Wessel <jason.wessel@windriver.com>
+Date: Thu, 28 Jul 2011 12:42:23 -0500
+Subject: kgdb/serial: Short term workaround
+
+On 07/27/2011 04:37 PM, Thomas Gleixner wrote:
+> - KGDB (not yet disabled) is reportedly unusable on -rt right now due
+> to missing hacks in the console locking which I dropped on purpose.
+>
+
+To work around this in the short term you can use this patch, in
+addition to the clocksource watchdog patch that Thomas brewed up.
+
+Comments are welcome of course. Ultimately the right solution is to
+change separation between the console and the HW to have a polled mode
++ work queue so as not to introduce any kind of latency.
+
+Thanks,
+Jason.
+
+---
+ drivers/tty/serial/8250/8250.c | 3 ++-
+ include/linux/kdb.h | 3 ++-
+ kernel/debug/kdb/kdb_io.c | 6 ++----
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250.c
++++ b/drivers/tty/serial/8250/8250.c
+@@ -38,6 +38,7 @@
+ #include <linux/nmi.h>
+ #include <linux/mutex.h>
+ #include <linux/slab.h>
++#include <linux/kdb.h>
+ #ifdef CONFIG_SPARC
+ #include <linux/sunserialcore.h>
+ #endif
+@@ -2909,7 +2910,7 @@ serial8250_console_write(struct console
+
+ touch_nmi_watchdog();
+
+- if (port->sysrq || oops_in_progress)
++ if (port->sysrq || oops_in_progress || in_kdb_printk())
+ locked = spin_trylock_irqsave(&port->lock, flags);
+ else
+ spin_lock_irqsave(&port->lock, flags);
+--- a/include/linux/kdb.h
++++ b/include/linux/kdb.h
+@@ -115,7 +115,7 @@ extern int kdb_trap_printk;
+ extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
+ extern __printf(1, 2) int kdb_printf(const char *, ...);
+ typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
+-
++#define in_kdb_printk() (kdb_trap_printk)
+ extern void kdb_init(int level);
+
+ /* Access to kdb specific polling devices */
+@@ -150,6 +150,7 @@ extern int kdb_register_repeat(char *, k
+ extern int kdb_unregister(char *);
+ #else /* ! CONFIG_KGDB_KDB */
+ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
++#define in_kdb_printk() (0)
+ static inline void kdb_init(int level) {}
+ static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
+ char *help, short minlen) { return 0; }
+--- a/kernel/debug/kdb/kdb_io.c
++++ b/kernel/debug/kdb/kdb_io.c
+@@ -554,7 +554,6 @@ int vkdb_printf(const char *fmt, va_list
+ int linecount;
+ int colcount;
+ int logging, saved_loglevel = 0;
+- int saved_trap_printk;
+ int got_printf_lock = 0;
+ int retlen = 0;
+ int fnd, len;
+@@ -565,8 +564,6 @@ int vkdb_printf(const char *fmt, va_list
+ unsigned long uninitialized_var(flags);
+
+ preempt_disable();
+- saved_trap_printk = kdb_trap_printk;
+- kdb_trap_printk = 0;
+
+ /* Serialize kdb_printf if multiple cpus try to write at once.
+ * But if any cpu goes recursive in kdb, just print the output,
+@@ -833,7 +830,6 @@ kdb_print_out:
+ } else {
+ __release(kdb_printf_lock);
+ }
+- kdb_trap_printk = saved_trap_printk;
+ preempt_enable();
+ return retlen;
+ }
+@@ -843,9 +839,11 @@ int kdb_printf(const char *fmt, ...)
+ va_list ap;
+ int r;
+
++ kdb_trap_printk++;
+ va_start(ap, fmt);
+ r = vkdb_printf(fmt, ap);
+ va_end(ap);
++ kdb_trap_printk--;
+
+ return r;
+ }
diff --git a/patches/latency-hist.patch b/patches/latency-hist.patch
new file mode 100644
index 0000000..6e646b3
--- /dev/null
+++ b/patches/latency-hist.patch
@@ -0,0 +1,1804 @@
+Subject: latency-hist.patch
+From: Carsten Emde <C.Emde@osadl.org>
+Date: Tue, 19 Jul 2011 14:03:41 +0100
+
+This patch provides a recording mechanism to store data of potential
+sources of system latencies. The recordings separately determine the
+latency caused by a delayed timer expiration, by a delayed wakeup of the
+related user space program and by the sum of both. The histograms can be
+enabled and reset individually. The data are accessible via the debug
+filesystem. For details please consult Documentation/trace/histograms.txt.
+
+Signed-off-by: Carsten Emde <C.Emde@osadl.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ Documentation/trace/histograms.txt | 186 +++++
+ include/linux/hrtimer.h | 3
+ include/linux/sched.h | 6
+ include/trace/events/hist.h | 69 ++
+ include/trace/events/latency_hist.h | 29
+ kernel/hrtimer.c | 23
+ kernel/trace/Kconfig | 104 +++
+ kernel/trace/Makefile | 4
+ kernel/trace/latency_hist.c | 1176 ++++++++++++++++++++++++++++++++++++
+ kernel/trace/trace_irqsoff.c | 11
+ 10 files changed, 1611 insertions(+)
+
+--- /dev/null
++++ b/Documentation/trace/histograms.txt
+@@ -0,0 +1,186 @@
++ Using the Linux Kernel Latency Histograms
++
++
++This document gives a short explanation of how to enable, configure and use
++latency histograms. Latency histograms are primarily relevant in the
++context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
++and are used in the quality management of the Linux real-time
++capabilities.
++
++
++* Purpose of latency histograms
++
++A latency histogram continuously accumulates the frequencies of latency
++data. There are two types of histograms:
++- potential sources of latencies
++- effective latencies
++
++
++* Potential sources of latencies
++
++Potential sources of latencies are code segments where interrupts,
++preemption or both are disabled (aka critical sections). To create
++histograms of potential sources of latency, the kernel stores the time
++stamp at the start of a critical section, determines the time elapsed
++when the end of the section is reached, and increments the frequency
++counter of that latency value - irrespective of whether any concurrently
++running process is affected by latency or not.
++- Configuration items (in the Kernel hacking/Tracers submenu)
++ CONFIG_INTERRUPT_OFF_HIST
++ CONFIG_PREEMPT_OFF_HIST
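
The accumulation described above boils down to one bucket per microsecond
plus an out-of-range counter. A minimal sketch of the idea (illustrative
names only; the actual implementation is in kernel/trace/latency_hist.c,
added by this patch):

    #define SKETCH_MAX_ENTRY 10240

    struct sketch_hist {
            unsigned long long samples[SKETCH_MAX_ENTRY];
            unsigned long long out_of_range;
    };

    /* account one critical section that lasted 'latency_us' microseconds */
    static void sketch_account(struct sketch_hist *h, long latency_us)
    {
            if (latency_us >= 0 && latency_us < SKETCH_MAX_ENTRY)
                    h->samples[latency_us]++;   /* one bucket per usec */
            else
                    h->out_of_range++;          /* negative or too large */
    }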
++
++
++* Effective latencies
++
++Effective latencies are actually occurring during wakeup of a process. To
++determine effective latencies, the kernel stores the time stamp when a
++process is scheduled to be woken up, and determines the duration of the
++wakeup time shortly before control is passed over to this process. Note
++that the apparent latency in user space may be somewhat longer, since the
++process may be interrupted after control is passed over to it but before
++the execution in user space takes place. Simply measuring the interval
++between enqueuing and wakeup may also not be appropriate in cases when a
++process is scheduled as a result of a timer expiration. The timer may have
++missed its deadline, e.g. due to disabled interrupts, but this latency
++would not be registered. Therefore, the offsets of missed timers are
++recorded in a separate histogram. If both wakeup latency and missed timer
++offsets are configured and enabled, a third histogram may be enabled that
++records the overall latency as a sum of the timer latency, if any, and the
++wakeup latency. This histogram is called "timerandwakeup".
++- Configuration items (in the Kernel hacking/Tracers submenu)
++ CONFIG_WAKEUP_LATENCY_HIST
++ CONFIG_MISSED_TIMER_OFFSETS_HIST
++
++
++* Usage
++
++The interface to the administration of the latency histograms is located
++in the debugfs file system. To mount it, either enter
++
++mount -t sysfs nodev /sys
++mount -t debugfs nodev /sys/kernel/debug
++
++from shell command line level, or add
++
++nodev /sys sysfs defaults 0 0
++nodev /sys/kernel/debug debugfs defaults 0 0
++
++to the file /etc/fstab. All latency histogram related files are then
++available in the directory /sys/kernel/debug/tracing/latency_hist. A
++particular histogram type is enabled by writing non-zero to the related
++variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
++Select "preemptirqsoff" for the histograms of potential sources of
++latencies and "wakeup" for histograms of effective latencies etc. The
++histogram data - one per CPU - are available in the files
++
++/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
++/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
++/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
++/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
++/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
++
++The histograms are reset by writing non-zero to the file "reset" in a
++particular latency directory. To reset all latency data, use
++
++#!/bin/sh
++
++TRACINGDIR=/sys/kernel/debug/tracing
++HISTDIR=$TRACINGDIR/latency_hist
++
++if test -d $HISTDIR
++then
++ cd $HISTDIR
++ for i in `find . | grep /reset$`
++ do
++ echo 1 >$i
++ done
++fi
++
++
++* Data format
++
++Latency data are stored with a resolution of one microsecond. The
++maximum latency is 10,240 microseconds. The data are only valid if the
++overflow register is empty. Every output line contains the latency in
++microseconds in the first column and the number of samples in the second
++column. To display only lines with a positive latency count, use, for
++example,
++
++grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
++
++#Minimum latency: 0 microseconds.
++#Average latency: 0 microseconds.
++#Maximum latency: 25 microseconds.
++#Total samples: 3104770694
++#There are 0 samples greater or equal than 10240 microseconds
++#usecs samples
++ 0 2984486876
++ 1 49843506
++ 2 58219047
++ 3 5348126
++ 4 2187960
++ 5 3388262
++ 6 959289
++ 7 208294
++ 8 40420
++ 9 4485
++ 10 14918
++ 11 18340
++ 12 25052
++ 13 19455
++ 14 5602
++ 15 969
++ 16 47
++ 17 18
++ 18 14
++ 19 1
++ 20 3
++ 21 2
++ 22 5
++ 23 2
++ 25 1
++
++
++* Wakeup latency of a selected process
++
++To only collect wakeup latency data of a particular process, write the
++PID of the requested process to
++
++/sys/kernel/debug/tracing/latency_hist/wakeup/pid
++
++PIDs are not considered if this variable is set to 0.
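
For example, a small user-space helper could enable the wakeup histogram and
set this PID filter through the files documented above (a sketch only; error
handling is omitted, and the PID 1234 is just a placeholder):

    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/kernel/debug/tracing/latency_hist";
            char path[256];
            FILE *f;

            /* enable the wakeup histograms */
            snprintf(path, sizeof(path), "%s/enable/wakeup", base);
            f = fopen(path, "w");
            if (f) { fputs("1\n", f); fclose(f); }

            /* restrict data collection to one PID (placeholder value) */
            snprintf(path, sizeof(path), "%s/wakeup/pid", base);
            f = fopen(path, "w");
            if (f) { fprintf(f, "%d\n", 1234); fclose(f); }

            return 0;
    }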
++
++
++* Details of the process with the highest wakeup latency so far
++
++Selected data of the process that suffered from the highest wakeup
++latency that occurred in a particular CPU are available in the file
++
++/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
++
++In addition, other relevant system data at the time when the
++latency occurred are given.
++
++The format of the data is (all in one line):
++<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
++<- <PID> <Priority> <Command> <Timestamp>
++
++The value of <Timeroffset> is only relevant in the combined timer
++and wakeup latency recording. In the wakeup recording, it is
++always 0, in the missed_timer_offsets recording, it is the same
++as <Latency>.
++
++When retrospectively searching for the origin of a latency and
++tracing was not enabled, it may be helpful to know the name and
++some basic data of the task that (finally) was switching to the
++late real-time task. In addition to the victim's data, the data
++of the possible culprit are therefore also displayed after the
++"<-" symbol.
++
++Finally, the timestamp of the time when the latency occurred
++in <seconds>.<microseconds> after the most recent system boot
++is provided.
++
++These data are also reset when the wakeup histogram is reset.
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -111,6 +111,9 @@ struct hrtimer {
+ enum hrtimer_restart (*function)(struct hrtimer *);
+ struct hrtimer_clock_base *base;
+ unsigned long state;
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ ktime_t praecox;
++#endif
+ #ifdef CONFIG_TIMER_STATS
+ int start_pid;
+ void *start_site;
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1598,6 +1598,12 @@ struct task_struct {
+ unsigned long trace;
+ /* bitmask and counter of trace recursion */
+ unsigned long trace_recursion;
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ u64 preempt_timestamp_hist;
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ long timer_offset;
++#endif
++#endif
+ #endif /* CONFIG_TRACING */
+ #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
+ struct memcg_batch_info {
+--- /dev/null
++++ b/include/trace/events/hist.h
+@@ -0,0 +1,69 @@
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM hist
++
++#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_HIST_H
++
++#include "latency_hist.h"
++#include <linux/tracepoint.h>
++
++#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
++#define trace_preemptirqsoff_hist(a,b)
++#else
++TRACE_EVENT(preemptirqsoff_hist,
++
++ TP_PROTO(int reason, int starthist),
++
++ TP_ARGS(reason, starthist),
++
++ TP_STRUCT__entry(
++ __field(int, reason )
++ __field(int, starthist )
++ ),
++
++ TP_fast_assign(
++ __entry->reason = reason;
++ __entry->starthist = starthist;
++ ),
++
++ TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
++ __entry->starthist ? "start" : "stop")
++);
++#endif
++
++#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
++#define trace_hrtimer_interrupt(a,b,c,d)
++#else
++TRACE_EVENT(hrtimer_interrupt,
++
++ TP_PROTO(int cpu, long long offset, struct task_struct *curr, struct task_struct *task),
++
++ TP_ARGS(cpu, offset, curr, task),
++
++ TP_STRUCT__entry(
++ __field(int, cpu )
++ __field(long long, offset )
++ __array(char, ccomm, TASK_COMM_LEN)
++ __field(int, cprio )
++ __array(char, tcomm, TASK_COMM_LEN)
++ __field(int, tprio )
++ ),
++
++ TP_fast_assign(
++ __entry->cpu = cpu;
++ __entry->offset = offset;
++ memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
++ __entry->cprio = curr->prio;
++ memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", task != NULL ? TASK_COMM_LEN : 7);
++ __entry->tprio = task != NULL ? task->prio : -1;
++ ),
++
++ TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
++ __entry->cpu, __entry->offset, __entry->ccomm, __entry->cprio, __entry->tcomm, __entry->tprio)
++);
++#endif
++
++#endif /* _TRACE_HIST_H */
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
+--- /dev/null
++++ b/include/trace/events/latency_hist.h
+@@ -0,0 +1,29 @@
++#ifndef _LATENCY_HIST_H
++#define _LATENCY_HIST_H
++
++enum hist_action {
++ IRQS_ON,
++ PREEMPT_ON,
++ TRACE_STOP,
++ IRQS_OFF,
++ PREEMPT_OFF,
++ TRACE_START,
++};
++
++static char *actions[] = {
++ "IRQS_ON",
++ "PREEMPT_ON",
++ "TRACE_STOP",
++ "IRQS_OFF",
++ "PREEMPT_OFF",
++ "TRACE_START",
++};
++
++static inline char *getaction(int action)
++{
++ if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
++ return(actions[action]);
++ return("unknown");
++}
++
++#endif /* _LATENCY_HIST_H */
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -49,6 +49,7 @@
+ #include <asm/uaccess.h>
+
+ #include <trace/events/timer.h>
++#include <trace/events/hist.h>
+
+ /*
+ * The timer bases:
+@@ -970,6 +971,17 @@ int __hrtimer_start_range_ns(struct hrti
+ #endif
+ }
+
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ {
++ ktime_t now = new_base->get_time();
++
++ if (ktime_to_ns(tim) < ktime_to_ns(now))
++ timer->praecox = now;
++ else
++ timer->praecox = ktime_set(0, 0);
++ }
++#endif
++
+ hrtimer_set_expires_range_ns(timer, tim, delta_ns);
+
+ timer_stats_hrtimer_set_start_info(timer);
+@@ -1246,6 +1258,8 @@ static void __run_hrtimer(struct hrtimer
+
+ #ifdef CONFIG_HIGH_RES_TIMERS
+
++static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
++
+ /*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+@@ -1289,6 +1303,15 @@ retry:
+
+ timer = container_of(node, struct hrtimer, node);
+
++ trace_hrtimer_interrupt(raw_smp_processor_id(),
++ ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
++ timer->praecox : hrtimer_get_expires(timer),
++ basenow)),
++ current,
++ timer->function == hrtimer_wakeup ?
++ container_of(timer, struct hrtimer_sleeper,
++ timer)->task : NULL);
++
+ /*
+ * The immediate goal for using the softexpires is
+ * minimizing wakeups, not running timers at the
+--- a/kernel/trace/Kconfig
++++ b/kernel/trace/Kconfig
+@@ -202,6 +202,24 @@ config IRQSOFF_TRACER
+ enabled. This option and the preempt-off timing option can be
+ used together or separately.)
+
++config INTERRUPT_OFF_HIST
++ bool "Interrupts-off Latency Histogram"
++ depends on IRQSOFF_TRACER
++ help
++ This option generates continuously updated histograms (one per cpu)
++ of the duration of time periods with interrupts disabled. The
++ histograms are disabled by default. To enable them, write a non-zero
++ number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
++
++ If PREEMPT_OFF_HIST is also selected, additional histograms (one
++ per cpu) are generated that accumulate the duration of time periods
++ when both interrupts and preemption are disabled. The histogram data
++ will be located in the debug file system at
++
++ /sys/kernel/debug/tracing/latency_hist/irqsoff
++
+ config PREEMPT_TRACER
+ bool "Preemption-off Latency Tracer"
+ default n
+@@ -224,6 +242,24 @@ config PREEMPT_TRACER
+ enabled. This option and the irqs-off timing option can be
+ used together or separately.)
+
++config PREEMPT_OFF_HIST
++ bool "Preemption-off Latency Histogram"
++ depends on PREEMPT_TRACER
++ help
++ This option generates continuously updated histograms (one per cpu)
++ of the duration of time periods with preemption disabled. The
++ histograms are disabled by default. To enable them, write a non-zero
++ number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
++
++ If INTERRUPT_OFF_HIST is also selected, additional histograms (one
++ per cpu) are generated that accumulate the duration of time periods
++ when both interrupts and preemption are disabled. The histogram data
++ will be located in the debug file system at
++
++ /sys/kernel/debug/tracing/latency_hist/preemptoff
++
+ config SCHED_TRACER
+ bool "Scheduling Latency Tracer"
+ select GENERIC_TRACER
+@@ -233,6 +269,74 @@ config SCHED_TRACER
+ This tracer tracks the latency of the highest priority task
+ to be scheduled in, starting from the point it has woken up.
+
++config WAKEUP_LATENCY_HIST
++ bool "Scheduling Latency Histogram"
++ depends on SCHED_TRACER
++ help
++ This option generates continuously updated histograms (one per cpu)
++ of the scheduling latency of the highest priority task.
++ The histograms are disabled by default. To enable them, write a
++ non-zero number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/wakeup
++
++ Two different algorithms are used, one to determine the latency of
++ processes that exclusively use the highest priority of the system and
++ another one to determine the latency of processes that share the
++ highest system priority with other processes. The former is used to
++ improve hardware and system software, the latter to optimize the
++ priority design of a given system. The histogram data will be
++ located in the debug file system at
++
++ /sys/kernel/debug/tracing/latency_hist/wakeup
++
++ and
++
++ /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
++
++ If both Scheduling Latency Histogram and Missed Timer Offsets
++ Histogram are selected, additional histogram data will be collected
++ that contain, in addition to the wakeup latency, the timer latency, in
++ case the wakeup was triggered by an expired timer. These histograms
++ are available in the
++
++ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
++
++ directory. They reflect the apparent interrupt and scheduling latency
++ and are best suited to determine the worst-case latency of a given
++ system. To enable these histograms, write a non-zero number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
++
++config MISSED_TIMER_OFFSETS_HIST
++ depends on HIGH_RES_TIMERS
++ select GENERIC_TRACER
++ bool "Missed Timer Offsets Histogram"
++ help
++ Generate a histogram of missed timer offsets in microseconds. The
++ histograms are disabled by default. To enable them, write a non-zero
++ number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
++
++ The histogram data will be located in the debug file system at
++
++ /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
++
++ If both Scheduling Latency Histogram and Missed Timer Offsets
++ Histogram are selected, additional histogram data will be collected
++ that contain, in addition to the wakeup latency, the timer latency, in
++ case the wakeup was triggered by an expired timer. These histograms
++ are available in the
++
++ /sys/kernel/debug/tracing/latency_hist/timerandwakeup
++
++ directory. They reflect the apparent interrupt and scheduling latency
++ and are best suited to determine the worst-case latency of a given
++ system. To enable these histograms, write a non-zero number to
++
++ /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
++
+ config ENABLE_DEFAULT_TRACERS
+ bool "Trace process context switches and events"
+ depends on !GENERIC_TRACER
+--- a/kernel/trace/Makefile
++++ b/kernel/trace/Makefile
+@@ -34,6 +34,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_f
+ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
+ obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
+ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
++obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
++obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
++obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
++obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
+ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
+ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+--- /dev/null
++++ b/kernel/trace/latency_hist.c
+@@ -0,0 +1,1176 @@
++/*
++ * kernel/trace/latency_hist.c
++ *
++ * Add support for histograms of preemption-off latency and
++ * interrupt-off latency and wakeup latency; it depends on
++ * Real-Time Preemption Support.
++ *
++ * Copyright (C) 2005 MontaVista Software, Inc.
++ * Yi Yang <yyang@ch.mvista.com>
++ *
++ * Converted to work with the new latency tracer.
++ * Copyright (C) 2008 Red Hat, Inc.
++ * Steven Rostedt <srostedt@redhat.com>
++ *
++ */
++#include <linux/module.h>
++#include <linux/debugfs.h>
++#include <linux/seq_file.h>
++#include <linux/percpu.h>
++#include <linux/kallsyms.h>
++#include <linux/uaccess.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <asm/atomic.h>
++#include <asm/div64.h>
++
++#include "trace.h"
++#include <trace/events/sched.h>
++
++#define NSECS_PER_USECS 1000L
++
++#define CREATE_TRACE_POINTS
++#include <trace/events/hist.h>
++
++enum {
++ IRQSOFF_LATENCY = 0,
++ PREEMPTOFF_LATENCY,
++ PREEMPTIRQSOFF_LATENCY,
++ WAKEUP_LATENCY,
++ WAKEUP_LATENCY_SHAREDPRIO,
++ MISSED_TIMER_OFFSETS,
++ TIMERANDWAKEUP_LATENCY,
++ MAX_LATENCY_TYPE,
++};
++
++#define MAX_ENTRY_NUM 10240
++
++struct hist_data {
++ atomic_t hist_mode; /* 0 log, 1 don't log */
++ long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
++ long min_lat;
++ long max_lat;
++ unsigned long long below_hist_bound_samples;
++ unsigned long long above_hist_bound_samples;
++ long long accumulate_lat;
++ unsigned long long total_samples;
++ unsigned long long hist_array[MAX_ENTRY_NUM];
++};
++
++struct enable_data {
++ int latency_type;
++ int enabled;
++};
++
++static char *latency_hist_dir_root = "latency_hist";
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
++static char *irqsoff_hist_dir = "irqsoff";
++static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
++static DEFINE_PER_CPU(int, hist_irqsoff_counting);
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
++static char *preemptoff_hist_dir = "preemptoff";
++static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
++static DEFINE_PER_CPU(int, hist_preemptoff_counting);
++#endif
++
++#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
++static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
++static char *preemptirqsoff_hist_dir = "preemptirqsoff";
++static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
++static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
++#endif
++
++#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
++static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
++static struct enable_data preemptirqsoff_enabled_data = {
++ .latency_type = PREEMPTIRQSOFF_LATENCY,
++ .enabled = 0,
++};
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++struct maxlatproc_data {
++ char comm[FIELD_SIZEOF(struct task_struct, comm)];
++ char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
++ int pid;
++ int current_pid;
++ int prio;
++ int current_prio;
++ long latency;
++ long timeroffset;
++ cycle_t timestamp;
++};
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
++static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
++static char *wakeup_latency_hist_dir = "wakeup";
++static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
++static notrace void probe_wakeup_latency_hist_start(void *v,
++ struct task_struct *p, int success);
++static notrace void probe_wakeup_latency_hist_stop(void *v,
++ struct task_struct *prev, struct task_struct *next);
++static notrace void probe_sched_migrate_task(void *,
++ struct task_struct *task, int cpu);
++static struct enable_data wakeup_latency_enabled_data = {
++ .latency_type = WAKEUP_LATENCY,
++ .enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
++static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
++static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
++static DEFINE_PER_CPU(int, wakeup_sharedprio);
++static unsigned long wakeup_pid;
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
++static char *missed_timer_offsets_dir = "missed_timer_offsets";
++static notrace void probe_hrtimer_interrupt(void *v, int cpu,
++ long long offset, struct task_struct *curr, struct task_struct *task);
++static struct enable_data missed_timer_offsets_enabled_data = {
++ .latency_type = MISSED_TIMER_OFFSETS,
++ .enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
++static unsigned long missed_timer_offsets_pid;
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
++static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
++static struct enable_data timerandwakeup_enabled_data = {
++ .latency_type = TIMERANDWAKEUP_LATENCY,
++ .enabled = 0,
++};
++static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
++#endif
++
++void notrace latency_hist(int latency_type, int cpu, long latency,
++ long timeroffset, cycle_t stop,
++ struct task_struct *p)
++{
++ struct hist_data *my_hist;
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ struct maxlatproc_data *mp = NULL;
++#endif
++
++ if (cpu < 0 || cpu >= NR_CPUS || latency_type < 0 ||
++ latency_type >= MAX_LATENCY_TYPE)
++ return;
++
++ switch (latency_type) {
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ case IRQSOFF_LATENCY:
++ my_hist = &per_cpu(irqsoff_hist, cpu);
++ break;
++#endif
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ case PREEMPTOFF_LATENCY:
++ my_hist = &per_cpu(preemptoff_hist, cpu);
++ break;
++#endif
++#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
++ case PREEMPTIRQSOFF_LATENCY:
++ my_hist = &per_cpu(preemptirqsoff_hist, cpu);
++ break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ case WAKEUP_LATENCY:
++ my_hist = &per_cpu(wakeup_latency_hist, cpu);
++ mp = &per_cpu(wakeup_maxlatproc, cpu);
++ break;
++ case WAKEUP_LATENCY_SHAREDPRIO:
++ my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
++ mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
++ break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ case MISSED_TIMER_OFFSETS:
++ my_hist = &per_cpu(missed_timer_offsets, cpu);
++ mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
++ break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ case TIMERANDWAKEUP_LATENCY:
++ my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
++ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
++ break;
++#endif
++
++ default:
++ return;
++ }
++
++ latency += my_hist->offset;
++
++ if (atomic_read(&my_hist->hist_mode) == 0)
++ return;
++
++ if (latency < 0 || latency >= MAX_ENTRY_NUM) {
++ if (latency < 0)
++ my_hist->below_hist_bound_samples++;
++ else
++ my_hist->above_hist_bound_samples++;
++ } else
++ my_hist->hist_array[latency]++;
++
++ if (unlikely(latency > my_hist->max_lat ||
++ my_hist->min_lat == LONG_MAX)) {
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ if (latency_type == WAKEUP_LATENCY ||
++ latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
++ latency_type == MISSED_TIMER_OFFSETS ||
++ latency_type == TIMERANDWAKEUP_LATENCY) {
++ strncpy(mp->comm, p->comm, sizeof(mp->comm));
++ strncpy(mp->current_comm, current->comm,
++ sizeof(mp->current_comm));
++ mp->pid = task_pid_nr(p);
++ mp->current_pid = task_pid_nr(current);
++ mp->prio = p->prio;
++ mp->current_prio = current->prio;
++ mp->latency = latency;
++ mp->timeroffset = timeroffset;
++ mp->timestamp = stop;
++ }
++#endif
++ my_hist->max_lat = latency;
++ }
++ if (unlikely(latency < my_hist->min_lat))
++ my_hist->min_lat = latency;
++ my_hist->total_samples++;
++ my_hist->accumulate_lat += latency;
++}
++
++static void *l_start(struct seq_file *m, loff_t *pos)
++{
++ loff_t *index_ptr = NULL;
++ loff_t index = *pos;
++ struct hist_data *my_hist = m->private;
++
++ if (index == 0) {
++ char minstr[32], avgstr[32], maxstr[32];
++
++ atomic_dec(&my_hist->hist_mode);
++
++ if (likely(my_hist->total_samples)) {
++ long avg = (long) div64_s64(my_hist->accumulate_lat,
++ my_hist->total_samples);
++ snprintf(minstr, sizeof(minstr), "%ld",
++ my_hist->min_lat - my_hist->offset);
++ snprintf(avgstr, sizeof(avgstr), "%ld",
++ avg - my_hist->offset);
++ snprintf(maxstr, sizeof(maxstr), "%ld",
++ my_hist->max_lat - my_hist->offset);
++ } else {
++ strcpy(minstr, "<undef>");
++ strcpy(avgstr, minstr);
++ strcpy(maxstr, minstr);
++ }
++
++ seq_printf(m, "#Minimum latency: %s microseconds\n"
++ "#Average latency: %s microseconds\n"
++ "#Maximum latency: %s microseconds\n"
++ "#Total samples: %llu\n"
++ "#There are %llu samples lower than %ld"
++ " microseconds.\n"
++ "#There are %llu samples greater or equal"
++ " than %ld microseconds.\n"
++ "#usecs\t%16s\n",
++ minstr, avgstr, maxstr,
++ my_hist->total_samples,
++ my_hist->below_hist_bound_samples,
++ -my_hist->offset,
++ my_hist->above_hist_bound_samples,
++ MAX_ENTRY_NUM - my_hist->offset,
++ "samples");
++ }
++ if (index < MAX_ENTRY_NUM) {
++ index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
++ if (index_ptr)
++ *index_ptr = index;
++ }
++
++ return index_ptr;
++}
++
++static void *l_next(struct seq_file *m, void *p, loff_t *pos)
++{
++ loff_t *index_ptr = p;
++ struct hist_data *my_hist = m->private;
++
++ if (++*pos >= MAX_ENTRY_NUM) {
++ atomic_inc(&my_hist->hist_mode);
++ return NULL;
++ }
++ *index_ptr = *pos;
++ return index_ptr;
++}
++
++static void l_stop(struct seq_file *m, void *p)
++{
++ kfree(p);
++}
++
++static int l_show(struct seq_file *m, void *p)
++{
++ int index = *(loff_t *) p;
++ struct hist_data *my_hist = m->private;
++
++ seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
++ my_hist->hist_array[index]);
++ return 0;
++}
++
++static struct seq_operations latency_hist_seq_op = {
++ .start = l_start,
++ .next = l_next,
++ .stop = l_stop,
++ .show = l_show
++};
++
++static int latency_hist_open(struct inode *inode, struct file *file)
++{
++ int ret;
++
++ ret = seq_open(file, &latency_hist_seq_op);
++ if (!ret) {
++ struct seq_file *seq = file->private_data;
++ seq->private = inode->i_private;
++ }
++ return ret;
++}
++
++static struct file_operations latency_hist_fops = {
++ .open = latency_hist_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static void clear_maxlatprocdata(struct maxlatproc_data *mp)
++{
++ mp->comm[0] = mp->current_comm[0] = '\0';
++ mp->prio = mp->current_prio = mp->pid = mp->current_pid =
++ mp->latency = mp->timeroffset = -1;
++ mp->timestamp = 0;
++}
++#endif
++
++static void hist_reset(struct hist_data *hist)
++{
++ atomic_dec(&hist->hist_mode);
++
++ memset(hist->hist_array, 0, sizeof(hist->hist_array));
++ hist->below_hist_bound_samples = 0ULL;
++ hist->above_hist_bound_samples = 0ULL;
++ hist->min_lat = LONG_MAX;
++ hist->max_lat = LONG_MIN;
++ hist->total_samples = 0ULL;
++ hist->accumulate_lat = 0LL;
++
++ atomic_inc(&hist->hist_mode);
++}
++
++static ssize_t
++latency_hist_reset(struct file *file, const char __user *a,
++ size_t size, loff_t *off)
++{
++ int cpu;
++ struct hist_data *hist = NULL;
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ struct maxlatproc_data *mp = NULL;
++#endif
++ off_t latency_type = (off_t) file->private_data;
++
++ for_each_online_cpu(cpu) {
++
++ switch (latency_type) {
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ case PREEMPTOFF_LATENCY:
++ hist = &per_cpu(preemptoff_hist, cpu);
++ break;
++#endif
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ case IRQSOFF_LATENCY:
++ hist = &per_cpu(irqsoff_hist, cpu);
++ break;
++#endif
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++ case PREEMPTIRQSOFF_LATENCY:
++ hist = &per_cpu(preemptirqsoff_hist, cpu);
++ break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ case WAKEUP_LATENCY:
++ hist = &per_cpu(wakeup_latency_hist, cpu);
++ mp = &per_cpu(wakeup_maxlatproc, cpu);
++ break;
++ case WAKEUP_LATENCY_SHAREDPRIO:
++ hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
++ mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
++ break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ case MISSED_TIMER_OFFSETS:
++ hist = &per_cpu(missed_timer_offsets, cpu);
++ mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
++ break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ case TIMERANDWAKEUP_LATENCY:
++ hist = &per_cpu(timerandwakeup_latency_hist, cpu);
++ mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
++ break;
++#endif
++ }
++
++ hist_reset(hist);
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ if (latency_type == WAKEUP_LATENCY ||
++ latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
++ latency_type == MISSED_TIMER_OFFSETS ||
++ latency_type == TIMERANDWAKEUP_LATENCY)
++ clear_maxlatprocdata(mp);
++#endif
++ }
++
++ return size;
++}
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static ssize_t
++show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++ char buf[64];
++ int r;
++ unsigned long *this_pid = file->private_data;
++
++ r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++}
++
++static ssize_t do_pid(struct file *file, const char __user *ubuf,
++ size_t cnt, loff_t *ppos)
++{
++ char buf[64];
++ unsigned long pid;
++ unsigned long *this_pid = file->private_data;
++
++ if (cnt >= sizeof(buf))
++ return -EINVAL;
++
++ if (copy_from_user(&buf, ubuf, cnt))
++ return -EFAULT;
++
++ buf[cnt] = '\0';
++
++ if (strict_strtoul(buf, 10, &pid))
++ return(-EINVAL);
++
++ *this_pid = pid;
++
++ return cnt;
++}
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static ssize_t
++show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++ int r;
++ struct maxlatproc_data *mp = file->private_data;
++ int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
++ unsigned long long t;
++ unsigned long usecs, secs;
++ char *buf;
++
++ if (mp->pid == -1 || mp->current_pid == -1) {
++ buf = "(none)\n";
++ return simple_read_from_buffer(ubuf, cnt, ppos, buf,
++ strlen(buf));
++ }
++
++ buf = kmalloc(strmaxlen, GFP_KERNEL);
++ if (buf == NULL)
++ return -ENOMEM;
++
++ t = ns2usecs(mp->timestamp);
++ usecs = do_div(t, USEC_PER_SEC);
++ secs = (unsigned long) t;
++ r = snprintf(buf, strmaxlen,
++ "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
++ MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
++ mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
++ secs, usecs);
++ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++ kfree(buf);
++ return r;
++}
++#endif
++
++static ssize_t
++show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++ char buf[64];
++ struct enable_data *ed = file->private_data;
++ int r;
++
++ r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
++}
++
++static ssize_t
++do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
++{
++ char buf[64];
++ long enable;
++ struct enable_data *ed = file->private_data;
++
++ if (cnt >= sizeof(buf))
++ return -EINVAL;
++
++ if (copy_from_user(&buf, ubuf, cnt))
++ return -EFAULT;
++
++ buf[cnt] = 0;
++
++ if (strict_strtol(buf, 10, &enable))
++ return(-EINVAL);
++
++ if ((enable && ed->enabled) || (!enable && !ed->enabled))
++ return cnt;
++
++ if (enable) {
++ int ret;
++
++ switch (ed->latency_type) {
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++ case PREEMPTIRQSOFF_LATENCY:
++ ret = register_trace_preemptirqsoff_hist(
++ probe_preemptirqsoff_hist, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_preemptirqsoff_hist "
++ "to trace_preemptirqsoff_hist\n");
++ return ret;
++ }
++ break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ case WAKEUP_LATENCY:
++ ret = register_trace_sched_wakeup(
++ probe_wakeup_latency_hist_start, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_wakeup_latency_hist_start "
++ "to trace_sched_wakeup\n");
++ return ret;
++ }
++ ret = register_trace_sched_wakeup_new(
++ probe_wakeup_latency_hist_start, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_wakeup_latency_hist_start "
++ "to trace_sched_wakeup_new\n");
++ unregister_trace_sched_wakeup(
++ probe_wakeup_latency_hist_start, NULL);
++ return ret;
++ }
++ ret = register_trace_sched_switch(
++ probe_wakeup_latency_hist_stop, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_wakeup_latency_hist_stop "
++ "to trace_sched_switch\n");
++ unregister_trace_sched_wakeup(
++ probe_wakeup_latency_hist_start, NULL);
++ unregister_trace_sched_wakeup_new(
++ probe_wakeup_latency_hist_start, NULL);
++ return ret;
++ }
++ ret = register_trace_sched_migrate_task(
++ probe_sched_migrate_task, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_sched_migrate_task "
++ "to trace_sched_migrate_task\n");
++ unregister_trace_sched_wakeup(
++ probe_wakeup_latency_hist_start, NULL);
++ unregister_trace_sched_wakeup_new(
++ probe_wakeup_latency_hist_start, NULL);
++ unregister_trace_sched_switch(
++ probe_wakeup_latency_hist_stop, NULL);
++ return ret;
++ }
++ break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ case MISSED_TIMER_OFFSETS:
++ ret = register_trace_hrtimer_interrupt(
++ probe_hrtimer_interrupt, NULL);
++ if (ret) {
++ pr_info("wakeup trace: Couldn't assign "
++ "probe_hrtimer_interrupt "
++ "to trace_hrtimer_interrupt\n");
++ return ret;
++ }
++ break;
++#endif
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ case TIMERANDWAKEUP_LATENCY:
++ if (!wakeup_latency_enabled_data.enabled ||
++ !missed_timer_offsets_enabled_data.enabled)
++ return -EINVAL;
++ break;
++#endif
++ default:
++ break;
++ }
++ } else {
++ switch (ed->latency_type) {
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++ case PREEMPTIRQSOFF_LATENCY:
++ {
++ int cpu;
++
++ unregister_trace_preemptirqsoff_hist(
++ probe_preemptirqsoff_hist, NULL);
++ for_each_online_cpu(cpu) {
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ per_cpu(hist_irqsoff_counting,
++ cpu) = 0;
++#endif
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ per_cpu(hist_preemptoff_counting,
++ cpu) = 0;
++#endif
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++ per_cpu(hist_preemptirqsoff_counting,
++ cpu) = 0;
++#endif
++ }
++ }
++ break;
++#endif
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ case WAKEUP_LATENCY:
++ {
++ int cpu;
++
++ unregister_trace_sched_wakeup(
++ probe_wakeup_latency_hist_start, NULL);
++ unregister_trace_sched_wakeup_new(
++ probe_wakeup_latency_hist_start, NULL);
++ unregister_trace_sched_switch(
++ probe_wakeup_latency_hist_stop, NULL);
++ unregister_trace_sched_migrate_task(
++ probe_sched_migrate_task, NULL);
++
++ for_each_online_cpu(cpu) {
++ per_cpu(wakeup_task, cpu) = NULL;
++ per_cpu(wakeup_sharedprio, cpu) = 0;
++ }
++ }
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ timerandwakeup_enabled_data.enabled = 0;
++#endif
++ break;
++#endif
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ case MISSED_TIMER_OFFSETS:
++ unregister_trace_hrtimer_interrupt(
++ probe_hrtimer_interrupt, NULL);
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ timerandwakeup_enabled_data.enabled = 0;
++#endif
++ break;
++#endif
++ default:
++ break;
++ }
++ }
++ ed->enabled = enable;
++ return cnt;
++}
++
++static const struct file_operations latency_hist_reset_fops = {
++ .open = tracing_open_generic,
++ .write = latency_hist_reset,
++};
++
++static const struct file_operations enable_fops = {
++ .open = tracing_open_generic,
++ .read = show_enable,
++ .write = do_enable,
++};
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++static const struct file_operations pid_fops = {
++ .open = tracing_open_generic,
++ .read = show_pid,
++ .write = do_pid,
++};
++
++static const struct file_operations maxlatproc_fops = {
++ .open = tracing_open_generic,
++ .read = show_maxlatproc,
++};
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++static notrace void probe_preemptirqsoff_hist(void *v, int reason,
++ int starthist)
++{
++ int cpu = raw_smp_processor_id();
++ int time_set = 0;
++
++ if (starthist) {
++ cycle_t uninitialized_var(start);
++
++ if (!preempt_count() && !irqs_disabled())
++ return;
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ if ((reason == IRQS_OFF || reason == TRACE_START) &&
++ !per_cpu(hist_irqsoff_counting, cpu)) {
++ per_cpu(hist_irqsoff_counting, cpu) = 1;
++ start = ftrace_now(cpu);
++ time_set++;
++ per_cpu(hist_irqsoff_start, cpu) = start;
++ }
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
++ !per_cpu(hist_preemptoff_counting, cpu)) {
++ per_cpu(hist_preemptoff_counting, cpu) = 1;
++ if (!(time_set++))
++ start = ftrace_now(cpu);
++ per_cpu(hist_preemptoff_start, cpu) = start;
++ }
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++ if (per_cpu(hist_irqsoff_counting, cpu) &&
++ per_cpu(hist_preemptoff_counting, cpu) &&
++ !per_cpu(hist_preemptirqsoff_counting, cpu)) {
++ per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
++ if (!time_set)
++ start = ftrace_now(cpu);
++ per_cpu(hist_preemptirqsoff_start, cpu) = start;
++ }
++#endif
++ } else {
++ cycle_t uninitialized_var(stop);
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ if ((reason == IRQS_ON || reason == TRACE_STOP) &&
++ per_cpu(hist_irqsoff_counting, cpu)) {
++ cycle_t start = per_cpu(hist_irqsoff_start, cpu);
++
++ stop = ftrace_now(cpu);
++ time_set++;
++ if (start) {
++ long latency = ((long) (stop - start)) /
++ NSECS_PER_USECS;
++
++ latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
++ stop, NULL);
++ }
++ per_cpu(hist_irqsoff_counting, cpu) = 0;
++ }
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
++ per_cpu(hist_preemptoff_counting, cpu)) {
++ cycle_t start = per_cpu(hist_preemptoff_start, cpu);
++
++ if (!(time_set++))
++ stop = ftrace_now(cpu);
++ if (start) {
++ long latency = ((long) (stop - start)) /
++ NSECS_PER_USECS;
++
++ latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
++ 0, stop, NULL);
++ }
++ per_cpu(hist_preemptoff_counting, cpu) = 0;
++ }
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++ if ((!per_cpu(hist_irqsoff_counting, cpu) ||
++ !per_cpu(hist_preemptoff_counting, cpu)) &&
++ per_cpu(hist_preemptirqsoff_counting, cpu)) {
++ cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
++
++ if (!time_set)
++ stop = ftrace_now(cpu);
++ if (start) {
++ long latency = ((long) (stop - start)) /
++ NSECS_PER_USECS;
++
++ latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
++ latency, 0, stop, NULL);
++ }
++ per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
++ }
++#endif
++ }
++}
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++static DEFINE_RAW_SPINLOCK(wakeup_lock);
++static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
++ int cpu)
++{
++ int old_cpu = task_cpu(task);
++
++ if (cpu != old_cpu) {
++ unsigned long flags;
++ struct task_struct *cpu_wakeup_task;
++
++ raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++ cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
++ if (task == cpu_wakeup_task) {
++ put_task_struct(cpu_wakeup_task);
++ per_cpu(wakeup_task, old_cpu) = NULL;
++ cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
++ get_task_struct(cpu_wakeup_task);
++ }
++
++ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++ }
++}
++
++static notrace void probe_wakeup_latency_hist_start(void *v,
++ struct task_struct *p, int success)
++{
++ unsigned long flags;
++ struct task_struct *curr = current;
++ int cpu = task_cpu(p);
++ struct task_struct *cpu_wakeup_task;
++
++ raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++ cpu_wakeup_task = per_cpu(wakeup_task, cpu);
++
++ if (wakeup_pid) {
++ if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
++ p->prio == curr->prio)
++ per_cpu(wakeup_sharedprio, cpu) = 1;
++ if (likely(wakeup_pid != task_pid_nr(p)))
++ goto out;
++ } else {
++ if (likely(!rt_task(p)) ||
++ (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
++ p->prio > curr->prio)
++ goto out;
++ if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
++ p->prio == curr->prio)
++ per_cpu(wakeup_sharedprio, cpu) = 1;
++ }
++
++ if (cpu_wakeup_task)
++ put_task_struct(cpu_wakeup_task);
++ cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
++ get_task_struct(cpu_wakeup_task);
++ cpu_wakeup_task->preempt_timestamp_hist =
++ ftrace_now(raw_smp_processor_id());
++out:
++ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++}
++
++static notrace void probe_wakeup_latency_hist_stop(void *v,
++ struct task_struct *prev, struct task_struct *next)
++{
++ unsigned long flags;
++ int cpu = task_cpu(next);
++ long latency;
++ cycle_t stop;
++ struct task_struct *cpu_wakeup_task;
++
++ raw_spin_lock_irqsave(&wakeup_lock, flags);
++
++ cpu_wakeup_task = per_cpu(wakeup_task, cpu);
++
++ if (cpu_wakeup_task == NULL)
++ goto out;
++
++ /* Already running? */
++ if (unlikely(current == cpu_wakeup_task))
++ goto out_reset;
++
++ if (next != cpu_wakeup_task) {
++ if (next->prio < cpu_wakeup_task->prio)
++ goto out_reset;
++
++ if (next->prio == cpu_wakeup_task->prio)
++ per_cpu(wakeup_sharedprio, cpu) = 1;
++
++ goto out;
++ }
++
++ if (current->prio == cpu_wakeup_task->prio)
++ per_cpu(wakeup_sharedprio, cpu) = 1;
++
++ /*
++ * The task we are waiting for is about to be switched to.
++ * Calculate latency and store it in histogram.
++ */
++ stop = ftrace_now(raw_smp_processor_id());
++
++ latency = ((long) (stop - next->preempt_timestamp_hist)) /
++ NSECS_PER_USECS;
++
++ if (per_cpu(wakeup_sharedprio, cpu)) {
++ latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
++ next);
++ per_cpu(wakeup_sharedprio, cpu) = 0;
++ } else {
++ latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ if (timerandwakeup_enabled_data.enabled) {
++ latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
++ next->timer_offset + latency, next->timer_offset,
++ stop, next);
++ }
++#endif
++ }
++
++out_reset:
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ next->timer_offset = 0;
++#endif
++ put_task_struct(cpu_wakeup_task);
++ per_cpu(wakeup_task, cpu) = NULL;
++out:
++ raw_spin_unlock_irqrestore(&wakeup_lock, flags);
++}
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++static notrace void probe_hrtimer_interrupt(void *v, int cpu,
++ long long latency_ns, struct task_struct *curr, struct task_struct *task)
++{
++ if (latency_ns <= 0 && task != NULL && rt_task(task) &&
++ (task->prio < curr->prio ||
++ (task->prio == curr->prio &&
++ !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
++ long latency;
++ cycle_t now;
++
++ if (missed_timer_offsets_pid) {
++ if (likely(missed_timer_offsets_pid !=
++ task_pid_nr(task)))
++ return;
++ }
++
++ now = ftrace_now(cpu);
++ latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
++ latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
++ task);
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ task->timer_offset = latency;
++#endif
++ }
++}
++#endif
++
++static __init int latency_hist_init(void)
++{
++ struct dentry *latency_hist_root = NULL;
++ struct dentry *dentry;
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ struct dentry *dentry_sharedprio;
++#endif
++ struct dentry *entry;
++ struct dentry *enable_root;
++ int i = 0;
++ struct hist_data *my_hist;
++ char name[64];
++ char *cpufmt = "CPU%d";
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ char *cpufmt_maxlatproc = "max_latency-CPU%d";
++ struct maxlatproc_data *mp = NULL;
++#endif
++
++ dentry = tracing_init_dentry();
++ latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
++ enable_root = debugfs_create_dir("enable", latency_hist_root);
++
++#ifdef CONFIG_INTERRUPT_OFF_HIST
++ dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(irqsoff_hist, i), &latency_hist_fops);
++ my_hist = &per_cpu(irqsoff_hist, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++ }
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#ifdef CONFIG_PREEMPT_OFF_HIST
++ dentry = debugfs_create_dir(preemptoff_hist_dir,
++ latency_hist_root);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(preemptoff_hist, i), &latency_hist_fops);
++ my_hist = &per_cpu(preemptoff_hist, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++ }
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
++ dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
++ latency_hist_root);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
++ my_hist = &per_cpu(preemptirqsoff_hist, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++ }
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
++#endif
++
++#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
++ entry = debugfs_create_file("preemptirqsoff", 0644,
++ enable_root, (void *)&preemptirqsoff_enabled_data,
++ &enable_fops);
++#endif
++
++#ifdef CONFIG_WAKEUP_LATENCY_HIST
++ dentry = debugfs_create_dir(wakeup_latency_hist_dir,
++ latency_hist_root);
++ dentry_sharedprio = debugfs_create_dir(
++ wakeup_latency_hist_dir_sharedprio, dentry);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(wakeup_latency_hist, i),
++ &latency_hist_fops);
++ my_hist = &per_cpu(wakeup_latency_hist, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++
++ entry = debugfs_create_file(name, 0444, dentry_sharedprio,
++ &per_cpu(wakeup_latency_hist_sharedprio, i),
++ &latency_hist_fops);
++ my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++
++ sprintf(name, cpufmt_maxlatproc, i);
++
++ mp = &per_cpu(wakeup_maxlatproc, i);
++ entry = debugfs_create_file(name, 0444, dentry, mp,
++ &maxlatproc_fops);
++ clear_maxlatprocdata(mp);
++
++ mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
++ entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
++ &maxlatproc_fops);
++ clear_maxlatprocdata(mp);
++ }
++ entry = debugfs_create_file("pid", 0644, dentry,
++ (void *)&wakeup_pid, &pid_fops);
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
++ entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
++ (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
++ entry = debugfs_create_file("wakeup", 0644,
++ enable_root, (void *)&wakeup_latency_enabled_data,
++ &enable_fops);
++#endif
++
++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
++ dentry = debugfs_create_dir(missed_timer_offsets_dir,
++ latency_hist_root);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
++ my_hist = &per_cpu(missed_timer_offsets, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++
++ sprintf(name, cpufmt_maxlatproc, i);
++ mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
++ entry = debugfs_create_file(name, 0444, dentry, mp,
++ &maxlatproc_fops);
++ clear_maxlatprocdata(mp);
++ }
++ entry = debugfs_create_file("pid", 0644, dentry,
++ (void *)&missed_timer_offsets_pid, &pid_fops);
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
++ entry = debugfs_create_file("missed_timer_offsets", 0644,
++ enable_root, (void *)&missed_timer_offsets_enabled_data,
++ &enable_fops);
++#endif
++
++#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
++ defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
++ dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
++ latency_hist_root);
++ for_each_possible_cpu(i) {
++ sprintf(name, cpufmt, i);
++ entry = debugfs_create_file(name, 0444, dentry,
++ &per_cpu(timerandwakeup_latency_hist, i),
++ &latency_hist_fops);
++ my_hist = &per_cpu(timerandwakeup_latency_hist, i);
++ atomic_set(&my_hist->hist_mode, 1);
++ my_hist->min_lat = LONG_MAX;
++
++ sprintf(name, cpufmt_maxlatproc, i);
++ mp = &per_cpu(timerandwakeup_maxlatproc, i);
++ entry = debugfs_create_file(name, 0444, dentry, mp,
++ &maxlatproc_fops);
++ clear_maxlatprocdata(mp);
++ }
++ entry = debugfs_create_file("reset", 0644, dentry,
++ (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
++ entry = debugfs_create_file("timerandwakeup", 0644,
++ enable_root, (void *)&timerandwakeup_enabled_data,
++ &enable_fops);
++#endif
++ return 0;
++}
++
++__initcall(latency_hist_init);
+--- a/kernel/trace/trace_irqsoff.c
++++ b/kernel/trace/trace_irqsoff.c
+@@ -17,6 +17,7 @@
+ #include <linux/fs.h>
+
+ #include "trace.h"
++#include <trace/events/hist.h>
+
+ static struct trace_array *irqsoff_trace __read_mostly;
+ static int tracer_enabled __read_mostly;
+@@ -438,11 +439,13 @@ void start_critical_timings(void)
+ {
+ if (preempt_trace() || irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
++ trace_preemptirqsoff_hist(TRACE_START, 1);
+ }
+ EXPORT_SYMBOL_GPL(start_critical_timings);
+
+ void stop_critical_timings(void)
+ {
++ trace_preemptirqsoff_hist(TRACE_STOP, 0);
+ if (preempt_trace() || irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+ }
+@@ -452,6 +455,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings)
+ #ifdef CONFIG_PROVE_LOCKING
+ void time_hardirqs_on(unsigned long a0, unsigned long a1)
+ {
++ trace_preemptirqsoff_hist(IRQS_ON, 0);
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(a0, a1);
+ }
+@@ -460,6 +464,7 @@ void time_hardirqs_off(unsigned long a0,
+ {
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(a0, a1);
++ trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+
+ #else /* !CONFIG_PROVE_LOCKING */
+@@ -485,6 +490,7 @@ inline void print_irqtrace_events(struct
+ */
+ void trace_hardirqs_on(void)
+ {
++ trace_preemptirqsoff_hist(IRQS_ON, 0);
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+ }
+@@ -494,11 +500,13 @@ void trace_hardirqs_off(void)
+ {
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
++ trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_off);
+
+ void trace_hardirqs_on_caller(unsigned long caller_addr)
+ {
++ trace_preemptirqsoff_hist(IRQS_ON, 0);
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, caller_addr);
+ }
+@@ -508,6 +516,7 @@ void trace_hardirqs_off_caller(unsigned
+ {
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, caller_addr);
++ trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ }
+ EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+@@ -517,12 +526,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller)
+ #ifdef CONFIG_PREEMPT_TRACER
+ void trace_preempt_on(unsigned long a0, unsigned long a1)
+ {
++ trace_preemptirqsoff_hist(PREEMPT_ON, 0);
+ if (preempt_trace() && !irq_trace())
+ stop_critical_timing(a0, a1);
+ }
+
+ void trace_preempt_off(unsigned long a0, unsigned long a1)
+ {
++ trace_preemptirqsoff_hist(PREEMPT_ON, 1);
+ if (preempt_trace() && !irq_trace())
+ start_critical_timing(a0, a1);
+ }
diff --git a/patches/lglocks-rt.patch b/patches/lglocks-rt.patch
new file mode 100644
index 0000000..f870ffc
--- /dev/null
+++ b/patches/lglocks-rt.patch
@@ -0,0 +1,173 @@
+Subject: lglocks-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 15 Jun 2011 11:02:21 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/lglock.h | 19 +++++++++++++++--
+ kernel/lglock.c | 54 ++++++++++++++++++++++++++++++++-----------------
+ 2 files changed, 53 insertions(+), 20 deletions(-)
+
+--- a/include/linux/lglock.h
++++ b/include/linux/lglock.h
+@@ -42,22 +42,37 @@
+ #endif
+
+ struct lglock {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ arch_spinlock_t __percpu *lock;
++#else
++ struct rt_mutex __percpu *lock;
++#endif
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lock_class_key lock_key;
+ struct lockdep_map lock_dep_map;
+ #endif
+ };
+
+-#define DEFINE_LGLOCK(name) \
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define DEFINE_LGLOCK(name) \
+ static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
+ = __ARCH_SPIN_LOCK_UNLOCKED; \
+ struct lglock name = { .lock = &name ## _lock }
+
+-#define DEFINE_STATIC_LGLOCK(name) \
++# define DEFINE_STATIC_LGLOCK(name) \
+ static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
+ = __ARCH_SPIN_LOCK_UNLOCKED; \
+ static struct lglock name = { .lock = &name ## _lock }
++#else
++
++# define DEFINE_LGLOCK(name) \
++ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \
++ struct lglock name = { .lock = &name ## _lock }
++
++# define DEFINE_STATIC_LGLOCK(name) \
++ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock); \
++ static struct lglock name = { .lock = &name ## _lock }
++#endif
+
+ void lg_lock_init(struct lglock *lg, char *name);
+ void lg_local_lock(struct lglock *lg);
+--- a/kernel/lglock.c
++++ b/kernel/lglock.c
+@@ -4,6 +4,15 @@
+ #include <linux/cpu.h>
+ #include <linux/string.h>
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define lg_lock_ptr arch_spinlock_t
++# define lg_do_lock(l) arch_spin_lock(l)
++# define lg_do_unlock(l) arch_spin_unlock(l)
++#else
++# define lg_lock_ptr struct rt_mutex
++# define lg_do_lock(l) __rt_spin_lock(l)
++# define lg_do_unlock(l) __rt_spin_unlock(l)
++#endif
+ /*
+ * Note there is no uninit, so lglocks cannot be defined in
+ * modules (but it's fine to use them from there)
+@@ -12,51 +21,60 @@
+
+ void lg_lock_init(struct lglock *lg, char *name)
+ {
++#ifdef CONFIG_PREEMPT_RT_FULL
++ int i;
++
++ for_each_possible_cpu(i) {
++ struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
++
++ rt_mutex_init(lock);
++ }
++#endif
+ LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
+ }
+ EXPORT_SYMBOL(lg_lock_init);
+
+ void lg_local_lock(struct lglock *lg)
+ {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+
+- preempt_disable();
++ migrate_disable();
+ rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ lock = this_cpu_ptr(lg->lock);
+- arch_spin_lock(lock);
++ lg_do_lock(lock);
+ }
+ EXPORT_SYMBOL(lg_local_lock);
+
+ void lg_local_unlock(struct lglock *lg)
+ {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+
+ rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ lock = this_cpu_ptr(lg->lock);
+- arch_spin_unlock(lock);
+- preempt_enable();
++ lg_do_unlock(lock);
++ migrate_enable();
+ }
+ EXPORT_SYMBOL(lg_local_unlock);
+
+ void lg_local_lock_cpu(struct lglock *lg, int cpu)
+ {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+
+- preempt_disable();
++ preempt_disable_nort();
+ rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ lock = per_cpu_ptr(lg->lock, cpu);
+- arch_spin_lock(lock);
++ lg_do_lock(lock);
+ }
+ EXPORT_SYMBOL(lg_local_lock_cpu);
+
+ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
+ {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+
+ rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ lock = per_cpu_ptr(lg->lock, cpu);
+- arch_spin_unlock(lock);
+- preempt_enable();
++ lg_do_unlock(lock);
++ preempt_enable_nort();
+ }
+ EXPORT_SYMBOL(lg_local_unlock_cpu);
+
+@@ -64,12 +82,12 @@ void lg_global_lock(struct lglock *lg)
+ {
+ int i;
+
+- preempt_disable();
++ preempt_disable_nort();
+ rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ for_each_possible_cpu(i) {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+ lock = per_cpu_ptr(lg->lock, i);
+- arch_spin_lock(lock);
++ lg_do_lock(lock);
+ }
+ }
+ EXPORT_SYMBOL(lg_global_lock);
+@@ -80,10 +98,10 @@ void lg_global_unlock(struct lglock *lg)
+
+ rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ for_each_possible_cpu(i) {
+- arch_spinlock_t *lock;
++ lg_lock_ptr *lock;
+ lock = per_cpu_ptr(lg->lock, i);
+- arch_spin_unlock(lock);
++ lg_do_unlock(lock);
+ }
+- preempt_enable();
++ preempt_enable_nort();
+ }
+ EXPORT_SYMBOL(lg_global_unlock);
diff --git a/patches/list-add-list-last-entry.patch b/patches/list-add-list-last-entry.patch
new file mode 100644
index 0000000..60a4798
--- /dev/null
+++ b/patches/list-add-list-last-entry.patch
@@ -0,0 +1,29 @@
+Subject: list-add-list-last-entry.patch
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 21 Jun 2011 11:22:36 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/list.h | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -362,6 +362,17 @@ static inline void list_splice_tail_init
+ list_entry((ptr)->next, type, member)
+
+ /**
++ * list_last_entry - get the last element from a list
++ * @ptr: the list head to take the element from.
++ * @type: the type of the struct this is embedded in.
++ * @member: the name of the list_struct within the struct.
++ *
++ * Note, that list is expected to be not empty.
++ */
++#define list_last_entry(ptr, type, member) \
++ list_entry((ptr)->prev, type, member)
++
++/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
diff --git a/patches/local-irq-rt-depending-variants.patch b/patches/local-irq-rt-depending-variants.patch
new file mode 100644
index 0000000..e53e0b4
--- /dev/null
+++ b/patches/local-irq-rt-depending-variants.patch
@@ -0,0 +1,52 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Jul 2009 22:34:14 +0200
+Subject: rt: local_irq_* variants depending on RT/!RT
+
+Add local_irq_*_(no)rt variants, which are mainly used to break
+interrupt-disabled sections on PREEMPT_RT or to explicitly disable
+interrupts on PREEMPT_RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/interrupt.h | 2 +-
+ include/linux/irqflags.h | 19 +++++++++++++++++++
+ 2 files changed, 20 insertions(+), 1 deletion(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -211,7 +211,7 @@ extern void devm_free_irq(struct device
+ #ifdef CONFIG_LOCKDEP
+ # define local_irq_enable_in_hardirq() do { } while (0)
+ #else
+-# define local_irq_enable_in_hardirq() local_irq_enable()
++# define local_irq_enable_in_hardirq() local_irq_enable_nort()
+ #endif
+
+ extern void disable_irq_nosync(unsigned int irq);
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -147,4 +147,23 @@
+
+ #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+
++/*
++ * local_irq* variants depending on RT/!RT
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define local_irq_disable_nort() do { } while (0)
++# define local_irq_enable_nort() do { } while (0)
++# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0)
++# define local_irq_restore_nort(flags) do { (void)(flags); } while (0)
++# define local_irq_disable_rt() local_irq_disable()
++# define local_irq_enable_rt() local_irq_enable()
++#else
++# define local_irq_disable_nort() local_irq_disable()
++# define local_irq_enable_nort() local_irq_enable()
++# define local_irq_save_nort(flags) local_irq_save(flags)
++# define local_irq_restore_nort(flags) local_irq_restore(flags)
++# define local_irq_disable_rt() do { } while (0)
++# define local_irq_enable_rt() do { } while (0)
++#endif
++
+ #endif
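A minimal usage sketch of the *_nort helpers defined above, with a hypothetical driver section as the example (dev->lock, dev->regs and REG_CTRL are illustrative names, not from any patch in this queue). On !RT the helpers compile to the plain local_irq_* calls; on RT they only save/restore the flags word, so the section stays preemptible and must be serialized by a sleeping lock instead:

	unsigned long flags;

	spin_lock(&dev->lock);			/* sleeping lock on RT */
	local_irq_save_nort(flags);		/* hard IRQs off only on !RT */
	writel(val, dev->regs + REG_CTRL);	/* hypothetical device access */
	local_irq_restore_nort(flags);
	spin_unlock(&dev->lock);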
diff --git a/patches/local-var.patch b/patches/local-var.patch
new file mode 100644
index 0000000..e39a312
--- /dev/null
+++ b/patches/local-var.patch
@@ -0,0 +1,23 @@
+Subject: local-var.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 24 Jun 2011 18:40:37 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/percpu.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/include/linux/percpu.h
++++ b/include/linux/percpu.h
+@@ -48,6 +48,11 @@
+ preempt_enable(); \
+ } while (0)
+
++#define get_local_var(var) get_cpu_var(var)
++#define put_local_var(var) put_cpu_var(var)
++#define get_local_ptr(var) get_cpu_ptr(var)
++#define put_local_ptr(var) put_cpu_ptr(var)
++
+ /* minimum unit size, also is the maximum supported allocation size */
+ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
+
diff --git a/patches/local-vars-migrate-disable.patch b/patches/local-vars-migrate-disable.patch
new file mode 100644
index 0000000..ebd6557
--- /dev/null
+++ b/patches/local-vars-migrate-disable.patch
@@ -0,0 +1,46 @@
+Subject: local-vars-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 20:42:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/percpu.h | 28 ++++++++++++++++++++++++----
+ 1 file changed, 24 insertions(+), 4 deletions(-)
+
+--- a/include/linux/percpu.h
++++ b/include/linux/percpu.h
+@@ -48,10 +48,30 @@
+ preempt_enable(); \
+ } while (0)
+
+-#define get_local_var(var) get_cpu_var(var)
+-#define put_local_var(var) put_cpu_var(var)
+-#define get_local_ptr(var) get_cpu_ptr(var)
+-#define put_local_ptr(var) put_cpu_ptr(var)
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define get_local_var(var) get_cpu_var(var)
++# define put_local_var(var) put_cpu_var(var)
++# define get_local_ptr(var) get_cpu_ptr(var)
++# define put_local_ptr(var) put_cpu_ptr(var)
++#else
++# define get_local_var(var) (*({ \
++ migrate_disable(); \
++ &__get_cpu_var(var); }))
++
++# define put_local_var(var) do { \
++ (void)&(var); \
++ migrate_enable(); \
++} while (0)
++
++# define get_local_ptr(var) ({ \
++ migrate_disable(); \
++ this_cpu_ptr(var); })
++
++# define put_local_ptr(var) do { \
++ (void)(var); \
++ migrate_enable(); \
++} while (0)
++#endif
+
+ /* minimum unit size, also is the maximum supported allocation size */
+ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
diff --git a/patches/localversion.patch b/patches/localversion.patch
new file mode 100644
index 0000000..56edefb
--- /dev/null
+++ b/patches/localversion.patch
@@ -0,0 +1,15 @@
+Subject: localversion.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 08 Jul 2011 20:25:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-8vdw4bfcsds27cvox6rpb334@git.kernel.org
+---
+ localversion-rt | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- /dev/null
++++ b/localversion-rt
+@@ -0,0 +1 @@
++-rt1
diff --git a/patches/lockdep-no-softirq-accounting-on-rt.patch b/patches/lockdep-no-softirq-accounting-on-rt.patch
new file mode 100644
index 0000000..dd739db
--- /dev/null
+++ b/patches/lockdep-no-softirq-accounting-on-rt.patch
@@ -0,0 +1,56 @@
+Subject: lockdep-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 18:51:23 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/irqflags.h | 10 +++++++---
+ kernel/lockdep.c | 2 ++
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -25,8 +25,6 @@
+ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
+ # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
+ # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
+-# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
+-# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
+ # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
+ #else
+ # define trace_hardirqs_on() do { } while (0)
+@@ -39,9 +37,15 @@
+ # define trace_softirqs_enabled(p) 0
+ # define trace_hardirq_enter() do { } while (0)
+ # define trace_hardirq_exit() do { } while (0)
++# define INIT_TRACE_IRQFLAGS
++#endif
++
++#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
++# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
++# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
++#else
+ # define lockdep_softirq_enter() do { } while (0)
+ # define lockdep_softirq_exit() do { } while (0)
+-# define INIT_TRACE_IRQFLAGS
+ #endif
+
+ #if defined(CONFIG_IRQSOFF_TRACER) || \
+--- a/kernel/lockdep.c
++++ b/kernel/lockdep.c
+@@ -3534,6 +3534,7 @@ static void check_flags(unsigned long fl
+ }
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * We dont accurately track softirq state in e.g.
+ * hardirq contexts (such as on 4KSTACKS), so only
+@@ -3548,6 +3549,7 @@ static void check_flags(unsigned long fl
+ DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ }
+ }
++#endif
+
+ if (!debug_locks)
+ print_irqtrace_events(current);
diff --git a/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch b/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch
new file mode 100644
index 0000000..8edb4d8
--- /dev/null
+++ b/patches/lockdep-selftest-convert-spinlock-to-raw-spinlock.patch
@@ -0,0 +1,90 @@
+Subject: lockdep: Selftest: convert spinlock to raw spinlock
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Mon, 16 Apr 2012 15:01:55 +0800
+
+From: Yong Zhang <yong.zhang@windriver.com>
+
+spinlocks are sleepable on -rt and cannot be used in
+interrupt context.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Yong Zhang <yong.zhang@windriver.com>
+Link: http://lkml.kernel.org/r/1334559716-18447-2-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/locking-selftest.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_de
+ * Normal standalone locks, for the circular and irq-context
+ * dependency tests:
+ */
+-static DEFINE_SPINLOCK(lock_A);
+-static DEFINE_SPINLOCK(lock_B);
+-static DEFINE_SPINLOCK(lock_C);
+-static DEFINE_SPINLOCK(lock_D);
++static DEFINE_RAW_SPINLOCK(lock_A);
++static DEFINE_RAW_SPINLOCK(lock_B);
++static DEFINE_RAW_SPINLOCK(lock_C);
++static DEFINE_RAW_SPINLOCK(lock_D);
+
+ static DEFINE_RWLOCK(rwlock_A);
+ static DEFINE_RWLOCK(rwlock_B);
+@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
+ * but X* and Y* are different classes. We do this so that
+ * we do not trigger a real lockup:
+ */
+-static DEFINE_SPINLOCK(lock_X1);
+-static DEFINE_SPINLOCK(lock_X2);
+-static DEFINE_SPINLOCK(lock_Y1);
+-static DEFINE_SPINLOCK(lock_Y2);
+-static DEFINE_SPINLOCK(lock_Z1);
+-static DEFINE_SPINLOCK(lock_Z2);
++static DEFINE_RAW_SPINLOCK(lock_X1);
++static DEFINE_RAW_SPINLOCK(lock_X2);
++static DEFINE_RAW_SPINLOCK(lock_Y1);
++static DEFINE_RAW_SPINLOCK(lock_Y2);
++static DEFINE_RAW_SPINLOCK(lock_Z1);
++static DEFINE_RAW_SPINLOCK(lock_Z2);
+
+ static DEFINE_RWLOCK(rwlock_X1);
+ static DEFINE_RWLOCK(rwlock_X2);
+@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
+ */
+ #define INIT_CLASS_FUNC(class) \
+ static noinline void \
+-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
+- struct rw_semaphore *rwsem) \
++init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
++ struct mutex *mutex, struct rw_semaphore *rwsem)\
+ { \
+- spin_lock_init(lock); \
++ raw_spin_lock_init(lock); \
+ rwlock_init(rwlock); \
+ mutex_init(mutex); \
+ init_rwsem(rwsem); \
+@@ -168,10 +168,10 @@ static void init_shared_classes(void)
+ * Shortcuts for lock/unlock API variants, to keep
+ * the testcases compact:
+ */
+-#define L(x) spin_lock(&lock_##x)
+-#define U(x) spin_unlock(&lock_##x)
++#define L(x) raw_spin_lock(&lock_##x)
++#define U(x) raw_spin_unlock(&lock_##x)
+ #define LU(x) L(x); U(x)
+-#define SI(x) spin_lock_init(&lock_##x)
++#define SI(x) raw_spin_lock_init(&lock_##x)
+
+ #define WL(x) write_lock(&rwlock_##x)
+ #define WU(x) write_unlock(&rwlock_##x)
+@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_
+
+ #define I2(x) \
+ do { \
+- spin_lock_init(&lock_##x); \
++ raw_spin_lock_init(&lock_##x); \
+ rwlock_init(&rwlock_##x); \
+ mutex_init(&mutex_##x); \
+ init_rwsem(&rwsem_##x); \
diff --git a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
new file mode 100644
index 0000000..f476b56
--- /dev/null
+++ b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
@@ -0,0 +1,56 @@
+Subject: lockdep: Selftest: Only do hardirq context test for raw spinlock
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Mon, 16 Apr 2012 15:01:56 +0800
+
+From: Yong Zhang <yong.zhang@windriver.com>
+
+On -rt there is no softirq context any more and rwlocks are sleepable,
+so disable the softirq context tests and the rwlock+irq tests.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Yong Zhang <yong.zhang@windriver.com>
+Link: http://lkml.kernel.org/r/1334559716-18447-3-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/locking-selftest.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -1175,6 +1175,7 @@ void locking_selftest(void)
+
+ printk(" --------------------------------------------------------------------------\n");
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * irq-context testcases:
+ */
+@@ -1187,6 +1188,28 @@ void locking_selftest(void)
+
+ DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
+ // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
++#else
++ /* On -rt, we only do hardirq context test for raw spinlock */
++ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
++ DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
++
++ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
++ DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
++
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
++
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
++ DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
++#endif
+
+ if (unexpected_testcase_failures) {
+ printk("-----------------------------------------------------------------\n");
diff --git a/patches/locking-various-init-fixes.patch b/patches/locking-various-init-fixes.patch
new file mode 100644
index 0000000..9ccea4d
--- /dev/null
+++ b/patches/locking-various-init-fixes.patch
@@ -0,0 +1,74 @@
+Subject: locking-various-init-fixes.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:25:03 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/char/random.c | 6 +++---
+ drivers/usb/chipidea/debug.c | 2 +-
+ fs/file.c | 2 +-
+ include/linux/idr.h | 2 +-
+ 4 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -445,7 +445,7 @@ static struct entropy_store input_pool =
+ .poolinfo = &poolinfo_table[0],
+ .name = "input",
+ .limit = 1,
+- .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
++ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
+ .pool = input_pool_data
+ };
+
+@@ -454,7 +454,7 @@ static struct entropy_store blocking_poo
+ .name = "blocking",
+ .limit = 1,
+ .pull = &input_pool,
+- .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
++ .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
+ .pool = blocking_pool_data
+ };
+
+@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_
+ .poolinfo = &poolinfo_table[1],
+ .name = "nonblocking",
+ .pull = &input_pool,
+- .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
++ .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
+ .pool = nonblocking_pool_data
+ };
+
+--- a/drivers/usb/chipidea/debug.c
++++ b/drivers/usb/chipidea/debug.c
+@@ -222,7 +222,7 @@ static struct {
+ } dbg_data = {
+ .idx = 0,
+ .tty = 0,
+- .lck = __RW_LOCK_UNLOCKED(lck)
++ .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
+ };
+
+ /**
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -516,7 +516,7 @@ struct files_struct init_files = {
+ .close_on_exec = init_files.close_on_exec_init,
+ .open_fds = init_files.open_fds_init,
+ },
+- .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
++ .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
+ };
+
+ /*
+--- a/include/linux/idr.h
++++ b/include/linux/idr.h
+@@ -136,7 +136,7 @@ struct ida {
+ struct ida_bitmap *free_bitmap;
+ };
+
+-#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, }
++#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
+ #define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
+
+ int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md-raid5-percpu-handling-rt-aware.patch
new file mode 100644
index 0000000..c4e56e5
--- /dev/null
+++ b/patches/md-raid5-percpu-handling-rt-aware.patch
@@ -0,0 +1,61 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 6 Apr 2010 16:51:31 +0200
+Subject: md: raid5: Make raid5_percpu handling RT aware
+
+__raid_run_ops() disables preemption with get_cpu() around the access
+to the raid5_percpu variables. That causes scheduling while atomic
+spews on RT.
+
+Serialize the access to the percpu data with a lock and keep the code
+preemptible.
+
+Reported-by: Udo van den Heuvel <udovdh@xs4all.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
+
+---
+ drivers/md/raid5.c | 7 +++++--
+ drivers/md/raid5.h | 1 +
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -1415,8 +1415,9 @@ static void __raid_run_ops(struct stripe
+ struct raid5_percpu *percpu;
+ unsigned long cpu;
+
+- cpu = get_cpu();
++ cpu = get_cpu_light();
+ percpu = per_cpu_ptr(conf->percpu, cpu);
++ spin_lock(&percpu->lock);
+ if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
+ ops_run_biofill(sh);
+ overlap_clear++;
+@@ -1468,7 +1469,8 @@ static void __raid_run_ops(struct stripe
+ if (test_and_clear_bit(R5_Overlap, &dev->flags))
+ wake_up(&sh->raid_conf->wait_for_overlap);
+ }
+- put_cpu();
++ spin_unlock(&percpu->lock);
++ put_cpu_light();
+ }
+
+ #ifdef CONFIG_MULTICORE_RAID456
+@@ -5093,6 +5095,7 @@ static int raid5_alloc_percpu(struct r5c
+ break;
+ }
+ per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
++ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
+ }
+ #ifdef CONFIG_HOTPLUG_CPU
+ conf->cpu_notify.notifier_call = raid456_cpu_notify;
+--- a/drivers/md/raid5.h
++++ b/drivers/md/raid5.h
+@@ -428,6 +428,7 @@ struct r5conf {
+ int recovery_disabled;
+ /* per cpu variables */
+ struct raid5_percpu {
++ spinlock_t lock; /* Protection for -RT */
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+ void *scribble; /* space for constructing buffer
+ * lists and performing address
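The raid5 change above illustrates a pattern that recurs in this queue: replace get_cpu()/put_cpu() with get_cpu_light()/put_cpu_light() and serialize the per-CPU data with a spinlock that becomes a sleeping lock on RT. A minimal sketch with hypothetical names (struct my_percpu and conf->percpu are illustrative):

	struct my_percpu {
		spinlock_t lock;	/* protects the per-CPU scratch data on RT */
		void *scratch;
	};

	cpu = get_cpu_light();			/* only disables migration on RT */
	percpu = per_cpu_ptr(conf->percpu, cpu);
	spin_lock(&percpu->lock);		/* sleeping lock on RT, spinlock otherwise */
	/* ... work on percpu->scratch ... */
	spin_unlock(&percpu->lock);
	put_cpu_light();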
diff --git a/patches/might-sleep-check-for-idle.patch b/patches/might-sleep-check-for-idle.patch
new file mode 100644
index 0000000..be17e41
--- /dev/null
+++ b/patches/might-sleep-check-for-idle.patch
@@ -0,0 +1,23 @@
+Subject: sched: Check for idle task in might_sleep()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 23:34:08 +0100
+
+Idle is not allowed to call sleeping functions ever!
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7391,7 +7391,8 @@ void __might_sleep(const char *file, int
+ static unsigned long prev_jiffy; /* ratelimiting */
+
+ rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
+- if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
++ !is_idle_task(current)) ||
+ system_state != SYSTEM_RUNNING || oops_in_progress)
+ return;
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
diff --git a/patches/migrate-disable-rt-variant.patch b/patches/migrate-disable-rt-variant.patch
new file mode 100644
index 0000000..68958de
--- /dev/null
+++ b/patches/migrate-disable-rt-variant.patch
@@ -0,0 +1,27 @@
+Subject: migrate-disable-rt-variant.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 19:48:20 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/preempt.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -121,11 +121,15 @@ extern void migrate_enable(void);
+ # define preempt_enable_rt() preempt_enable()
+ # define preempt_disable_nort() do { } while (0)
+ # define preempt_enable_nort() do { } while (0)
++# define migrate_disable_rt() migrate_disable()
++# define migrate_enable_rt() migrate_enable()
+ #else
+ # define preempt_disable_rt() do { } while (0)
+ # define preempt_enable_rt() do { } while (0)
+ # define preempt_disable_nort() preempt_disable()
+ # define preempt_enable_nort() preempt_enable()
++# define migrate_disable_rt() do { } while (0)
++# define migrate_enable_rt() do { } while (0)
+ #endif
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/patches/mips-disable-highmem-on-rt.patch b/patches/mips-disable-highmem-on-rt.patch
new file mode 100644
index 0000000..eb1312b
--- /dev/null
+++ b/patches/mips-disable-highmem-on-rt.patch
@@ -0,0 +1,20 @@
+Subject: mips-disable-highmem-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:10:12 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/mips/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -2102,7 +2102,7 @@ config CPU_R4400_WORKAROUNDS
+ #
+ config HIGHMEM
+ bool "High Memory Support"
+- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM
++ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL
+
+ config CPU_SUPPORTS_HIGHMEM
+ bool
diff --git a/patches/mips-enable-interrupts-in-signal.patch b/patches/mips-enable-interrupts-in-signal.patch
new file mode 100644
index 0000000..571c43a
--- /dev/null
+++ b/patches/mips-enable-interrupts-in-signal.patch
@@ -0,0 +1,19 @@
+Subject: mips-enable-interrupts-in-signal.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 21:32:10 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/mips/kernel/signal.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/mips/kernel/signal.c
++++ b/arch/mips/kernel/signal.c
+@@ -601,6 +601,7 @@ asmlinkage void do_notify_resume(struct
+ __u32 thread_info_flags)
+ {
+ local_irq_enable();
++ preempt_check_resched();
+
+ /* deal with pending signal delivery */
+ if (thread_info_flags & _TIF_SIGPENDING)
diff --git a/patches/mm-allow-slab-rt.patch b/patches/mm-allow-slab-rt.patch
new file mode 100644
index 0000000..864280a
--- /dev/null
+++ b/patches/mm-allow-slab-rt.patch
@@ -0,0 +1,29 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:44:03 -0500
+Subject: mm: Allow only slab on RT
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ init/Kconfig | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1493,6 +1493,7 @@ config SLAB
+
+ config SLUB
+ bool "SLUB (Unqueued Allocator)"
++ depends on !PREEMPT_RT_FULL
+ help
+ SLUB is a slab allocator that minimizes cache line usage
+ instead of managing queues of cached objects (SLAB approach).
+@@ -1504,6 +1505,7 @@ config SLUB
+ config SLOB
+ depends on EXPERT
+ bool "SLOB (Simple Allocator)"
++ depends on !PREEMPT_RT_FULL
+ help
+ SLOB replaces the stock allocator with a drastically simpler
+ allocator. SLOB is generally more space efficient but
diff --git a/patches/mm-bounce-local-irq-save-nort.patch b/patches/mm-bounce-local-irq-save-nort.patch
new file mode 100644
index 0000000..7a0219d
--- /dev/null
+++ b/patches/mm-bounce-local-irq-save-nort.patch
@@ -0,0 +1,27 @@
+Subject: mm: bounce: Use local_irq_save_nort
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 10:33:09 +0100
+
+kmap_atomic() is preemptible on RT.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/bounce.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/bounce.c
++++ b/mm/bounce.c
+@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_v
+ unsigned long flags;
+ unsigned char *vto;
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ vto = kmap_atomic(to->bv_page);
+ memcpy(vto + to->bv_offset, vfrom, to->bv_len);
+ kunmap_atomic(vto);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+
+ #else /* CONFIG_HIGHMEM */
diff --git a/patches/mm-cgroup-page-bit-spinlock.patch b/patches/mm-cgroup-page-bit-spinlock.patch
new file mode 100644
index 0000000..3b3b3a7
--- /dev/null
+++ b/patches/mm-cgroup-page-bit-spinlock.patch
@@ -0,0 +1,91 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 19 Aug 2009 09:56:42 +0200
+Subject: mm: Replace cgroup_page bit spinlock
+
+Bit spinlocks do not work on RT. Replace them.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/page_cgroup.h | 15 +++++++++++++++
+ mm/page_cgroup.c | 11 +++++++++++
+ 2 files changed, 26 insertions(+)
+
+--- a/include/linux/page_cgroup.h
++++ b/include/linux/page_cgroup.h
+@@ -24,6 +24,9 @@ enum {
+ */
+ struct page_cgroup {
+ unsigned long flags;
++#ifdef CONFIG_PREEMPT_RT_BASE
++ spinlock_t pcg_lock;
++#endif
+ struct mem_cgroup *mem_cgroup;
+ };
+
+@@ -74,12 +77,20 @@ static inline void lock_page_cgroup(stru
+ * Don't take this lock in IRQ context.
+ * This lock is for pc->mem_cgroup, USED, MIGRATION
+ */
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_lock(PCG_LOCK, &pc->flags);
++#else
++ spin_lock(&pc->pcg_lock);
++#endif
+ }
+
+ static inline void unlock_page_cgroup(struct page_cgroup *pc)
+ {
++#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_unlock(PCG_LOCK, &pc->flags);
++#else
++ spin_unlock(&pc->pcg_lock);
++#endif
+ }
+
+ #else /* CONFIG_MEMCG */
+@@ -102,6 +113,10 @@ static inline void __init page_cgroup_in
+ {
+ }
+
++static inline void page_cgroup_lock_init(struct page_cgroup *pc)
++{
++}
++
+ #endif /* CONFIG_MEMCG */
+
+ #include <linux/swap.h>
+--- a/mm/page_cgroup.c
++++ b/mm/page_cgroup.c
+@@ -13,6 +13,14 @@
+
+ static unsigned long total_usage;
+
++static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages)
++{
++#ifdef CONFIG_PREEMPT_RT_BASE
++ for (; nr_pages; nr_pages--, pc++)
++ spin_lock_init(&pc->pcg_lock);
++#endif
++}
++
+ #if !defined(CONFIG_SPARSEMEM)
+
+
+@@ -60,6 +68,7 @@ static int __init alloc_node_page_cgroup
+ return -ENOMEM;
+ NODE_DATA(nid)->node_page_cgroup = base;
+ total_usage += table_size;
++ page_cgroup_lock_init(base, nr_pages);
+ return 0;
+ }
+
+@@ -150,6 +159,8 @@ static int __meminit init_section_page_c
+ return -ENOMEM;
+ }
+
++ page_cgroup_lock_init(base, PAGES_PER_SECTION);
++
+ /*
+ * The passed "pfn" may not be aligned to SECTION. For the calculation
+ * we need to apply a mask.
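The page_cgroup conversion above is an instance of the generic RT substitution for bit spinlocks: add a real spinlock next to the flags word and take it instead of the flag bit when PREEMPT_RT_BASE is enabled. A minimal sketch with hypothetical names (struct obj and OBJ_LOCK are illustrative):

	struct obj {
		unsigned long flags;
	#ifdef CONFIG_PREEMPT_RT_BASE
		spinlock_t lock;		/* used instead of the OBJ_LOCK flag bit on RT */
	#endif
	};

	static inline void obj_lock(struct obj *o)
	{
	#ifndef CONFIG_PREEMPT_RT_BASE
		bit_spin_lock(OBJ_LOCK, &o->flags);
	#else
		spin_lock(&o->lock);
	#endif
	}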
diff --git a/patches/mm-convert-swap-to-percpu-locked.patch b/patches/mm-convert-swap-to-percpu-locked.patch
new file mode 100644
index 0000000..0e8ac43
--- /dev/null
+++ b/patches/mm-convert-swap-to-percpu-locked.patch
@@ -0,0 +1,113 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:51 -0500
+Subject: mm: convert swap to percpu locked
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/swap.c | 30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -30,6 +30,7 @@
+ #include <linux/backing-dev.h>
+ #include <linux/memcontrol.h>
+ #include <linux/gfp.h>
++#include <linux/locallock.h>
+
+ #include "internal.h"
+
+@@ -40,6 +41,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_
+ static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+
++static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
++static DEFINE_LOCAL_IRQ_LOCK(swap_lock);
++
+ /*
+ * This path almost never happens for VM activity - pages are normally
+ * freed via pagevecs. But it gets used by networking.
+@@ -354,11 +358,11 @@ void rotate_reclaimable_page(struct page
+ unsigned long flags;
+
+ page_cache_get(page);
+- local_irq_save(flags);
++ local_lock_irqsave(rotate_lock, flags);
+ pvec = &__get_cpu_var(lru_rotate_pvecs);
+ if (!pagevec_add(pvec, page))
+ pagevec_move_tail(pvec);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(rotate_lock, flags);
+ }
+ }
+
+@@ -403,12 +407,13 @@ static void activate_page_drain(int cpu)
+ void activate_page(struct page *page)
+ {
+ if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
++ struct pagevec *pvec = &get_locked_var(swap_lock,
++ activate_page_pvecs);
+
+ page_cache_get(page);
+ if (!pagevec_add(pvec, page))
+ pagevec_lru_move_fn(pvec, __activate_page, NULL);
+- put_cpu_var(activate_page_pvecs);
++ put_locked_var(swap_lock, activate_page_pvecs);
+ }
+ }
+
+@@ -456,13 +461,13 @@ EXPORT_SYMBOL(mark_page_accessed);
+ */
+ void __lru_cache_add(struct page *page, enum lru_list lru)
+ {
+- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
++ struct pagevec *pvec = &get_locked_var(swap_lock, lru_add_pvecs)[lru];
+
+ page_cache_get(page);
+ if (!pagevec_space(pvec))
+ __pagevec_lru_add(pvec, lru);
+ pagevec_add(pvec, page);
+- put_cpu_var(lru_add_pvecs);
++ put_locked_var(swap_lock, lru_add_pvecs);
+ }
+ EXPORT_SYMBOL(__lru_cache_add);
+
+@@ -597,9 +602,9 @@ void lru_add_drain_cpu(int cpu)
+ unsigned long flags;
+
+ /* No harm done if a racing interrupt already did this */
+- local_irq_save(flags);
++ local_lock_irqsave(rotate_lock, flags);
+ pagevec_move_tail(pvec);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(rotate_lock, flags);
+ }
+
+ pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+@@ -627,18 +632,19 @@ void deactivate_page(struct page *page)
+ return;
+
+ if (likely(get_page_unless_zero(page))) {
+- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
++ struct pagevec *pvec = &get_locked_var(swap_lock,
++ lru_deactivate_pvecs);
+
+ if (!pagevec_add(pvec, page))
+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+- put_cpu_var(lru_deactivate_pvecs);
++ put_locked_var(swap_lock, lru_deactivate_pvecs);
+ }
+ }
+
+ void lru_add_drain(void)
+ {
+- lru_add_drain_cpu(get_cpu());
+- put_cpu();
++ lru_add_drain_cpu(local_lock_cpu(swap_lock));
++ local_unlock_cpu(swap_lock);
+ }
+
+ static void lru_add_drain_per_cpu(struct work_struct *dummy)
diff --git a/patches/mm-enable-slub.patch b/patches/mm-enable-slub.patch
new file mode 100644
index 0000000..160b577
--- /dev/null
+++ b/patches/mm-enable-slub.patch
@@ -0,0 +1,402 @@
+Subject: mm: Enable SLUB for RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 25 Oct 2012 10:32:35 +0100
+
+Make SLUB RT aware and remove the restriction in Kconfig.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/slub_def.h | 2
+ init/Kconfig | 1
+ mm/slub.c | 119 +++++++++++++++++++++++++++++++++++------------
+ 3 files changed, 92 insertions(+), 30 deletions(-)
+
+--- a/include/linux/slub_def.h
++++ b/include/linux/slub_def.h
+@@ -54,7 +54,7 @@ struct kmem_cache_cpu {
+ };
+
+ struct kmem_cache_node {
+- spinlock_t list_lock; /* Protect partial list and nr_partial */
++ raw_spinlock_t list_lock; /* Protect partial list and nr_partial */
+ unsigned long nr_partial;
+ struct list_head partial;
+ #ifdef CONFIG_SLUB_DEBUG
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1494,7 +1494,6 @@ config SLAB
+
+ config SLUB
+ bool "SLUB (Unqueued Allocator)"
+- depends on !PREEMPT_RT_FULL
+ help
+ SLUB is a slab allocator that minimizes cache line usage
+ instead of managing queues of cached objects (SLAB approach).
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1070,7 +1070,7 @@ static noinline struct kmem_cache_node *
+ {
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+
+- spin_lock_irqsave(&n->list_lock, *flags);
++ raw_spin_lock_irqsave(&n->list_lock, *flags);
+ slab_lock(page);
+
+ if (!check_slab(s, page))
+@@ -1118,7 +1118,7 @@ out:
+
+ fail:
+ slab_unlock(page);
+- spin_unlock_irqrestore(&n->list_lock, *flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, *flags);
+ slab_fix(s, "Object at 0x%p not freed", object);
+ return NULL;
+ }
+@@ -1253,6 +1253,12 @@ static inline void slab_free_hook(struct
+
+ #endif /* CONFIG_SLUB_DEBUG */
+
++struct slub_free_list {
++ raw_spinlock_t lock;
++ struct list_head list;
++};
++static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
++
+ /*
+ * Slab allocation and freeing
+ */
+@@ -1277,7 +1283,11 @@ static struct page *allocate_slab(struct
+
+ flags &= gfp_allowed_mask;
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ if (system_state == SYSTEM_RUNNING)
++#else
+ if (flags & __GFP_WAIT)
++#endif
+ local_irq_enable();
+
+ flags |= s->allocflags;
+@@ -1317,7 +1327,11 @@ static struct page *allocate_slab(struct
+ kmemcheck_mark_unallocated_pages(page, pages);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ if (system_state == SYSTEM_RUNNING)
++#else
+ if (flags & __GFP_WAIT)
++#endif
+ local_irq_disable();
+ if (!page)
+ return NULL;
+@@ -1414,6 +1428,16 @@ static void __free_slab(struct kmem_cach
+ __free_memcg_kmem_pages(page, order);
+ }
+
++static void free_delayed(struct kmem_cache *s, struct list_head *h)
++{
++ while(!list_empty(h)) {
++ struct page *page = list_first_entry(h, struct page, lru);
++
++ list_del(&page->lru);
++ __free_slab(s, page);
++ }
++}
++
+ #define need_reserve_slab_rcu \
+ (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
+
+@@ -1448,6 +1472,12 @@ static void free_slab(struct kmem_cache
+ }
+
+ call_rcu(head, rcu_free_slab);
++ } else if (irqs_disabled()) {
++ struct slub_free_list *f = &__get_cpu_var(slub_free_list);
++
++ raw_spin_lock(&f->lock);
++ list_add(&page->lru, &f->list);
++ raw_spin_unlock(&f->lock);
+ } else
+ __free_slab(s, page);
+ }
+@@ -1549,7 +1579,7 @@ static void *get_partial_node(struct kme
+ if (!n || !n->nr_partial)
+ return NULL;
+
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ list_for_each_entry_safe(page, page2, &n->partial, lru) {
+ void *t;
+ int available;
+@@ -1574,7 +1604,7 @@ static void *get_partial_node(struct kme
+ break;
+
+ }
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+ return object;
+ }
+
+@@ -1816,7 +1846,7 @@ redo:
+ * that acquire_slab() will see a slab page that
+ * is frozen
+ */
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+ } else {
+ m = M_FULL;
+@@ -1827,7 +1857,7 @@ redo:
+ * slabs from diagnostic functions will not see
+ * any frozen slabs.
+ */
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+ }
+
+@@ -1862,7 +1892,7 @@ redo:
+ goto redo;
+
+ if (lock)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ if (m == M_FREE) {
+ stat(s, DEACTIVATE_EMPTY);
+@@ -1893,10 +1923,10 @@ static void unfreeze_partials(struct kme
+ n2 = get_node(s, page_to_nid(page));
+ if (n != n2) {
+ if (n)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ n = n2;
+- spin_lock(&n->list_lock);
++ raw_spin_lock(&n->list_lock);
+ }
+
+ do {
+@@ -1925,7 +1955,7 @@ static void unfreeze_partials(struct kme
+ }
+
+ if (n)
+- spin_unlock(&n->list_lock);
++ raw_spin_unlock(&n->list_lock);
+
+ while (discard_page) {
+ page = discard_page;
+@@ -1961,14 +1991,21 @@ static int put_cpu_partial(struct kmem_c
+ pobjects = oldpage->pobjects;
+ pages = oldpage->pages;
+ if (drain && pobjects > s->cpu_partial) {
++ struct slub_free_list *f;
+ unsigned long flags;
++ LIST_HEAD(tofree);
+ /*
+ * partial array is full. Move the existing
+ * set to the per node partial list.
+ */
+ local_irq_save(flags);
+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++ f = &__get_cpu_var(slub_free_list);
++ raw_spin_lock(&f->lock);
++ list_splice_init(&f->list, &tofree);
++ raw_spin_unlock(&f->lock);
+ local_irq_restore(flags);
++ free_delayed(s, &tofree);
+ oldpage = NULL;
+ pobjects = 0;
+ pages = 0;
+@@ -2031,7 +2068,22 @@ static bool has_cpu_slab(int cpu, void *
+
+ static void flush_all(struct kmem_cache *s)
+ {
++ LIST_HEAD(tofree);
++ int cpu;
++
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
++ for_each_online_cpu(cpu) {
++ struct slub_free_list *f;
++
++ if (!has_cpu_slab(cpu, s))
++ continue;
++
++ f = &per_cpu(slub_free_list, cpu);
++ raw_spin_lock_irq(&f->lock);
++ list_splice_init(&f->list, &tofree);
++ raw_spin_unlock_irq(&f->lock);
++ free_delayed(s, &tofree);
++ }
+ }
+
+ /*
+@@ -2059,10 +2111,10 @@ static unsigned long count_partial(struc
+ unsigned long x = 0;
+ struct page *page;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += get_count(page);
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+ }
+
+@@ -2205,9 +2257,11 @@ static inline void *get_freelist(struct
+ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ unsigned long addr, struct kmem_cache_cpu *c)
+ {
++ struct slub_free_list *f;
+ void *freelist;
+ struct page *page;
+ unsigned long flags;
++ LIST_HEAD(tofree);
+
+ local_irq_save(flags);
+ #ifdef CONFIG_PREEMPT
+@@ -2270,7 +2324,13 @@ load_freelist:
+ VM_BUG_ON(!c->page->frozen);
+ c->freelist = get_freepointer(s, freelist);
+ c->tid = next_tid(c->tid);
++out:
++ f = &__get_cpu_var(slub_free_list);
++ raw_spin_lock(&f->lock);
++ list_splice_init(&f->list, &tofree);
++ raw_spin_unlock(&f->lock);
+ local_irq_restore(flags);
++ free_delayed(s, &tofree);
+ return freelist;
+
+ new_slab:
+@@ -2288,9 +2348,7 @@ new_slab:
+ if (unlikely(!freelist)) {
+ if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+ slab_out_of_memory(s, gfpflags, node);
+-
+- local_irq_restore(flags);
+- return NULL;
++ goto out;
+ }
+
+ page = c->page;
+@@ -2304,8 +2362,7 @@ new_slab:
+ deactivate_slab(s, page, get_freepointer(s, freelist));
+ c->page = NULL;
+ c->freelist = NULL;
+- local_irq_restore(flags);
+- return freelist;
++ goto out;
+ }
+
+ /*
+@@ -2477,7 +2534,7 @@ static void __slab_free(struct kmem_cach
+
+ do {
+ if (unlikely(n)) {
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ n = NULL;
+ }
+ prior = page->freelist;
+@@ -2507,7 +2564,7 @@ static void __slab_free(struct kmem_cach
+ * Otherwise the list_lock will synchronize with
+ * other processors updating the list of slabs.
+ */
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ }
+ }
+@@ -2548,7 +2605,7 @@ static void __slab_free(struct kmem_cach
+ add_partial(n, page, DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
+ }
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return;
+
+ slab_empty:
+@@ -2562,7 +2619,7 @@ slab_empty:
+ /* Slab must be on the full list */
+ remove_full(s, page);
+
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ stat(s, FREE_SLAB);
+ discard_slab(s, page);
+ }
+@@ -2764,7 +2821,7 @@ static void
+ init_kmem_cache_node(struct kmem_cache_node *n)
+ {
+ n->nr_partial = 0;
+- spin_lock_init(&n->list_lock);
++ raw_spin_lock_init(&n->list_lock);
+ INIT_LIST_HEAD(&n->partial);
+ #ifdef CONFIG_SLUB_DEBUG
+ atomic_long_set(&n->nr_slabs, 0);
+@@ -3451,7 +3508,7 @@ int kmem_cache_shrink(struct kmem_cache
+ for (i = 0; i < objects; i++)
+ INIT_LIST_HEAD(slabs_by_inuse + i);
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ /*
+ * Build lists indexed by the items in use in each slab.
+@@ -3472,7 +3529,7 @@ int kmem_cache_shrink(struct kmem_cache
+ for (i = objects - 1; i > 0; i--)
+ list_splice(slabs_by_inuse + i, n->partial.prev);
+
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+
+ /* Release empty slabs */
+ list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+@@ -3642,6 +3699,12 @@ void __init kmem_cache_init(void)
+ boot_kmem_cache_node;
+ int i;
+ int caches = 2;
++ int cpu;
++
++ for_each_possible_cpu(cpu) {
++ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
++ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
++ }
+
+ if (debug_guardpage_minorder())
+ slub_max_order = 0;
+@@ -4033,7 +4096,7 @@ static int validate_slab_node(struct kme
+ struct page *page;
+ unsigned long flags;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+
+ list_for_each_entry(page, &n->partial, lru) {
+ validate_slab_slab(s, page, map);
+@@ -4056,7 +4119,7 @@ static int validate_slab_node(struct kme
+ atomic_long_read(&n->nr_slabs));
+
+ out:
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ return count;
+ }
+
+@@ -4246,12 +4309,12 @@ static int list_locations(struct kmem_ca
+ if (!atomic_long_read(&n->nr_slabs))
+ continue;
+
+- spin_lock_irqsave(&n->list_lock, flags);
++ raw_spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ process_slab(&t, s, page, alloc, map);
+ list_for_each_entry(page, &n->full, lru)
+ process_slab(&t, s, page, alloc, map);
+- spin_unlock_irqrestore(&n->list_lock, flags);
++ raw_spin_unlock_irqrestore(&n->list_lock, flags);
+ }
+
+ for (i = 0; i < t.count; i++) {
diff --git a/patches/mm-make-vmstat-rt-aware.patch b/patches/mm-make-vmstat-rt-aware.patch
new file mode 100644
index 0000000..52da0b1
--- /dev/null
+++ b/patches/mm-make-vmstat-rt-aware.patch
@@ -0,0 +1,84 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:13 -0500
+Subject: [PATCH] mm: make vmstat -rt aware
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/vmstat.h | 4 ++++
+ mm/vmstat.c | 6 ++++++
+ 2 files changed, 10 insertions(+)
+
+--- a/include/linux/vmstat.h
++++ b/include/linux/vmstat.h
+@@ -29,7 +29,9 @@ DECLARE_PER_CPU(struct vm_event_state, v
+
+ static inline void __count_vm_event(enum vm_event_item item)
+ {
++ preempt_disable_rt();
+ __this_cpu_inc(vm_event_states.event[item]);
++ preempt_enable_rt();
+ }
+
+ static inline void count_vm_event(enum vm_event_item item)
+@@ -39,7 +41,9 @@ static inline void count_vm_event(enum v
+
+ static inline void __count_vm_events(enum vm_event_item item, long delta)
+ {
++ preempt_disable_rt();
+ __this_cpu_add(vm_event_states.event[item], delta);
++ preempt_enable_rt();
+ }
+
+ static inline void count_vm_events(enum vm_event_item item, long delta)
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *
+ long x;
+ long t;
+
++ preempt_disable_rt();
+ x = delta + __this_cpu_read(*p);
+
+ t = __this_cpu_read(pcp->stat_threshold);
+@@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *
+ x = 0;
+ }
+ __this_cpu_write(*p, x);
++ preempt_enable_rt();
+ }
+ EXPORT_SYMBOL(__mod_zone_page_state);
+
+@@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone,
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
+
++ preempt_disable_rt();
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v > t)) {
+@@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone,
+ zone_page_state_add(v + overstep, zone, item);
+ __this_cpu_write(*p, -overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+@@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone,
+ s8 __percpu *p = pcp->vm_stat_diff + item;
+ s8 v, t;
+
++ preempt_disable_rt();
+ v = __this_cpu_dec_return(*p);
+ t = __this_cpu_read(pcp->stat_threshold);
+ if (unlikely(v < - t)) {
+@@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone,
+ zone_page_state_add(v - overstep, zone, item);
+ __this_cpu_write(*p, overstep);
+ }
++ preempt_enable_rt();
+ }
+
+ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
diff --git a/patches/mm-page-alloc-fix.patch b/patches/mm-page-alloc-fix.patch
new file mode 100644
index 0000000..2256353
--- /dev/null
+++ b/patches/mm-page-alloc-fix.patch
@@ -0,0 +1,22 @@
+Subject: mm-page-alloc-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 16:47:49 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2204,8 +2204,8 @@ __alloc_pages_direct_compact(gfp_t gfp_m
+ struct page *page;
+
+ /* Page migration frees to the PCP lists but we want merging */
+- drain_pages(get_cpu());
+- put_cpu();
++ drain_pages(get_cpu_light());
++ put_cpu_light();
+
+ page = get_page_from_freelist(gfp_mask, nodemask,
+ order, zonelist, high_zoneidx,
diff --git a/patches/mm-page-alloc-use-list-last-entry.patch b/patches/mm-page-alloc-use-list-last-entry.patch
new file mode 100644
index 0000000..5704803
--- /dev/null
+++ b/patches/mm-page-alloc-use-list-last-entry.patch
@@ -0,0 +1,20 @@
+Subject: mm-page-alloc-use-list-last-entry.patch
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 21 Jun 2011 11:24:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -658,7 +658,7 @@ static void free_pcppages_bulk(struct zo
+ do {
+ int mt; /* migratetype of the to-be-freed page */
+
+- page = list_entry(list->prev, struct page, lru);
++ page = list_last_entry(list, struct page, lru);
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+ mt = get_freepage_migratetype(page);
diff --git a/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch b/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch
new file mode 100644
index 0000000..8d12bf7
--- /dev/null
+++ b/patches/mm-page-alloc-use-local-lock-on-target-cpu.patch
@@ -0,0 +1,55 @@
+Subject: mm: page_alloc: Use local_lock_on() instead of plain spinlock
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 27 Sep 2012 11:11:46 +0200
+
+The plain spinlock, while sufficient, does not update the local_lock
+internals. Use a proper local_lock function instead to ease debugging.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/locallock.h | 11 +++++++++++
+ mm/page_alloc.c | 4 ++--
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/include/linux/locallock.h
++++ b/include/linux/locallock.h
+@@ -137,6 +137,12 @@ static inline int __local_lock_irqsave(s
+ _flags = __get_cpu_var(lvar).flags; \
+ } while (0)
+
++#define local_lock_irqsave_on(lvar, _flags, cpu) \
++ do { \
++ __local_lock_irqsave(&per_cpu(lvar, cpu)); \
++ _flags = per_cpu(lvar, cpu).flags; \
++ } while (0)
++
+ static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
+ unsigned long flags)
+ {
+@@ -156,6 +162,11 @@ static inline int __local_unlock_irqrest
+ put_local_var(lvar); \
+ } while (0)
+
++#define local_unlock_irqrestore_on(lvar, flags, cpu) \
++ do { \
++ __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
++ } while (0)
++
+ #define local_spin_trylock_irq(lvar, lock) \
+ ({ \
+ int __locked; \
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -224,9 +224,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ # define cpu_lock_irqsave(cpu, flags) \
+- spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
++ local_lock_irqsave_on(pa_lock, flags, cpu)
+ # define cpu_unlock_irqrestore(cpu, flags) \
+- spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
++ local_unlock_irqrestore_on(pa_lock, flags, cpu)
+ #else
+ # define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+ # define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
diff --git a/patches/mm-page_alloc-reduce-lock-sections-further.patch b/patches/mm-page_alloc-reduce-lock-sections-further.patch
new file mode 100644
index 0000000..072387c
--- /dev/null
+++ b/patches/mm-page_alloc-reduce-lock-sections-further.patch
@@ -0,0 +1,219 @@
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 3 Jul 2009 08:44:37 -0500
+Subject: mm: page_alloc reduce lock sections further
+
+Split out the pages which are to be freed into a separate list and
+call free_pages_bulk() outside of the percpu page allocator locks.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/page_alloc.c | 98 +++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 69 insertions(+), 29 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -625,7 +625,7 @@ static inline int free_pages_check(struc
+ }
+
+ /*
+- * Frees a number of pages from the PCP lists
++ * Frees a number of pages which have been collected from the pcp lists.
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ *
+@@ -636,16 +636,50 @@ static inline int free_pages_check(struc
+ * pinned" detection logic.
+ */
+ static void free_pcppages_bulk(struct zone *zone, int count,
+- struct per_cpu_pages *pcp)
++ struct list_head *list)
+ {
+- int migratetype = 0;
+- int batch_free = 0;
+ int to_free = count;
++ unsigned long flags;
+
+- spin_lock(&zone->lock);
++ spin_lock_irqsave(&zone->lock, flags);
+ zone->all_unreclaimable = 0;
+ zone->pages_scanned = 0;
+
++ while (!list_empty(list)) {
++ struct page *page = list_first_entry(list, struct page, lru);
++ int mt; /* migratetype of the to-be-freed page */
++
++ /* must delete as __free_one_page list manipulates */
++ list_del(&page->lru);
++
++ mt = get_freepage_migratetype(page);
++ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
++ __free_one_page(page, zone, 0, mt);
++ trace_mm_page_pcpu_drain(page, 0, mt);
++ if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
++ __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
++ if (is_migrate_cma(mt))
++ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
++ }
++
++ to_free--;
++ }
++ WARN_ON(to_free != 0);
++ spin_unlock_irqrestore(&zone->lock, flags);
++}
++
++/*
++ * Moves a number of pages from the PCP lists to free list which
++ * is freed outside of the locked region.
++ *
++ * Assumes all pages on list are in same zone, and of same order.
++ * count is the number of pages to free.
++ */
++static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
++ struct list_head *dst)
++{
++ int migratetype = 0, batch_free = 0;
++
+ while (to_free) {
+ struct page *page;
+ struct list_head *list;
+@@ -661,7 +695,7 @@ static void free_pcppages_bulk(struct zo
+ batch_free++;
+ if (++migratetype == MIGRATE_PCPTYPES)
+ migratetype = 0;
+- list = &pcp->lists[migratetype];
++ list = &src->lists[migratetype];
+ } while (list_empty(list));
+
+ /* This is the only non-empty list. Free them all. */
+@@ -669,36 +703,26 @@ static void free_pcppages_bulk(struct zo
+ batch_free = to_free;
+
+ do {
+- int mt; /* migratetype of the to-be-freed page */
+-
+ page = list_last_entry(list, struct page, lru);
+- /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+- mt = get_freepage_migratetype(page);
+- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+- __free_one_page(page, zone, 0, mt);
+- trace_mm_page_pcpu_drain(page, 0, mt);
+- if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+- __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
+- if (is_migrate_cma(mt))
+- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+- }
++ list_add(&page->lru, dst);
+ } while (--to_free && --batch_free && !list_empty(list));
+ }
+- spin_unlock(&zone->lock);
+ }
+
+ static void free_one_page(struct zone *zone, struct page *page, int order,
+ int migratetype)
+ {
+- spin_lock(&zone->lock);
++ unsigned long flags;
++
++ spin_lock_irqsave(&zone->lock, flags);
+ zone->all_unreclaimable = 0;
+ zone->pages_scanned = 0;
+
+ __free_one_page(page, zone, order, migratetype);
+ if (unlikely(migratetype != MIGRATE_ISOLATE))
+ __mod_zone_freepage_state(zone, 1 << order, migratetype);
+- spin_unlock(&zone->lock);
++ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+
+ static bool free_pages_prepare(struct page *page, unsigned int order)
+@@ -1180,6 +1204,7 @@ static int rmqueue_bulk(struct zone *zon
+ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
+ {
+ unsigned long flags;
++ LIST_HEAD(dst);
+ int to_drain;
+
+ local_lock_irqsave(pa_lock, flags);
+@@ -1188,10 +1213,11 @@ void drain_zone_pages(struct zone *zone,
+ else
+ to_drain = pcp->count;
+ if (to_drain > 0) {
+- free_pcppages_bulk(zone, to_drain, pcp);
++ isolate_pcp_pages(to_drain, pcp, &dst);
+ pcp->count -= to_drain;
+ }
+ local_unlock_irqrestore(pa_lock, flags);
++ free_pcppages_bulk(zone, to_drain, &dst);
+ }
+ #endif
+
+@@ -1210,16 +1236,21 @@ static void drain_pages(unsigned int cpu
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
++ LIST_HEAD(dst);
++ int count;
+
+ cpu_lock_irqsave(cpu, flags);
+ pset = per_cpu_ptr(zone->pageset, cpu);
+
+ pcp = &pset->pcp;
+- if (pcp->count) {
+- free_pcppages_bulk(zone, pcp->count, pcp);
++ count = pcp->count;
++ if (count) {
++ isolate_pcp_pages(count, pcp, &dst);
+ pcp->count = 0;
+ }
+ cpu_unlock_irqrestore(cpu, flags);
++ if (count)
++ free_pcppages_bulk(zone, count, &dst);
+ }
+ }
+
+@@ -1357,8 +1388,15 @@ void free_hot_cold_page(struct page *pag
+ list_add(&page->lru, &pcp->lists[migratetype]);
+ pcp->count++;
+ if (pcp->count >= pcp->high) {
+- free_pcppages_bulk(zone, pcp->batch, pcp);
++ LIST_HEAD(dst);
++ int count;
++
++ isolate_pcp_pages(pcp->batch, pcp, &dst);
+ pcp->count -= pcp->batch;
++ count = pcp->batch;
++ local_unlock_irqrestore(pa_lock, flags);
++ free_pcppages_bulk(zone, count, &dst);
++ return;
+ }
+
+ out:
+@@ -5977,20 +6015,22 @@ static int __meminit __zone_pcp_update(v
+ {
+ struct zone *zone = data;
+ int cpu;
+- unsigned long batch = zone_batchsize(zone), flags;
++ unsigned long flags;
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
++ LIST_HEAD(dst);
+
+ pset = per_cpu_ptr(zone->pageset, cpu);
+ pcp = &pset->pcp;
+
+ cpu_lock_irqsave(cpu, flags);
+- if (pcp->count > 0)
+- free_pcppages_bulk(zone, pcp->count, pcp);
++ if (pcp->count > 0) {
++ isolate_pcp_pages(pcp->count, pcp, &dst);
++ free_pcppages_bulk(zone, pcp->count, &dst);
++ }
+ drain_zonestat(zone, pset);
+- setup_pageset(pset, batch);
+ cpu_unlock_irqrestore(cpu, flags);
+ }
+ return 0;
diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
new file mode 100644
index 0000000..6dcb233
--- /dev/null
+++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
@@ -0,0 +1,213 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:37 -0500
+Subject: mm: page_alloc: rt-friendly per-cpu pages
+
+rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
+method into a preemptible, explicit-per-cpu-locks method.
+
+Contains fixes from:
+ Peter Zijlstra <a.p.zijlstra@chello.nl>
+ Thomas Gleixner <tglx@linutronix.de>
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/page_alloc.c | 57 ++++++++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 39 insertions(+), 18 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -58,6 +58,7 @@
+ #include <linux/prefetch.h>
+ #include <linux/migrate.h>
+ #include <linux/page-debug-flags.h>
++#include <linux/locallock.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -219,6 +220,18 @@ EXPORT_SYMBOL(nr_node_ids);
+ EXPORT_SYMBOL(nr_online_nodes);
+ #endif
+
++static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
++
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define cpu_lock_irqsave(cpu, flags) \
++ spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
++# define cpu_unlock_irqrestore(cpu, flags) \
++ spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
++#else
++# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
++# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
++#endif
++
+ int page_group_by_mobility_disabled __read_mostly;
+
+ void set_pageblock_migratetype(struct page *page, int migratetype)
+@@ -722,12 +735,12 @@ static void __free_pages_ok(struct page
+ if (!free_pages_prepare(page, order))
+ return;
+
+- local_irq_save(flags);
++ local_lock_irqsave(pa_lock, flags);
+ __count_vm_events(PGFREE, 1 << order);
+ migratetype = get_pageblock_migratetype(page);
+ set_freepage_migratetype(page, migratetype);
+ free_one_page(page_zone(page), page, order, migratetype);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+ }
+
+ /*
+@@ -1169,7 +1182,7 @@ void drain_zone_pages(struct zone *zone,
+ unsigned long flags;
+ int to_drain;
+
+- local_irq_save(flags);
++ local_lock_irqsave(pa_lock, flags);
+ if (pcp->count >= pcp->batch)
+ to_drain = pcp->batch;
+ else
+@@ -1178,7 +1191,7 @@ void drain_zone_pages(struct zone *zone,
+ free_pcppages_bulk(zone, to_drain, pcp);
+ pcp->count -= to_drain;
+ }
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+ }
+ #endif
+
+@@ -1198,7 +1211,7 @@ static void drain_pages(unsigned int cpu
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
+
+- local_irq_save(flags);
++ cpu_lock_irqsave(cpu, flags);
+ pset = per_cpu_ptr(zone->pageset, cpu);
+
+ pcp = &pset->pcp;
+@@ -1206,7 +1219,7 @@ static void drain_pages(unsigned int cpu
+ free_pcppages_bulk(zone, pcp->count, pcp);
+ pcp->count = 0;
+ }
+- local_irq_restore(flags);
++ cpu_unlock_irqrestore(cpu, flags);
+ }
+ }
+
+@@ -1259,7 +1272,12 @@ void drain_all_pages(void)
+ else
+ cpumask_clear_cpu(cpu, &cpus_with_pcps);
+ }
++#ifndef CONFIG_PREEMPT_RT_BASE
+ on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
++#else
++ for_each_cpu(cpu, &cpus_with_pcps)
++ drain_pages(cpu);
++#endif
+ }
+
+ #ifdef CONFIG_HIBERNATION
+@@ -1314,7 +1332,7 @@ void free_hot_cold_page(struct page *pag
+
+ migratetype = get_pageblock_migratetype(page);
+ set_freepage_migratetype(page, migratetype);
+- local_irq_save(flags);
++ local_lock_irqsave(pa_lock, flags);
+ __count_vm_event(PGFREE);
+
+ /*
+@@ -1344,7 +1362,7 @@ void free_hot_cold_page(struct page *pag
+ }
+
+ out:
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+ }
+
+ /*
+@@ -1473,7 +1491,7 @@ again:
+ struct per_cpu_pages *pcp;
+ struct list_head *list;
+
+- local_irq_save(flags);
++ local_lock_irqsave(pa_lock, flags);
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ list = &pcp->lists[migratetype];
+ if (list_empty(list)) {
+@@ -1505,18 +1523,20 @@ again:
+ */
+ WARN_ON_ONCE(order > 1);
+ }
+- spin_lock_irqsave(&zone->lock, flags);
++ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
+ page = __rmqueue(zone, order, migratetype);
+- spin_unlock(&zone->lock);
+- if (!page)
++ if (!page) {
++ spin_unlock(&zone->lock);
+ goto failed;
++ }
+ __mod_zone_freepage_state(zone, -(1 << order),
+ get_pageblock_migratetype(page));
++ spin_unlock(&zone->lock);
+ }
+
+ __count_zone_vm_events(PGALLOC, zone, 1 << order);
+ zone_statistics(preferred_zone, zone, gfp_flags);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+
+ VM_BUG_ON(bad_range(zone, page));
+ if (prep_new_page(page, order, gfp_flags))
+@@ -1524,7 +1544,7 @@ again:
+ return page;
+
+ failed:
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+ return NULL;
+ }
+
+@@ -5133,6 +5153,7 @@ static int page_alloc_cpu_notify(struct
+ void __init page_alloc_init(void)
+ {
+ hotcpu_notifier(page_alloc_cpu_notify, 0);
++ local_irq_lock_init(pa_lock);
+ }
+
+ /*
+@@ -5965,12 +5986,12 @@ static int __meminit __zone_pcp_update(v
+ pset = per_cpu_ptr(zone->pageset, cpu);
+ pcp = &pset->pcp;
+
+- local_irq_save(flags);
++ cpu_lock_irqsave(cpu, flags);
+ if (pcp->count > 0)
+ free_pcppages_bulk(zone, pcp->count, pcp);
+ drain_zonestat(zone, pset);
+ setup_pageset(pset, batch);
+- local_irq_restore(flags);
++ cpu_unlock_irqrestore(cpu, flags);
+ }
+ return 0;
+ }
+@@ -5988,7 +6009,7 @@ void zone_pcp_reset(struct zone *zone)
+ struct per_cpu_pageset *pset;
+
+ /* avoid races with drain_pages() */
+- local_irq_save(flags);
++ local_lock_irqsave(pa_lock, flags);
+ if (zone->pageset != &boot_pageset) {
+ for_each_online_cpu(cpu) {
+ pset = per_cpu_ptr(zone->pageset, cpu);
+@@ -5997,7 +6018,7 @@ void zone_pcp_reset(struct zone *zone)
+ free_percpu(zone->pageset);
+ zone->pageset = &boot_pageset;
+ }
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pa_lock, flags);
+ }
+
+ #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/patches/mm-prepare-pf-disable-discoupling.patch b/patches/mm-prepare-pf-disable-discoupling.patch
new file mode 100644
index 0000000..d3a5458
--- /dev/null
+++ b/patches/mm-prepare-pf-disable-discoupling.patch
@@ -0,0 +1,118 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:37 -0500
+Subject: mm: Prepare decoupling the page fault disabling logic
+
+Add a pagefault_disabled variable to task_struct to allow decoupling
+the pagefault-disabled logic from the preempt count.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/sched.h | 1 +
+ include/linux/uaccess.h | 33 +++------------------------------
+ kernel/fork.c | 1 +
+ mm/memory.c | 29 +++++++++++++++++++++++++++++
+ 4 files changed, 34 insertions(+), 30 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1452,6 +1452,7 @@ struct task_struct {
+ /* mutex deadlock detection */
+ struct mutex_waiter *blocked_on;
+ #endif
++ int pagefault_disabled;
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ unsigned int irq_events;
+ unsigned long hardirq_enable_ip;
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -6,37 +6,10 @@
+
+ /*
+ * These routines enable/disable the pagefault handler in that
+- * it will not take any locks and go straight to the fixup table.
+- *
+- * They have great resemblance to the preempt_disable/enable calls
+- * and in fact they are identical; this is because currently there is
+- * no other way to make the pagefault handlers do this. So we do
+- * disable preemption but we don't necessarily care about that.
++ * it will not take any MM locks and go straight to the fixup table.
+ */
+-static inline void pagefault_disable(void)
+-{
+- inc_preempt_count();
+- /*
+- * make sure to have issued the store before a pagefault
+- * can hit.
+- */
+- barrier();
+-}
+-
+-static inline void pagefault_enable(void)
+-{
+- /*
+- * make sure to issue those last loads/stores before enabling
+- * the pagefault handler again.
+- */
+- barrier();
+- dec_preempt_count();
+- /*
+- * make sure we do..
+- */
+- barrier();
+- preempt_check_resched();
+-}
++extern void pagefault_disable(void);
++extern void pagefault_enable(void);
+
+ #ifndef ARCH_HAS_NOCACHE_UACCESS
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1285,6 +1285,7 @@ static struct task_struct *copy_process(
+ p->hardirq_context = 0;
+ p->softirq_context = 0;
+ #endif
++ p->pagefault_disabled = 0;
+ #ifdef CONFIG_LOCKDEP
+ p->lockdep_depth = 0; /* no locks held yet */
+ p->curr_chain_key = 0;
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3669,6 +3669,35 @@ unlock:
+ return 0;
+ }
+
++void pagefault_disable(void)
++{
++ inc_preempt_count();
++ current->pagefault_disabled++;
++ /*
++ * make sure to have issued the store before a pagefault
++ * can hit.
++ */
++ barrier();
++}
++EXPORT_SYMBOL(pagefault_disable);
++
++void pagefault_enable(void)
++{
++ /*
++ * make sure to issue those last loads/stores before enabling
++ * the pagefault handler again.
++ */
++ barrier();
++ current->pagefault_disabled--;
++ dec_preempt_count();
++ /*
++ * make sure we do..
++ */
++ barrier();
++ preempt_check_resched();
++}
++EXPORT_SYMBOL(pagefault_enable);
++
+ /*
+ * By the time we get here, we already hold the mm semaphore
+ */
diff --git a/patches/mm-protect-activate-switch-mm.patch b/patches/mm-protect-activate-switch-mm.patch
new file mode 100644
index 0000000..dcaa01f
--- /dev/null
+++ b/patches/mm-protect-activate-switch-mm.patch
@@ -0,0 +1,69 @@
+Subject: mm: Protect activate_mm() by preempt_[disable&enable]_rt()
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Tue, 15 May 2012 13:53:56 +0800
+
+Use preempt_*_rt instead of local_irq_*_rt, otherwise there will be a
+warning on ARM like the one below:
+
+WARNING: at build/linux/kernel/smp.c:459 smp_call_function_many+0x98/0x264()
+Modules linked in:
+[<c0013bb4>] (unwind_backtrace+0x0/0xe4) from [<c001be94>] (warn_slowpath_common+0x4c/0x64)
+[<c001be94>] (warn_slowpath_common+0x4c/0x64) from [<c001bec4>] (warn_slowpath_null+0x18/0x1c)
+[<c001bec4>] (warn_slowpath_null+0x18/0x1c) from [<c0053ff8>](smp_call_function_many+0x98/0x264)
+[<c0053ff8>] (smp_call_function_many+0x98/0x264) from [<c0054364>] (smp_call_function+0x44/0x6c)
+[<c0054364>] (smp_call_function+0x44/0x6c) from [<c0017d50>] (__new_context+0xbc/0x124)
+[<c0017d50>] (__new_context+0xbc/0x124) from [<c009e49c>] (flush_old_exec+0x460/0x5e4)
+[<c009e49c>] (flush_old_exec+0x460/0x5e4) from [<c00d61ac>] (load_elf_binary+0x2e0/0x11ac)
+[<c00d61ac>] (load_elf_binary+0x2e0/0x11ac) from [<c009d060>] (search_binary_handler+0x94/0x2a4)
+[<c009d060>] (search_binary_handler+0x94/0x2a4) from [<c009e8fc>] (do_execve+0x254/0x364)
+[<c009e8fc>] (do_execve+0x254/0x364) from [<c0010e84>] (sys_execve+0x34/0x54)
+[<c0010e84>] (sys_execve+0x34/0x54) from [<c000da00>] (ret_fast_syscall+0x0/0x30)
+---[ end trace 0000000000000002 ]---
+
+The reason is that ARM needs irqs enabled when doing activate_mm().
+According to mm-protect-activate-switch-mm.patch,
+preempt_[disable|enable]_rt() is actually sufficient.
+
+Inspired-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1337061236-1766-1-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ fs/exec.c | 2 ++
+ mm/mmu_context.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -827,10 +827,12 @@ static int exec_mmap(struct mm_struct *m
+ }
+ }
+ task_lock(tsk);
++ preempt_disable_rt();
+ active_mm = tsk->active_mm;
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ activate_mm(active_mm, mm);
++ preempt_enable_rt();
+ task_unlock(tsk);
+ arch_pick_mmap_layout(mm);
+ if (old_mm) {
+--- a/mm/mmu_context.c
++++ b/mm/mmu_context.c
+@@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm)
+ struct task_struct *tsk = current;
+
+ task_lock(tsk);
++ preempt_disable_rt();
+ active_mm = tsk->active_mm;
+ if (active_mm != mm) {
+ atomic_inc(&mm->mm_count);
+@@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm)
+ }
+ tsk->mm = mm;
+ switch_mm(active_mm, mm, tsk);
++ preempt_enable_rt();
+ task_unlock(tsk);
+
+ if (active_mm != mm)
diff --git a/patches/mm-remove-preempt-count-from-pf.patch b/patches/mm-remove-preempt-count-from-pf.patch
new file mode 100644
index 0000000..a7ca61d
--- /dev/null
+++ b/patches/mm-remove-preempt-count-from-pf.patch
@@ -0,0 +1,34 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 25 Jul 2009 22:06:27 +0200
+Subject: mm: Remove preempt count from pagefault disable/enable
+
+Now that all users are cleaned up, we can remove the preemption count.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/memory.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3672,7 +3672,6 @@ unlock:
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ void pagefault_disable(void)
+ {
+- inc_preempt_count();
+ current->pagefault_disabled++;
+ /*
+ * make sure to have issued the store before a pagefault
+@@ -3690,12 +3689,6 @@ void pagefault_enable(void)
+ */
+ barrier();
+ current->pagefault_disabled--;
+- dec_preempt_count();
+- /*
+- * make sure we do..
+- */
+- barrier();
+- preempt_check_resched();
+ }
+ EXPORT_SYMBOL(pagefault_enable);
+ #endif
diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch
new file mode 100644
index 0000000..6345236
--- /dev/null
+++ b/patches/mm-rt-kmap-atomic-scheduling.patch
@@ -0,0 +1,274 @@
+Subject: mm, rt: kmap_atomic scheduling
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 28 Jul 2011 10:43:51 +0200
+
+In fact, with migrate_disable() existing one could play games with
+kmap_atomic. You could save/restore the kmap_atomic slots on context
+switch (if there are any in use, of course); this should be especially
+easy now that we have a kmap_atomic stack.
+
+Something like the below.. it wants replacing all the preempt_disable()
+stuff with pagefault_disable() && migrate_disable() of course, but then
+you can flip kmaps around like below.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+[dvhart@linux.intel.com: build fix]
+Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins
+
+[tglx@linutronix.de: Get rid of the per cpu variable and store the idx
+ and the pte content right away in the task struct.
+ Shortens the context switch code. ]
+---
+ arch/x86/kernel/process_32.c | 32 ++++++++++++++++++++++++++++++++
+ arch/x86/mm/highmem_32.c | 9 ++++++++-
+ arch/x86/mm/iomap_32.c | 9 ++++++++-
+ include/linux/highmem.h | 27 +++++++++++++++++++++++----
+ include/linux/sched.h | 7 +++++++
+ mm/highmem.c | 6 ++++--
+ mm/memory.c | 2 ++
+ 7 files changed, 84 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -36,6 +36,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/io.h>
+ #include <linux/kdebug.h>
++#include <linux/highmem.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/ldt.h>
+@@ -216,6 +217,35 @@ start_thread(struct pt_regs *regs, unsig
+ }
+ EXPORT_SYMBOL_GPL(start_thread);
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
++{
++ int i;
++
++ /*
++ * Clear @prev's kmap_atomic mappings
++ */
++ for (i = 0; i < prev_p->kmap_idx; i++) {
++ int idx = i + KM_TYPE_NR * smp_processor_id();
++ pte_t *ptep = kmap_pte - idx;
++
++ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
++ }
++ /*
++ * Restore @next_p's kmap_atomic mappings
++ */
++ for (i = 0; i < next_p->kmap_idx; i++) {
++ int idx = i + KM_TYPE_NR * smp_processor_id();
++
++ if (!pte_none(next_p->kmap_pte[i]))
++ set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
++ }
++}
++#else
++static inline void
++switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
++#endif
++
+
+ /*
+ * switch_to(x,y) should switch tasks from x to y.
+@@ -295,6 +325,8 @@ __switch_to(struct task_struct *prev_p,
+ task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+ __switch_to_xtra(prev_p, next_p, tss);
+
++ switch_kmaps(prev_p, next_p);
++
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+ * This must be done before restoring TLS segments so
+--- a/arch/x86/mm/highmem_32.c
++++ b/arch/x86/mm/highmem_32.c
+@@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap);
+ */
+ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+ {
++ pte_t pte = mk_pte(page, prot);
+ unsigned long vaddr;
+ int idx, type;
+
+@@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ BUG_ON(!pte_none(*(kmap_pte-idx)));
+- set_pte(kmap_pte-idx, mk_pte(page, prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = pte;
++#endif
++ set_pte(kmap_pte-idx, pte);
+ arch_flush_lazy_mmu_mode();
+
+ return (void *)vaddr;
+@@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr)
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = __pte(0);
++#endif
+ kpte_clear_flush(kmap_pte-idx, vaddr);
+ kmap_atomic_idx_pop();
+ arch_flush_lazy_mmu_mode();
+--- a/arch/x86/mm/iomap_32.c
++++ b/arch/x86/mm/iomap_32.c
+@@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
+
+ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
+ {
++ pte_t pte = pfn_pte(pfn, prot);
+ unsigned long vaddr;
+ int idx, type;
+
+@@ -64,7 +65,10 @@ void *kmap_atomic_prot_pfn(unsigned long
+ type = kmap_atomic_idx_push();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+- set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = pte;
++#endif
++ set_pte(kmap_pte - idx, pte);
+ arch_flush_lazy_mmu_mode();
+
+ return (void *)vaddr;
+@@ -110,6 +114,9 @@ iounmap_atomic(void __iomem *kvaddr)
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++ current->kmap_pte[type] = __pte(0);
++#endif
+ kpte_clear_flush(kmap_pte-idx, vaddr);
+ kmap_atomic_idx_pop();
+ }
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -85,32 +85,51 @@ static inline void __kunmap_atomic(void
+
+ #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ DECLARE_PER_CPU(int, __kmap_atomic_idx);
++#endif
+
+ static inline int kmap_atomic_idx_push(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
+
+-#ifdef CONFIG_DEBUG_HIGHMEM
++# ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(in_irq() && !irqs_disabled());
+ BUG_ON(idx > KM_TYPE_NR);
+-#endif
++# endif
+ return idx;
++#else
++ current->kmap_idx++;
++ BUG_ON(current->kmap_idx > KM_TYPE_NR);
++ return current->kmap_idx - 1;
++#endif
+ }
+
+ static inline int kmap_atomic_idx(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ return __this_cpu_read(__kmap_atomic_idx) - 1;
++#else
++ return current->kmap_idx - 1;
++#endif
+ }
+
+ static inline void kmap_atomic_idx_pop(void)
+ {
+-#ifdef CONFIG_DEBUG_HIGHMEM
++#ifndef CONFIG_PREEMPT_RT_FULL
++# ifdef CONFIG_DEBUG_HIGHMEM
+ int idx = __this_cpu_dec_return(__kmap_atomic_idx);
+
+ BUG_ON(idx < 0);
+-#else
++# else
+ __this_cpu_dec(__kmap_atomic_idx);
++# endif
++#else
++ current->kmap_idx--;
++# ifdef CONFIG_DEBUG_HIGHMEM
++ BUG_ON(current->kmap_idx < 0);
++# endif
+ #endif
+ }
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -23,6 +23,7 @@ struct sched_param {
+ #include <linux/nodemask.h>
+ #include <linux/mm_types.h>
+
++#include <asm/kmap_types.h>
+ #include <asm/page.h>
+ #include <asm/ptrace.h>
+ #include <asm/cputime.h>
+@@ -1635,6 +1636,12 @@ struct task_struct {
+ struct rcu_head put_rcu;
+ int softirq_nestcnt;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_FULL
++# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
++ int kmap_idx;
++ pte_t kmap_pte[KM_TYPE_NR];
++# endif
++#endif
+ };
+
+ #ifdef CONFIG_NUMA_BALANCING
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -29,10 +29,11 @@
+ #include <linux/kgdb.h>
+ #include <asm/tlbflush.h>
+
+-
++#ifndef CONFIG_PREEMPT_RT_FULL
+ #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
+ DEFINE_PER_CPU(int, __kmap_atomic_idx);
+ #endif
++#endif
+
+ /*
+ * Virtual_count is not a pure "count".
+@@ -47,8 +48,9 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
+ unsigned long totalhigh_pages __read_mostly;
+ EXPORT_SYMBOL(totalhigh_pages);
+
+-
++#ifndef CONFIG_PREEMPT_RT_FULL
+ EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
++#endif
+
+ unsigned int nr_free_highpages (void)
+ {
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3672,6 +3672,7 @@ unlock:
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ void pagefault_disable(void)
+ {
++ migrate_disable();
+ current->pagefault_disabled++;
+ /*
+ * make sure to have issued the store before a pagefault
+@@ -3689,6 +3690,7 @@ void pagefault_enable(void)
+ */
+ barrier();
+ current->pagefault_disabled--;
++ migrate_enable();
+ }
+ EXPORT_SYMBOL(pagefault_enable);
+ #endif
diff --git a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch
new file mode 100644
index 0000000..23a622c
--- /dev/null
+++ b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch
@@ -0,0 +1,38 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:34 -0500
+Subject: mm: scatterlist dont disable irqs on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/scatterlist.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/lib/scatterlist.c
++++ b/lib/scatterlist.c
+@@ -499,7 +499,7 @@ void sg_miter_stop(struct sg_mapping_ite
+ flush_kernel_dcache_page(miter->page);
+
+ if (miter->__flags & SG_MITER_ATOMIC) {
+- WARN_ON_ONCE(preemptible());
++ WARN_ON_ONCE(!pagefault_disabled());
+ kunmap_atomic(miter->addr);
+ } else
+ kunmap(miter->page);
+@@ -539,7 +539,7 @@ static size_t sg_copy_buffer(struct scat
+
+ sg_miter_start(&miter, sgl, nents, sg_flags);
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+
+ while (sg_miter_next(&miter) && offset < buflen) {
+ unsigned int len;
+@@ -556,7 +556,7 @@ static size_t sg_copy_buffer(struct scat
+
+ sg_miter_stop(&miter);
+
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ return offset;
+ }
+
diff --git a/patches/mm-shrink-the-page-frame-to-rt-size.patch b/patches/mm-shrink-the-page-frame-to-rt-size.patch
new file mode 100644
index 0000000..80f968b
--- /dev/null
+++ b/patches/mm-shrink-the-page-frame-to-rt-size.patch
@@ -0,0 +1,140 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 3 Jul 2009 08:44:54 -0500
+Subject: mm: shrink the page frame to !-rt size
+
+The below is a boot-tested hack to shrink the page frame size back to
+normal.
+
+Should be a net win since there should be many fewer PTE-pages than
+page-frames.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/mm.h | 46 +++++++++++++++++++++++++++++++++++++++-------
+ include/linux/mm_types.h | 4 ++++
+ mm/memory.c | 32 ++++++++++++++++++++++++++++++++
+ 3 files changed, 75 insertions(+), 7 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1259,27 +1259,59 @@ static inline pmd_t *pmd_alloc(struct mm
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
++#ifndef CONFIG_PREEMPT_RT_FULL
++
+ #define __pte_lockptr(page) &((page)->ptl)
+-#define pte_lock_init(_page) do { \
+- spin_lock_init(__pte_lockptr(_page)); \
+-} while (0)
++
++static inline struct page *pte_lock_init(struct page *page)
++{
++ spin_lock_init(__pte_lockptr(page));
++ return page;
++}
++
+ #define pte_lock_deinit(page) ((page)->mapping = NULL)
++
++#else /* !PREEMPT_RT_FULL */
++
++/*
++ * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the
++ * page frame, hence it only has a pointer and we need to dynamically
++ * allocate the lock when we allocate PTE-pages.
++ *
++ * This is an overall win, since only a small fraction of the pages
++ * will be PTE pages under normal circumstances.
++ */
++
++#define __pte_lockptr(page) ((page)->ptl)
++
++extern struct page *pte_lock_init(struct page *page);
++extern void pte_lock_deinit(struct page *page);
++
++#endif /* PREEMPT_RT_FULL */
++
+ #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+ #else /* !USE_SPLIT_PTLOCKS */
+ /*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+-#define pte_lock_init(page) do {} while (0)
++static inline struct page *pte_lock_init(struct page *page) { return page; }
+ #define pte_lock_deinit(page) do {} while (0)
+ #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+ #endif /* USE_SPLIT_PTLOCKS */
+
+-static inline void pgtable_page_ctor(struct page *page)
++static inline struct page *__pgtable_page_ctor(struct page *page)
+ {
+- pte_lock_init(page);
+- inc_zone_page_state(page, NR_PAGETABLE);
++ page = pte_lock_init(page);
++ if (page)
++ inc_zone_page_state(page, NR_PAGETABLE);
++ return page;
+ }
+
++#define pgtable_page_ctor(page) \
++do { \
++ page = __pgtable_page_ctor(page); \
++} while (0)
++
+ static inline void pgtable_page_dtor(struct page *page)
+ {
+ pte_lock_deinit(page);
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -141,7 +141,11 @@ struct page {
+ * system if PG_buddy is set.
+ */
+ #if USE_SPLIT_PTLOCKS
++# ifndef CONFIG_PREEMPT_RT_FULL
+ spinlock_t ptl;
++# else
++ spinlock_t *ptl;
++# endif
+ #endif
+ struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
+ struct page *first_page; /* Compound tail pages */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4264,3 +4264,35 @@ void copy_user_huge_page(struct page *ds
+ }
+ }
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
++
++#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
++/*
++ * Heinous hack, relies on the caller doing something like:
++ *
++ * pte = alloc_pages(PGALLOC_GFP, 0);
++ * if (pte)
++ * pgtable_page_ctor(pte);
++ * return pte;
++ *
++ * This ensures we release the page and return NULL when the
++ * lock allocation fails.
++ */
++struct page *pte_lock_init(struct page *page)
++{
++ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
++ if (page->ptl) {
++ spin_lock_init(__pte_lockptr(page));
++ } else {
++ __free_page(page);
++ page = NULL;
++ }
++ return page;
++}
++
++void pte_lock_deinit(struct page *page)
++{
++ kfree(page->ptl);
++ page->mapping = NULL;
++}
++
++#endif
diff --git a/patches/mm-slab-more-lock-breaks.patch b/patches/mm-slab-more-lock-breaks.patch
new file mode 100644
index 0000000..455b7fd
--- /dev/null
+++ b/patches/mm-slab-more-lock-breaks.patch
@@ -0,0 +1,229 @@
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 3 Jul 2009 08:44:43 -0500
+Subject: mm: More lock breaks in slab.c
+
+Handle __free_pages outside of the locked regions. This reduces the
+lock contention on the percpu slab locks in -rt significantly.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ mm/slab.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 64 insertions(+), 18 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -697,6 +697,7 @@ static void slab_set_debugobj_lock_class
+ #endif
+
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
++static DEFINE_PER_CPU(struct list_head, slab_free_list);
+ static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+
+ #ifndef CONFIG_PREEMPT_RT_BASE
+@@ -729,6 +730,34 @@ static void unlock_slab_on(unsigned int
+ }
+ #endif
+
++static void free_delayed(struct list_head *h)
++{
++ while(!list_empty(h)) {
++ struct page *page = list_first_entry(h, struct page, lru);
++
++ list_del(&page->lru);
++ __free_pages(page, page->index);
++ }
++}
++
++static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
++{
++ LIST_HEAD(tmp);
++
++ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
++ local_spin_unlock_irq(slab_lock, list_lock);
++ free_delayed(&tmp);
++}
++
++static void unlock_slab_and_free_delayed(unsigned long flags)
++{
++ LIST_HEAD(tmp);
++
++ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
++ local_unlock_irqrestore(slab_lock, flags);
++ free_delayed(&tmp);
++}
++
+ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
+ {
+ return cachep->array[smp_processor_id()];
+@@ -1345,7 +1374,7 @@ static void __cpuinit cpuup_canceled(lon
+ free_block(cachep, nc->entry, nc->avail, node);
+
+ if (!cpumask_empty(mask)) {
+- local_spin_unlock_irq(slab_lock, &l3->list_lock);
++ unlock_l3_and_free_delayed(&l3->list_lock);
+ goto free_array_cache;
+ }
+
+@@ -1359,7 +1388,7 @@ static void __cpuinit cpuup_canceled(lon
+ alien = l3->alien;
+ l3->alien = NULL;
+
+- local_spin_unlock_irq(slab_lock, &l3->list_lock);
++ unlock_l3_and_free_delayed(&l3->list_lock);
+
+ kfree(shared);
+ if (alien) {
+@@ -1652,6 +1681,8 @@ void __init kmem_cache_init(void)
+ use_alien_caches = 0;
+
+ local_irq_lock_init(slab_lock);
++ for_each_possible_cpu(i)
++ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
+
+ for (i = 0; i < NUM_INIT_LISTS; i++)
+ kmem_list3_init(&initkmem_list3[i]);
+@@ -1953,12 +1984,14 @@ static void *kmem_getpages(struct kmem_c
+ /*
+ * Interface to system's page release.
+ */
+-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
++static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
+ {
+ unsigned long i = (1 << cachep->gfporder);
+- struct page *page = virt_to_page(addr);
++ struct page *page, *basepage = virt_to_page(addr);
+ const unsigned long nr_freed = i;
+
++ page = basepage;
++
+ kmemcheck_free_shadow(page, cachep->gfporder);
+
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+@@ -1977,7 +2010,12 @@ static void kmem_freepages(struct kmem_c
+ memcg_release_pages(cachep, cachep->gfporder);
+ if (current->reclaim_state)
+ current->reclaim_state->reclaimed_slab += nr_freed;
+- free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
++ if (!delayed) {
++ free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
++ } else {
++ basepage->index = cachep->gfporder;
++ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
++ }
+ }
+
+ static void kmem_rcu_free(struct rcu_head *head)
+@@ -1985,7 +2023,7 @@ static void kmem_rcu_free(struct rcu_hea
+ struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
+ struct kmem_cache *cachep = slab_rcu->cachep;
+
+- kmem_freepages(cachep, slab_rcu->addr);
++ kmem_freepages(cachep, slab_rcu->addr, false);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slab_rcu);
+ }
+@@ -2204,7 +2242,8 @@ static void slab_destroy_debugcheck(stru
+ * Before calling the slab must have been unlinked from the cache. The
+ * cache-lock is not held/needed.
+ */
+-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
++static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
++ bool delayed)
+ {
+ void *addr = slabp->s_mem - slabp->colouroff;
+
+@@ -2217,7 +2256,7 @@ static void slab_destroy(struct kmem_cac
+ slab_rcu->addr = addr;
+ call_rcu(&slab_rcu->head, kmem_rcu_free);
+ } else {
+- kmem_freepages(cachep, addr);
++ kmem_freepages(cachep, addr, delayed);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slabp);
+ }
+@@ -2628,9 +2667,15 @@ static void do_drain(void *arg)
+ __do_drain(arg, smp_processor_id());
+ }
+ #else
+-static void do_drain(void *arg, int this_cpu)
++static void do_drain(void *arg, int cpu)
+ {
+- __do_drain(arg, this_cpu);
++ LIST_HEAD(tmp);
++
++ lock_slab_on(cpu);
++ __do_drain(arg, cpu);
++ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
++ unlock_slab_on(cpu);
++ free_delayed(&tmp);
+ }
+ #endif
+
+@@ -2688,7 +2733,7 @@ static int drain_freelist(struct kmem_ca
+ */
+ l3->free_objects -= cache->num;
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+- slab_destroy(cache, slabp);
++ slab_destroy(cache, slabp, false);
+ nr_freed++;
+ }
+ out:
+@@ -3003,7 +3048,7 @@ static int cache_grow(struct kmem_cache
+ spin_unlock(&l3->list_lock);
+ return 1;
+ opps1:
+- kmem_freepages(cachep, objp);
++ kmem_freepages(cachep, objp, false);
+ failed:
+ if (local_flags & __GFP_WAIT)
+ local_lock_irq(slab_lock);
+@@ -3684,7 +3729,7 @@ static void free_block(struct kmem_cache
+ * a different cache, refer to comments before
+ * alloc_slabmgmt.
+ */
+- slab_destroy(cachep, slabp);
++ slab_destroy(cachep, slabp, true);
+ } else {
+ list_add(&slabp->list, &l3->slabs_free);
+ }
+@@ -3952,7 +3997,7 @@ void kmem_cache_free(struct kmem_cache *
+ debug_check_no_obj_freed(objp, cachep->object_size);
+ local_lock_irqsave(slab_lock, flags);
+ __cache_free(cachep, objp, _RET_IP_);
+- local_unlock_irqrestore(slab_lock, flags);
++ unlock_slab_and_free_delayed(flags);
+
+ trace_kmem_cache_free(_RET_IP_, objp);
+ }
+@@ -3983,7 +4028,7 @@ void kfree(const void *objp)
+ debug_check_no_obj_freed(objp, c->object_size);
+ local_lock_irqsave(slab_lock, flags);
+ __cache_free(c, (void *)objp, _RET_IP_);
+- local_unlock_irqrestore(slab_lock, flags);
++ unlock_slab_and_free_delayed(flags);
+ }
+ EXPORT_SYMBOL(kfree);
+
+@@ -4033,7 +4078,8 @@ static int alloc_kmemlist(struct kmem_ca
+ }
+ l3->free_limit = (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
+- local_spin_unlock_irq(slab_lock, &l3->list_lock);
++ unlock_l3_and_free_delayed(&l3->list_lock);
++
+ kfree(shared);
+ free_alien_cache(new_alien);
+ continue;
+@@ -4141,8 +4187,8 @@ static int __do_tune_cpucache(struct kme
+ local_spin_lock_irq(slab_lock,
+ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+- local_spin_unlock_irq(slab_lock,
+- &cachep->nodelists[cpu_to_mem(i)]->list_lock);
++
++ unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ kfree(ccold);
+ }
+ kfree(new);
diff --git a/patches/mm-slab-move-debug-out.patch b/patches/mm-slab-move-debug-out.patch
new file mode 100644
index 0000000..5f2ff42
--- /dev/null
+++ b/patches/mm-slab-move-debug-out.patch
@@ -0,0 +1,37 @@
+Subject: mm-slab-move-debug-out.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 20 Jun 2011 10:42:04 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/slab.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -3895,10 +3895,10 @@ void kmem_cache_free(struct kmem_cache *
+ if (!cachep)
+ return;
+
+- local_irq_save(flags);
+ debug_check_no_locks_freed(objp, cachep->object_size);
+ if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(objp, cachep->object_size);
++ local_irq_save(flags);
+ __cache_free(cachep, objp, _RET_IP_);
+ local_irq_restore(flags);
+
+@@ -3924,12 +3924,12 @@ void kfree(const void *objp)
+
+ if (unlikely(ZERO_OR_NULL_PTR(objp)))
+ return;
+- local_irq_save(flags);
+ kfree_debugcheck(objp);
+ c = virt_to_cache(objp);
+ debug_check_no_locks_freed(objp, c->object_size);
+
+ debug_check_no_obj_freed(objp, c->object_size);
++ local_irq_save(flags);
+ __cache_free(c, (void *)objp, _RET_IP_);
+ local_irq_restore(flags);
+ }
diff --git a/patches/mm-slab-wrap-functions.patch b/patches/mm-slab-wrap-functions.patch
new file mode 100644
index 0000000..b6f823a
--- /dev/null
+++ b/patches/mm-slab-wrap-functions.patch
@@ -0,0 +1,458 @@
+Subject: mm-slab-wrap-functions.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 18 Jun 2011 19:44:43 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/slab.c | 163 +++++++++++++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 114 insertions(+), 49 deletions(-)
+
+--- a/mm/slab.c
++++ b/mm/slab.c
+@@ -116,6 +116,7 @@
+ #include <linux/kmemcheck.h>
+ #include <linux/memory.h>
+ #include <linux/prefetch.h>
++#include <linux/locallock.h>
+
+ #include <net/sock.h>
+
+@@ -696,12 +697,49 @@ static void slab_set_debugobj_lock_class
+ #endif
+
+ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
++static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
++#else
++/*
++ * execute func() for all CPUs. On PREEMPT_RT we dont actually have
++ * to run on the remote CPUs - we only have to take their CPU-locks.
++ * (This is a rare operation, so cacheline bouncing is not an issue.)
++ */
++static void
++slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
++{
++ unsigned int i;
++
++ get_cpu_light();
++ for_each_online_cpu(i)
++ func(arg, i);
++ put_cpu_light();
++}
++
++static void lock_slab_on(unsigned int cpu)
++{
++ local_lock_irq_on(slab_lock, cpu);
++}
++
++static void unlock_slab_on(unsigned int cpu)
++{
++ local_unlock_irq_on(slab_lock, cpu);
++}
++#endif
+
+ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
+ {
+ return cachep->array[smp_processor_id()];
+ }
+
++static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
++ int cpu)
++{
++ return cachep->array[cpu];
++}
++
+ static inline struct kmem_cache *__find_general_cachep(size_t size,
+ gfp_t gfpflags)
+ {
+@@ -1171,9 +1209,10 @@ static void reap_alien(struct kmem_cache
+ if (l3->alien) {
+ struct array_cache *ac = l3->alien[node];
+
+- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
++ if (ac && ac->avail &&
++ local_spin_trylock_irq(slab_lock, &ac->lock)) {
+ __drain_alien_cache(cachep, ac, node);
+- spin_unlock_irq(&ac->lock);
++ local_spin_unlock_irq(slab_lock, &ac->lock);
+ }
+ }
+ }
+@@ -1188,9 +1227,9 @@ static void drain_alien_cache(struct kme
+ for_each_online_node(i) {
+ ac = alien[i];
+ if (ac) {
+- spin_lock_irqsave(&ac->lock, flags);
++ local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
+ __drain_alien_cache(cachep, ac, i);
+- spin_unlock_irqrestore(&ac->lock, flags);
++ local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
+ }
+ }
+ }
+@@ -1269,11 +1308,11 @@ static int init_cache_nodelists_node(int
+ cachep->nodelists[node] = l3;
+ }
+
+- spin_lock_irq(&cachep->nodelists[node]->list_lock);
++ local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
+ cachep->nodelists[node]->free_limit =
+ (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
+- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
++ local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
+ }
+ return 0;
+ }
+@@ -1298,7 +1337,7 @@ static void __cpuinit cpuup_canceled(lon
+ if (!l3)
+ goto free_array_cache;
+
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+
+ /* Free limit for this kmem_list3 */
+ l3->free_limit -= cachep->batchcount;
+@@ -1306,7 +1345,7 @@ static void __cpuinit cpuup_canceled(lon
+ free_block(cachep, nc->entry, nc->avail, node);
+
+ if (!cpumask_empty(mask)) {
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ goto free_array_cache;
+ }
+
+@@ -1320,7 +1359,7 @@ static void __cpuinit cpuup_canceled(lon
+ alien = l3->alien;
+ l3->alien = NULL;
+
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+
+ kfree(shared);
+ if (alien) {
+@@ -1394,7 +1433,7 @@ static int __cpuinit cpuup_prepare(long
+ l3 = cachep->nodelists[node];
+ BUG_ON(!l3);
+
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+ if (!l3->shared) {
+ /*
+ * We are serialised from CPU_DEAD or
+@@ -1409,7 +1448,7 @@ static int __cpuinit cpuup_prepare(long
+ alien = NULL;
+ }
+ #endif
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ kfree(shared);
+ free_alien_cache(alien);
+ if (cachep->flags & SLAB_DEBUG_OBJECTS)
+@@ -1612,6 +1651,8 @@ void __init kmem_cache_init(void)
+ if (num_possible_nodes() == 1)
+ use_alien_caches = 0;
+
++ local_irq_lock_init(slab_lock);
++
+ for (i = 0; i < NUM_INIT_LISTS; i++)
+ kmem_list3_init(&initkmem_list3[i]);
+
+@@ -2533,7 +2574,7 @@ __kmem_cache_create (struct kmem_cache *
+ #if DEBUG
+ static void check_irq_off(void)
+ {
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+ }
+
+ static void check_irq_on(void)
+@@ -2568,26 +2609,37 @@ static void drain_array(struct kmem_cach
+ struct array_cache *ac,
+ int force, int node);
+
+-static void do_drain(void *arg)
++static void __do_drain(void *arg, unsigned int cpu)
+ {
+ struct kmem_cache *cachep = arg;
+ struct array_cache *ac;
+- int node = numa_mem_id();
++ int node = cpu_to_mem(cpu);
+
+- check_irq_off();
+- ac = cpu_cache_get(cachep);
++ ac = cpu_cache_get_on_cpu(cachep, cpu);
+ spin_lock(&cachep->nodelists[node]->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node);
+ spin_unlock(&cachep->nodelists[node]->list_lock);
+ ac->avail = 0;
+ }
+
++#ifndef CONFIG_PREEMPT_RT_BASE
++static void do_drain(void *arg)
++{
++ __do_drain(arg, smp_processor_id());
++}
++#else
++static void do_drain(void *arg, int this_cpu)
++{
++ __do_drain(arg, this_cpu);
++}
++#endif
++
+ static void drain_cpu_caches(struct kmem_cache *cachep)
+ {
+ struct kmem_list3 *l3;
+ int node;
+
+- on_each_cpu(do_drain, cachep, 1);
++ slab_on_each_cpu(do_drain, cachep);
+ check_irq_on();
+ for_each_online_node(node) {
+ l3 = cachep->nodelists[node];
+@@ -2618,10 +2670,10 @@ static int drain_freelist(struct kmem_ca
+ nr_freed = 0;
+ while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+ p = l3->slabs_free.prev;
+ if (p == &l3->slabs_free) {
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ goto out;
+ }
+
+@@ -2635,7 +2687,7 @@ static int drain_freelist(struct kmem_ca
+ * to the cache.
+ */
+ l3->free_objects -= cache->num;
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ slab_destroy(cache, slabp);
+ nr_freed++;
+ }
+@@ -2910,7 +2962,7 @@ static int cache_grow(struct kmem_cache
+ offset *= cachep->colour_off;
+
+ if (local_flags & __GFP_WAIT)
+- local_irq_enable();
++ local_unlock_irq(slab_lock);
+
+ /*
+ * The test for missing atomic flag is performed here, rather than
+@@ -2940,7 +2992,7 @@ static int cache_grow(struct kmem_cache
+ cache_init_objs(cachep, slabp);
+
+ if (local_flags & __GFP_WAIT)
+- local_irq_disable();
++ local_lock_irq(slab_lock);
+ check_irq_off();
+ spin_lock(&l3->list_lock);
+
+@@ -2954,7 +3006,7 @@ opps1:
+ kmem_freepages(cachep, objp);
+ failed:
+ if (local_flags & __GFP_WAIT)
+- local_irq_disable();
++ local_lock_irq(slab_lock);
+ return 0;
+ }
+
+@@ -3368,11 +3420,11 @@ retry:
+ * set and go into memory reserves if necessary.
+ */
+ if (local_flags & __GFP_WAIT)
+- local_irq_enable();
++ local_unlock_irq(slab_lock);
+ kmem_flagcheck(cache, flags);
+ obj = kmem_getpages(cache, local_flags, numa_mem_id());
+ if (local_flags & __GFP_WAIT)
+- local_irq_disable();
++ local_lock_irq(slab_lock);
+ if (obj) {
+ /*
+ * Insert into the appropriate per node queues
+@@ -3492,7 +3544,7 @@ slab_alloc_node(struct kmem_cache *cache
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
+ cache_alloc_debugcheck_before(cachep, flags);
+- local_irq_save(save_flags);
++ local_lock_irqsave(slab_lock, save_flags);
+
+ if (nodeid == NUMA_NO_NODE)
+ nodeid = slab_node;
+@@ -3517,7 +3569,7 @@ slab_alloc_node(struct kmem_cache *cache
+ /* ___cache_alloc_node can fall back to other nodes */
+ ptr = ____cache_alloc_node(cachep, flags, nodeid);
+ out:
+- local_irq_restore(save_flags);
++ local_unlock_irqrestore(slab_lock, save_flags);
+ ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+ kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
+ flags);
+@@ -3579,9 +3631,9 @@ slab_alloc(struct kmem_cache *cachep, gf
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
+ cache_alloc_debugcheck_before(cachep, flags);
+- local_irq_save(save_flags);
++ local_lock_irqsave(slab_lock, save_flags);
+ objp = __do_cache_alloc(cachep, flags);
+- local_irq_restore(save_flags);
++ local_unlock_irqrestore(slab_lock, save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
+ flags);
+@@ -3898,9 +3950,9 @@ void kmem_cache_free(struct kmem_cache *
+ debug_check_no_locks_freed(objp, cachep->object_size);
+ if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(objp, cachep->object_size);
+- local_irq_save(flags);
++ local_lock_irqsave(slab_lock, flags);
+ __cache_free(cachep, objp, _RET_IP_);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(slab_lock, flags);
+
+ trace_kmem_cache_free(_RET_IP_, objp);
+ }
+@@ -3929,9 +3981,9 @@ void kfree(const void *objp)
+ debug_check_no_locks_freed(objp, c->object_size);
+
+ debug_check_no_obj_freed(objp, c->object_size);
+- local_irq_save(flags);
++ local_lock_irqsave(slab_lock, flags);
+ __cache_free(c, (void *)objp, _RET_IP_);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(slab_lock, flags);
+ }
+ EXPORT_SYMBOL(kfree);
+
+@@ -3968,7 +4020,7 @@ static int alloc_kmemlist(struct kmem_ca
+ if (l3) {
+ struct array_cache *shared = l3->shared;
+
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+
+ if (shared)
+ free_block(cachep, shared->entry,
+@@ -3981,7 +4033,7 @@ static int alloc_kmemlist(struct kmem_ca
+ }
+ l3->free_limit = (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ kfree(shared);
+ free_alien_cache(new_alien);
+ continue;
+@@ -4028,17 +4080,28 @@ struct ccupdate_struct {
+ struct array_cache *new[0];
+ };
+
+-static void do_ccupdate_local(void *info)
++static void __do_ccupdate_local(void *info, int cpu)
+ {
+ struct ccupdate_struct *new = info;
+ struct array_cache *old;
+
+- check_irq_off();
+- old = cpu_cache_get(new->cachep);
++ old = cpu_cache_get_on_cpu(new->cachep, cpu);
+
+- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
+- new->new[smp_processor_id()] = old;
++ new->cachep->array[cpu] = new->new[cpu];
++ new->new[cpu] = old;
++}
++
++#ifndef CONFIG_PREEMPT_RT_BASE
++static void do_ccupdate_local(void *info)
++{
++ __do_ccupdate_local(info, smp_processor_id());
+ }
++#else
++static void do_ccupdate_local(void *info, int cpu)
++{
++ __do_ccupdate_local(info, cpu);
++}
++#endif
+
+ /* Always called with the slab_mutex held */
+ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
+@@ -4064,7 +4127,7 @@ static int __do_tune_cpucache(struct kme
+ }
+ new->cachep = cachep;
+
+- on_each_cpu(do_ccupdate_local, (void *)new, 1);
++ slab_on_each_cpu(do_ccupdate_local, (void *)new);
+
+ check_irq_on();
+ cachep->batchcount = batchcount;
+@@ -4075,9 +4138,11 @@ static int __do_tune_cpucache(struct kme
+ struct array_cache *ccold = new->new[i];
+ if (!ccold)
+ continue;
+- spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
++ local_spin_lock_irq(slab_lock,
++ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+- spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
++ local_spin_unlock_irq(slab_lock,
++ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ kfree(ccold);
+ }
+ kfree(new);
+@@ -4192,7 +4257,7 @@ static void drain_array(struct kmem_cach
+ if (ac->touched && !force) {
+ ac->touched = 0;
+ } else {
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+ if (ac->avail) {
+ tofree = force ? ac->avail : (ac->limit + 4) / 5;
+ if (tofree > ac->avail)
+@@ -4202,7 +4267,7 @@ static void drain_array(struct kmem_cach
+ memmove(ac->entry, &(ac->entry[tofree]),
+ sizeof(void *) * ac->avail);
+ }
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ }
+ }
+
+@@ -4295,7 +4360,7 @@ void get_slabinfo(struct kmem_cache *cac
+ continue;
+
+ check_irq_on();
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+
+ list_for_each_entry(slabp, &l3->slabs_full, list) {
+ if (slabp->inuse != cachep->num && !error)
+@@ -4320,7 +4385,7 @@ void get_slabinfo(struct kmem_cache *cac
+ if (l3->shared)
+ shared_avail += l3->shared->avail;
+
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ }
+ num_slabs += active_slabs;
+ num_objs = num_slabs * cachep->num;
+@@ -4520,13 +4585,13 @@ static int leaks_show(struct seq_file *m
+ continue;
+
+ check_irq_on();
+- spin_lock_irq(&l3->list_lock);
++ local_spin_lock_irq(slab_lock, &l3->list_lock);
+
+ list_for_each_entry(slabp, &l3->slabs_full, list)
+ handle_slab(n, cachep, slabp);
+ list_for_each_entry(slabp, &l3->slabs_partial, list)
+ handle_slab(n, cachep, slabp);
+- spin_unlock_irq(&l3->list_lock);
++ local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ }
+ name = cachep->name;
+ if (n[0] == n[1]) {
diff --git a/patches/mm-vmalloc-use-get-cpu-light.patch b/patches/mm-vmalloc-use-get-cpu-light.patch
new file mode 100644
index 0000000..61f6d7f
--- /dev/null
+++ b/patches/mm-vmalloc-use-get-cpu-light.patch
@@ -0,0 +1,64 @@
+Subject: mm-vmalloc.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2011 11:39:36 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/vmalloc.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -782,7 +782,7 @@ static struct vmap_block *new_vmap_block
+ struct vmap_block *vb;
+ struct vmap_area *va;
+ unsigned long vb_idx;
+- int node, err;
++ int node, err, cpu;
+
+ node = numa_node_id();
+
+@@ -821,12 +821,13 @@ static struct vmap_block *new_vmap_block
+ BUG_ON(err);
+ radix_tree_preload_end();
+
+- vbq = &get_cpu_var(vmap_block_queue);
++ cpu = get_cpu_light();
++ vbq = &__get_cpu_var(vmap_block_queue);
+ vb->vbq = vbq;
+ spin_lock(&vbq->lock);
+ list_add_rcu(&vb->free_list, &vbq->free);
+ spin_unlock(&vbq->lock);
+- put_cpu_var(vmap_block_queue);
++ put_cpu_light();
+
+ return vb;
+ }
+@@ -900,7 +901,7 @@ static void *vb_alloc(unsigned long size
+ struct vmap_block *vb;
+ unsigned long addr = 0;
+ unsigned int order;
+- int purge = 0;
++ int purge = 0, cpu;
+
+ BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+@@ -916,7 +917,8 @@ static void *vb_alloc(unsigned long size
+
+ again:
+ rcu_read_lock();
+- vbq = &get_cpu_var(vmap_block_queue);
++ cpu = get_cpu_light();
++ vbq = &__get_cpu_var(vmap_block_queue);
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ int i;
+
+@@ -953,7 +955,7 @@ next:
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
+- put_cpu_var(vmap_block_queue);
++ put_cpu_light();
+ rcu_read_unlock();
+
+ if (!addr) {
diff --git a/patches/mmci-remove-bogus-irq-save.patch b/patches/mmci-remove-bogus-irq-save.patch
new file mode 100644
index 0000000..9250895
--- /dev/null
+++ b/patches/mmci-remove-bogus-irq-save.patch
@@ -0,0 +1,39 @@
+Subject: mmci: Remove bogus local_irq_save()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 12:11:12 +0100
+
+On !RT the interrupt handler runs with interrupts disabled. On RT it
+runs in a thread, so there is no need to disable interrupts at all.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/mmc/host/mmci.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/drivers/mmc/host/mmci.c
++++ b/drivers/mmc/host/mmci.c
+@@ -930,15 +930,12 @@ static irqreturn_t mmci_pio_irq(int irq,
+ struct sg_mapping_iter *sg_miter = &host->sg_miter;
+ struct variant_data *variant = host->variant;
+ void __iomem *base = host->base;
+- unsigned long flags;
+ u32 status;
+
+ status = readl(base + MMCISTATUS);
+
+ dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
+
+- local_irq_save(flags);
+-
+ do {
+ unsigned int remain, len;
+ char *buffer;
+@@ -978,8 +975,6 @@ static irqreturn_t mmci_pio_irq(int irq,
+
+ sg_miter_stop(sg_miter);
+
+- local_irq_restore(flags);
+-
+ /*
+ * If we have less than the fifo 'half-full' threshold to transfer,
+ * trigger a PIO interrupt as soon as any data is available.
diff --git a/patches/mutex-no-spin-on-rt.patch b/patches/mutex-no-spin-on-rt.patch
new file mode 100644
index 0000000..23b771d
--- /dev/null
+++ b/patches/mutex-no-spin-on-rt.patch
@@ -0,0 +1,17 @@
+Subject: mutex-no-spin-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:51:45 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/Kconfig.locks | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/Kconfig.locks
++++ b/kernel/Kconfig.locks
+@@ -222,4 +222,4 @@ endif
+
+ config MUTEX_SPIN_ON_OWNER
+ def_bool y
+- depends on SMP && !DEBUG_MUTEXES
++ depends on SMP && !DEBUG_MUTEXES && !PREEMPT_RT_FULL
diff --git a/patches/net-another-local-irq-disable-alloc-atomic-headache.patch b/patches/net-another-local-irq-disable-alloc-atomic-headache.patch
new file mode 100644
index 0000000..e14da83
--- /dev/null
+++ b/patches/net-another-local-irq-disable-alloc-atomic-headache.patch
@@ -0,0 +1,47 @@
+Subject: net: Another local_irq_disable/kmalloc headache
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 26 Sep 2012 16:21:08 +0200
+
+Replace it with a local lock. Though that's pretty inefficient :(
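+
+Below, purely for orientation, is a minimal sketch of the local lock
+pattern used in the hunk (the example_* names are made up; only the
+locallock API calls are the real thing):
+
+  #include <linux/locallock.h>
+  #include <linux/percpu.h>
+
+  struct example_cache {                  /* stand-in for netdev_alloc_cache */
+          void *frag;
+  };
+
+  static DEFINE_PER_CPU(struct example_cache, example_cache);
+  static DEFINE_LOCAL_IRQ_LOCK(example_lock);
+
+  static void *example_get_frag(void)
+  {
+          struct example_cache *nc;
+          unsigned long flags;
+          void *data;
+
+          /* !RT: behaves like local_irq_save(); RT: per-CPU sleeping lock */
+          local_lock_irqsave(example_lock, flags);
+          nc = &__get_cpu_var(example_cache);
+          data = nc->frag;
+          local_unlock_irqrestore(example_lock, flags);
+          return data;
+  }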
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/skbuff.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -60,6 +60,7 @@
+ #include <linux/scatterlist.h>
+ #include <linux/errqueue.h>
+ #include <linux/prefetch.h>
++#include <linux/locallock.h>
+
+ #include <net/protocol.h>
+ #include <net/dst.h>
+@@ -347,6 +348,7 @@ struct netdev_alloc_cache {
+ unsigned int pagecnt_bias;
+ };
+ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
++static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
+
+ #define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+ #define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+@@ -359,7 +361,7 @@ static void *__netdev_alloc_frag(unsigne
+ int order;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ local_lock_irqsave(netdev_alloc_lock, flags);
+ nc = &__get_cpu_var(netdev_alloc_cache);
+ if (unlikely(!nc->frag.page)) {
+ refill:
+@@ -393,7 +395,7 @@ recycle:
+ nc->frag.offset += fragsz;
+ nc->pagecnt_bias--;
+ end:
+- local_irq_restore(flags);
++ local_unlock_irqrestore(netdev_alloc_lock, flags);
+ return data;
+ }
+
diff --git a/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch b/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
new file mode 100644
index 0000000..4234945
--- /dev/null
+++ b/patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
@@ -0,0 +1,94 @@
+Subject: net: netfilter: Serialize xt_write_recseq sections on RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 11:18:08 +0100
+
+The netfilter code relies only on the implicit semantics of
+local_bh_disable() for serializing xt_write_recseq sections. RT breaks
+that and needs explicit serialization here.
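+
+For orientation, a sketch of the caller shape after this change,
+modelled loosely on how ipt_do_table() uses these helpers
+(example_update_counters() is a made-up name):
+
+  static void example_update_counters(void)
+  {
+          unsigned int addend;
+
+          local_bh_disable();
+          /* on RT this now also takes the per-CPU xt_write_lock */
+          addend = xt_write_recseq_begin();
+          /* ... bump the per-CPU rule counters ... */
+          xt_write_recseq_end(addend);    /* drops xt_write_lock again on RT */
+          local_bh_enable();
+  }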
+
+Reported-by: Peter LaDow <petela@gocougs.wsu.edu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/locallock.h | 4 ++++
+ include/linux/netfilter/x_tables.h | 7 +++++++
+ net/netfilter/core.c | 6 ++++++
+ 3 files changed, 17 insertions(+)
+
+--- a/include/linux/locallock.h
++++ b/include/linux/locallock.h
+@@ -25,6 +25,9 @@ struct local_irq_lock {
+ DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
+ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
+
++#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
++ DECLARE_PER_CPU(struct local_irq_lock, lvar)
++
+ #define local_irq_lock_init(lvar) \
+ do { \
+ int __cpu; \
+@@ -220,6 +223,7 @@ static inline int __local_unlock_irqrest
+ #else /* PREEMPT_RT_BASE */
+
+ #define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
++#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
+
+ static inline void local_irq_lock_init(int lvar) { }
+
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -3,6 +3,7 @@
+
+
+ #include <linux/netdevice.h>
++#include <linux/locallock.h>
+ #include <uapi/linux/netfilter/x_tables.h>
+
+ /**
+@@ -284,6 +285,8 @@ extern void xt_free_table_info(struct xt
+ */
+ DECLARE_PER_CPU(seqcount_t, xt_recseq);
+
++DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
++
+ /**
+ * xt_write_recseq_begin - start of a write section
+ *
+@@ -298,6 +301,9 @@ static inline unsigned int xt_write_recs
+ {
+ unsigned int addend;
+
++ /* RT protection */
++ local_lock(xt_write_lock);
++
+ /*
+ * Low order bit of sequence is set if we already
+ * called xt_write_recseq_begin().
+@@ -328,6 +334,7 @@ static inline void xt_write_recseq_end(u
+ /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
+ smp_wmb();
+ __this_cpu_add(xt_recseq.sequence, addend);
++ local_unlock(xt_write_lock);
+ }
+
+ /*
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -20,11 +20,17 @@
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
+ #include <linux/slab.h>
++#include <linux/locallock.h>
+ #include <net/net_namespace.h>
+ #include <net/sock.h>
+
+ #include "nf_internals.h"
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
++EXPORT_PER_CPU_SYMBOL(xt_write_lock);
++#endif
++
+ static DEFINE_MUTEX(afinfo_mutex);
+
+ const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff --git a/patches/net-flip-lock-dep-thingy.patch b/patches/net-flip-lock-dep-thingy.patch
new file mode 100644
index 0000000..f4fed61
--- /dev/null
+++ b/patches/net-flip-lock-dep-thingy.patch
@@ -0,0 +1,111 @@
+Subject: net-flip-lock-dep-thingy.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 10:59:58 +0200
+
+=======================================================
+[ INFO: possible circular locking dependency detected ]
+3.0.0-rc3+ #26
+-------------------------------------------------------
+ip/1104 is trying to acquire lock:
+ (local_softirq_lock){+.+...}, at: [<ffffffff81056d12>] __local_lock+0x25/0x68
+
+but task is already holding lock:
+ (sk_lock-AF_INET){+.+...}, at: [<ffffffff81433308>] lock_sock+0x10/0x12
+
+which lock already depends on the new lock.
+
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (sk_lock-AF_INET){+.+...}:
+ [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+ [<ffffffff813e2781>] lock_sock_nested+0x82/0x92
+ [<ffffffff81433308>] lock_sock+0x10/0x12
+ [<ffffffff81433afa>] tcp_close+0x1b/0x355
+ [<ffffffff81453c99>] inet_release+0xc3/0xcd
+ [<ffffffff813dff3f>] sock_release+0x1f/0x74
+ [<ffffffff813dffbb>] sock_close+0x27/0x2b
+ [<ffffffff81129c63>] fput+0x11d/0x1e3
+ [<ffffffff81126577>] filp_close+0x70/0x7b
+ [<ffffffff8112667a>] sys_close+0xf8/0x13d
+ [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+-> #0 (local_softirq_lock){+.+...}:
+ [<ffffffff81082ecc>] __lock_acquire+0xacc/0xdc8
+ [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+ [<ffffffff814a7e40>] _raw_spin_lock+0x3b/0x4a
+ [<ffffffff81056d12>] __local_lock+0x25/0x68
+ [<ffffffff81056d8b>] local_bh_disable+0x36/0x3b
+ [<ffffffff814a7fc4>] _raw_write_lock_bh+0x16/0x4f
+ [<ffffffff81433c38>] tcp_close+0x159/0x355
+ [<ffffffff81453c99>] inet_release+0xc3/0xcd
+ [<ffffffff813dff3f>] sock_release+0x1f/0x74
+ [<ffffffff813dffbb>] sock_close+0x27/0x2b
+ [<ffffffff81129c63>] fput+0x11d/0x1e3
+ [<ffffffff81126577>] filp_close+0x70/0x7b
+ [<ffffffff8112667a>] sys_close+0xf8/0x13d
+ [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(sk_lock-AF_INET);
+ lock(local_softirq_lock);
+ lock(sk_lock-AF_INET);
+ lock(local_softirq_lock);
+
+ *** DEADLOCK ***
+
+1 lock held by ip/1104:
+ #0: (sk_lock-AF_INET){+.+...}, at: [<ffffffff81433308>] lock_sock+0x10/0x12
+
+stack backtrace:
+Pid: 1104, comm: ip Not tainted 3.0.0-rc3+ #26
+Call Trace:
+ [<ffffffff81081649>] print_circular_bug+0x1f8/0x209
+ [<ffffffff81082ecc>] __lock_acquire+0xacc/0xdc8
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff810836e5>] lock_acquire+0x103/0x12e
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff81046c75>] ? get_parent_ip+0x11/0x41
+ [<ffffffff814a7e40>] _raw_spin_lock+0x3b/0x4a
+ [<ffffffff81056d12>] ? __local_lock+0x25/0x68
+ [<ffffffff81046c8c>] ? get_parent_ip+0x28/0x41
+ [<ffffffff81056d12>] __local_lock+0x25/0x68
+ [<ffffffff81056d8b>] local_bh_disable+0x36/0x3b
+ [<ffffffff81433308>] ? lock_sock+0x10/0x12
+ [<ffffffff814a7fc4>] _raw_write_lock_bh+0x16/0x4f
+ [<ffffffff81433c38>] tcp_close+0x159/0x355
+ [<ffffffff81453c99>] inet_release+0xc3/0xcd
+ [<ffffffff813dff3f>] sock_release+0x1f/0x74
+ [<ffffffff813dffbb>] sock_close+0x27/0x2b
+ [<ffffffff81129c63>] fput+0x11d/0x1e3
+ [<ffffffff81126577>] filp_close+0x70/0x7b
+ [<ffffffff8112667a>] sys_close+0xf8/0x13d
+ [<ffffffff814ae882>] system_call_fastpath+0x16/0x1b
+
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/sock.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2287,12 +2287,11 @@ void lock_sock_nested(struct sock *sk, i
+ if (sk->sk_lock.owned)
+ __lock_sock(sk);
+ sk->sk_lock.owned = 1;
+- spin_unlock(&sk->sk_lock.slock);
++ spin_unlock_bh(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+- local_bh_enable();
+ }
+ EXPORT_SYMBOL(lock_sock_nested);
+
diff --git a/patches/net-make-devnet_rename_seq-a-mutex.patch b/patches/net-make-devnet_rename_seq-a-mutex.patch
new file mode 100644
index 0000000..f4f675a
--- /dev/null
+++ b/patches/net-make-devnet_rename_seq-a-mutex.patch
@@ -0,0 +1,150 @@
+From b83c29516a334722b4c38dbc2bc3caf58ce46b88 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 20 Mar 2013 18:06:20 +0100
+Subject: [PATCH] net: make devnet_rename_seq a mutex
+
+On RT write_seqcount_begin() disables preemption, but device_rename()
+allocates memory with GFP_KERNEL and later grabs the sysfs_mutex.
+Since I don't see a reason why this can't be a mutex, make it one. We
+probably don't have that many reads at the same time in the hot path.
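+
+Condensed reader side, before and after (the helper names are made up
+and the RCU lookup around the copy is omitted; devnet_rename_seq and
+devnet_rename_mutex are the real symbols):
+
+  /* old: lockless read, retry if a rename raced with us */
+  static void get_name_seqcount(struct net_device *dev, char *buf)
+  {
+          unsigned seq;
+
+          do {
+                  seq = read_seqcount_begin(&devnet_rename_seq);
+                  strcpy(buf, dev->name);
+          } while (read_seqcount_retry(&devnet_rename_seq, seq));
+  }
+
+  /* new: simply serialize against the (rare) rename */
+  static void get_name_mutex(struct net_device *dev, char *buf)
+  {
+          mutex_lock(&devnet_rename_mutex);
+          strcpy(buf, dev->name);
+          mutex_unlock(&devnet_rename_mutex);
+  }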
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/netdevice.h | 2 +-
+ net/core/dev.c | 23 +++++++++++------------
+ net/core/sock.c | 8 +++-----
+ 3 files changed, 15 insertions(+), 18 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1579,7 +1579,7 @@ extern int call_netdevice_notifiers(unsi
+
+ extern rwlock_t dev_base_lock; /* Device list lock */
+
+-extern seqcount_t devnet_rename_seq; /* Device rename seq */
++extern struct mutex devnet_rename_mutex;
+
+
+ #define for_each_netdev(net, d) \
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -203,7 +203,7 @@ static struct list_head offload_base __r
+ DEFINE_RWLOCK(dev_base_lock);
+ EXPORT_SYMBOL(dev_base_lock);
+
+-seqcount_t devnet_rename_seq;
++DEFINE_MUTEX(devnet_rename_mutex);
+
+ static inline void dev_base_seq_inc(struct net *net)
+ {
+@@ -1093,10 +1093,11 @@ int dev_change_name(struct net_device *d
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+
+- write_seqcount_begin(&devnet_rename_seq);
++
++ mutex_lock(&devnet_rename_mutex);
+
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+- write_seqcount_end(&devnet_rename_seq);
++ mutex_unlock(&devnet_rename_mutex);
+ return 0;
+ }
+
+@@ -1104,7 +1105,7 @@ int dev_change_name(struct net_device *d
+
+ err = dev_get_valid_name(net, dev, newname);
+ if (err < 0) {
+- write_seqcount_end(&devnet_rename_seq);
++ mutex_unlock(&devnet_rename_mutex);
+ return err;
+ }
+
+@@ -1112,11 +1113,11 @@ rollback:
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+- write_seqcount_end(&devnet_rename_seq);
++ mutex_unlock(&devnet_rename_mutex);
+ return ret;
+ }
+
+- write_seqcount_end(&devnet_rename_seq);
++ mutex_unlock(&devnet_rename_mutex);
+
+ write_lock_bh(&dev_base_lock);
+ hlist_del_rcu(&dev->name_hlist);
+@@ -1135,7 +1136,7 @@ rollback:
+ /* err >= 0 after dev_alloc_name() or stores the first errno */
+ if (err >= 0) {
+ err = ret;
+- write_seqcount_begin(&devnet_rename_seq);
++ mutex_lock(&devnet_rename_mutex);
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ goto rollback;
+ } else {
+@@ -4214,7 +4215,6 @@ static int dev_ifname(struct net *net, s
+ {
+ struct net_device *dev;
+ struct ifreq ifr;
+- unsigned seq;
+
+ /*
+ * Fetch the caller's info block.
+@@ -4223,19 +4223,18 @@ static int dev_ifname(struct net *net, s
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+-retry:
+- seq = read_seqcount_begin(&devnet_rename_seq);
++ mutex_lock(&devnet_rename_mutex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
+ if (!dev) {
+ rcu_read_unlock();
++ mutex_unlock(&devnet_rename_mutex);
+ return -ENODEV;
+ }
+
+ strcpy(ifr.ifr_name, dev->name);
+ rcu_read_unlock();
+- if (read_seqcount_retry(&devnet_rename_seq, seq))
+- goto retry;
++ mutex_unlock(&devnet_rename_mutex);
+
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -571,7 +571,6 @@ static int sock_getbindtodevice(struct s
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+ char devname[IFNAMSIZ];
+- unsigned seq;
+
+ if (sk->sk_bound_dev_if == 0) {
+ len = 0;
+@@ -582,20 +581,19 @@ static int sock_getbindtodevice(struct s
+ if (len < IFNAMSIZ)
+ goto out;
+
+-retry:
+- seq = read_seqcount_begin(&devnet_rename_seq);
++ mutex_lock(&devnet_rename_mutex);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ ret = -ENODEV;
+ if (!dev) {
+ rcu_read_unlock();
++ mutex_unlock(&devnet_rename_mutex);
+ goto out;
+ }
+
+ strcpy(devname, dev->name);
+ rcu_read_unlock();
+- if (read_seqcount_retry(&devnet_rename_seq, seq))
+- goto retry;
++ mutex_unlock(&devnet_rename_mutex);
+
+ len = strlen(devname) + 1;
+
diff --git a/patches/net-netif-rx-ni-use-local-bh-disable.patch b/patches/net-netif-rx-ni-use-local-bh-disable.patch
new file mode 100644
index 0000000..5d96fc4
--- /dev/null
+++ b/patches/net-netif-rx-ni-use-local-bh-disable.patch
@@ -0,0 +1,31 @@
+Subject: net: Use local_bh_disable in netif_rx_ni()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 15:12:49 +0000
+
+This code triggers the new WARN in __raise_softirq_irqoff() even though
+it actually looks at the softirq pending bit and calls into the softirq
+code, but that does not fit well with the context-related softirq model
+of RT. It's correct on mainline, but going through
+local_bh_disable/enable here is not going to hurt badly.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/dev.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3118,11 +3118,9 @@ int netif_rx_ni(struct sk_buff *skb)
+ {
+ int err;
+
+- migrate_disable();
++ local_bh_disable();
+ err = netif_rx(skb);
+- if (local_softirq_pending())
+- thread_do_softirq();
+- migrate_enable();
++ local_bh_enable();
+
+ return err;
+ }
diff --git a/patches/net-netif_rx_ni-migrate-disable.patch b/patches/net-netif_rx_ni-migrate-disable.patch
new file mode 100644
index 0000000..b5b5e75
--- /dev/null
+++ b/patches/net-netif_rx_ni-migrate-disable.patch
@@ -0,0 +1,25 @@
+Subject: net-netif_rx_ni-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 16:29:27 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/core/dev.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3115,11 +3115,11 @@ int netif_rx_ni(struct sk_buff *skb)
+ {
+ int err;
+
+- preempt_disable();
++ migrate_disable();
+ err = netif_rx(skb);
+ if (local_softirq_pending())
+ thread_do_softirq();
+- preempt_enable();
++ migrate_enable();
+
+ return err;
+ }
diff --git a/patches/net-tx-action-avoid-livelock-on-rt.patch b/patches/net-tx-action-avoid-livelock-on-rt.patch
new file mode 100644
index 0000000..92a988a
--- /dev/null
+++ b/patches/net-tx-action-avoid-livelock-on-rt.patch
@@ -0,0 +1,92 @@
+Subject: net: Avoid livelock in net_tx_action() on RT
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Thu, 06 Oct 2011 10:48:39 -0400
+
+qdisc_lock is taken w/o disabling interrupts or bottom halves. So code
+holding a qdisc_lock() can be interrupted and softirqs can run on the
+return of interrupt in !RT.
+
+The spin_trylock() in net_tx_action() makes sure that the softirq
+does not deadlock. When the lock can't be acquired, q is requeued and
+the NET_TX softirq is raised. That causes the softirq to run over and
+over.
+
+That works in mainline as do_softirq() has a retry loop limit and
+leaves the softirq processing in the interrupt return path and
+schedules ksoftirqd. The task which holds qdisc_lock cannot be
+preempted, so the lock is released and either ksoftirqd or the next
+softirq in the return from interrupt path can proceed. Though it's a
+bit strange to actually run MAX_SOFTIRQ_RESTART (10) loops before it
+decides to bail out even if it's clear in the first iteration :)
+
+On RT all softirq processing is done in a FIFO thread and we don't
+have a loop limit, so ksoftirqd preempts the lock holder forever and
+unqueues and requeues until the reset button is hit.
+
+Due to the forced threading of ksoftirqd on RT we actually cannot
+deadlock on qdisc_lock because it's a "sleeping lock". So it's safe to
+replace the spin_trylock() with a spin_lock(). When contended,
+ksoftirqd is scheduled out and the lock holder can proceed.
+
+[ tglx: Massaged changelog and code comments ]
+
+Solved-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Tested-by: Carsten Emde <cbe@osadl.org>
+Cc: Clark Williams <williams@redhat.com>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Luis Claudio R. Goncalves <lclaudio@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ net/core/dev.c | 32 +++++++++++++++++++++++++++++++-
+ 1 file changed, 31 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3125,6 +3125,36 @@ int netif_rx_ni(struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL(netif_rx_ni);
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * RT runs ksoftirqd as a real time thread and the root_lock is a
++ * "sleeping spinlock". If the trylock fails then we can go into an
++ * infinite loop when ksoftirqd preempted the task which actually
++ * holds the lock, because we requeue q and raise NET_TX softirq
++ * causing ksoftirqd to loop forever.
++ *
++ * It's safe to use spin_lock on RT here as softirqs run in thread
++ * context and cannot deadlock against the thread which is holding
++ * root_lock.
++ *
++ * On !RT the trylock might fail, but there we bail out from the
++ * softirq loop after 10 attempts which we can't do on RT. And the
++ * task holding root_lock cannot be preempted, so the only downside of
++ * that trylock is that we need 10 loops to decide that we should have
++ * given up in the first one :)
++ */
++static inline int take_root_lock(spinlock_t *lock)
++{
++ spin_lock(lock);
++ return 1;
++}
++#else
++static inline int take_root_lock(spinlock_t *lock)
++{
++ return spin_trylock(lock);
++}
++#endif
++
+ static void net_tx_action(struct softirq_action *h)
+ {
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+@@ -3163,7 +3193,7 @@ static void net_tx_action(struct softirq
+ head = head->next_sched;
+
+ root_lock = qdisc_lock(q);
+- if (spin_trylock(root_lock)) {
++ if (take_root_lock(root_lock)) {
+ smp_mb__before_clear_bit();
+ clear_bit(__QDISC_STATE_SCHED,
+ &q->state);
diff --git a/patches/net-use-cpu-chill.patch b/patches/net-use-cpu-chill.patch
new file mode 100644
index 0000000..d0451fb
--- /dev/null
+++ b/patches/net-use-cpu-chill.patch
@@ -0,0 +1,62 @@
+Subject: net: Use cpu_chill() instead of cpu_relax()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 07 Mar 2012 21:10:04 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Use cpu_chill() instead of cpu_relax() to let the system
+make progress.
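+
+The affected loops all have this shape (EXAMPLE_BUSY_BIT is made up;
+that cpu_chill() maps to msleep(1) on RT and to cpu_relax() otherwise
+is the assumption this patch relies on):
+
+  #include <linux/delay.h>
+
+  #define EXAMPLE_BUSY_BIT 0
+
+  static void example_wait_for_writer(unsigned long *flags)
+  {
+          /* was cpu_relax(): on RT that can spin forever if the
+           * writer was preempted by this (higher priority) task */
+          while (test_bit(EXAMPLE_BUSY_BIT, flags))
+                  cpu_chill();
+  }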
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ net/packet/af_packet.c | 5 +++--
+ net/rds/ib_rdma.c | 3 ++-
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -88,6 +88,7 @@
+ #include <linux/virtio_net.h>
+ #include <linux/errqueue.h>
+ #include <linux/net_tstamp.h>
++#include <linux/delay.h>
+
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+@@ -553,7 +554,7 @@ static void prb_retire_rx_blk_timer_expi
+ if (BLOCK_NUM_PKTS(pbd)) {
+ while (atomic_read(&pkc->blk_fill_in_prog)) {
+ /* Waiting for skb_copy_bits to finish... */
+- cpu_relax();
++ cpu_chill();
+ }
+ }
+
+@@ -807,7 +808,7 @@ static void prb_retire_current_block(str
+ if (!(status & TP_STATUS_BLK_TMO)) {
+ while (atomic_read(&pkc->blk_fill_in_prog)) {
+ /* Waiting for skb_copy_bits to finish... */
+- cpu_relax();
++ cpu_chill();
+ }
+ }
+ prb_close_block(pkc, pbd, po, status);
+--- a/net/rds/ib_rdma.c
++++ b/net/rds/ib_rdma.c
+@@ -34,6 +34,7 @@
+ #include <linux/slab.h>
+ #include <linux/rculist.h>
+ #include <linux/llist.h>
++#include <linux/delay.h>
+
+ #include "rds.h"
+ #include "ib.h"
+@@ -286,7 +287,7 @@ static inline void wait_clean_list_grace
+ for_each_online_cpu(cpu) {
+ flag = &per_cpu(clean_list_grace, cpu);
+ while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
+- cpu_relax();
++ cpu_chill();
+ }
+ }
+
diff --git a/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch b/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch
new file mode 100644
index 0000000..c934904
--- /dev/null
+++ b/patches/net-use-cpu-light-in-ip-send-unicast-reply.patch
@@ -0,0 +1,30 @@
+Subject: net: Use get_cpu_light() in ip_send_unicast_reply()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 01 Oct 2012 17:12:35 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/ipv4/ip_output.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1508,7 +1508,8 @@ void ip_send_unicast_reply(struct net *n
+ if (IS_ERR(rt))
+ return;
+
+- inet = &get_cpu_var(unicast_sock);
++ get_cpu_light();
++ inet = &__get_cpu_var(unicast_sock);
+
+ inet->tos = arg->tos;
+ sk = &inet->sk;
+@@ -1532,7 +1533,7 @@ void ip_send_unicast_reply(struct net *n
+ ip_push_pending_frames(sk, &fl4);
+ }
+
+- put_cpu_var(unicast_sock);
++ put_cpu_light();
+
+ ip_rt_put(rt);
+ }
diff --git a/patches/net-wireless-warn-nort.patch b/patches/net-wireless-warn-nort.patch
new file mode 100644
index 0000000..005ecd7
--- /dev/null
+++ b/patches/net-wireless-warn-nort.patch
@@ -0,0 +1,20 @@
+Subject: net-wireless-warn-nort.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 21:05:33 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ net/mac80211/rx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -3144,7 +3144,7 @@ void ieee80211_rx(struct ieee80211_hw *h
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+
+- WARN_ON_ONCE(softirq_count() == 0);
++ WARN_ON_ONCE_NONRT(softirq_count() == 0);
+
+ if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
+ goto drop;
diff --git a/patches/ntp-make-ntp-lock-raw-sigh.patch b/patches/ntp-make-ntp-lock-raw-sigh.patch
new file mode 100644
index 0000000..a3ab0e7
--- /dev/null
+++ b/patches/ntp-make-ntp-lock-raw-sigh.patch
@@ -0,0 +1,125 @@
+Subject: ntp: Make ntp_lock raw.
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 10 Apr 2012 11:14:55 +0200
+
+This needs to be revisited. Not sure whether we can avoid making this
+lock raw, but I'd really like to.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/ntp.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/kernel/time/ntp.c
++++ b/kernel/time/ntp.c
+@@ -22,7 +22,7 @@
+ * NTP timekeeping variables:
+ */
+
+-DEFINE_SPINLOCK(ntp_lock);
++DEFINE_RAW_SPINLOCK(ntp_lock);
+
+
+ /* USER_HZ period (usecs): */
+@@ -347,7 +347,7 @@ void ntp_clear(void)
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&ntp_lock, flags);
++ raw_spin_lock_irqsave(&ntp_lock, flags);
+
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+@@ -361,7 +361,7 @@ void ntp_clear(void)
+
+ /* Clear PPS state variables */
+ pps_clear();
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+
+ }
+
+@@ -371,9 +371,9 @@ u64 ntp_tick_length(void)
+ unsigned long flags;
+ s64 ret;
+
+- spin_lock_irqsave(&ntp_lock, flags);
++ raw_spin_lock_irqsave(&ntp_lock, flags);
+ ret = tick_length;
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ return ret;
+ }
+
+@@ -394,7 +394,7 @@ int second_overflow(unsigned long secs)
+ int leap = 0;
+ unsigned long flags;
+
+- spin_lock_irqsave(&ntp_lock, flags);
++ raw_spin_lock_irqsave(&ntp_lock, flags);
+
+ /*
+ * Leap second processing. If in leap-insert state at the end of the
+@@ -478,7 +478,7 @@ int second_overflow(unsigned long secs)
+ time_adjust = 0;
+
+ out:
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+
+ return leap;
+ }
+@@ -660,7 +660,7 @@ int do_adjtimex(struct timex *txc)
+
+ getnstimeofday(&ts);
+
+- spin_lock_irq(&ntp_lock);
++ raw_spin_lock_irq(&ntp_lock);
+
+ if (txc->modes & ADJ_ADJTIME) {
+ long save_adjust = time_adjust;
+@@ -702,7 +702,7 @@ int do_adjtimex(struct timex *txc)
+ /* fill PPS status fields */
+ pps_fill_timex(txc);
+
+- spin_unlock_irq(&ntp_lock);
++ raw_spin_unlock_irq(&ntp_lock);
+
+ txc->time.tv_sec = ts.tv_sec;
+ txc->time.tv_usec = ts.tv_nsec;
+@@ -900,7 +900,7 @@ void hardpps(const struct timespec *phas
+
+ pts_norm = pps_normalize_ts(*phase_ts);
+
+- spin_lock_irqsave(&ntp_lock, flags);
++ raw_spin_lock_irqsave(&ntp_lock, flags);
+
+ /* clear the error bits, they will be set again if needed */
+ time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
+@@ -913,7 +913,7 @@ void hardpps(const struct timespec *phas
+ * just start the frequency interval */
+ if (unlikely(pps_fbase.tv_sec == 0)) {
+ pps_fbase = *raw_ts;
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ return;
+ }
+
+@@ -928,7 +928,7 @@ void hardpps(const struct timespec *phas
+ time_status |= STA_PPSJITTER;
+ /* restart the frequency calibration interval */
+ pps_fbase = *raw_ts;
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ pr_err("hardpps: PPSJITTER: bad pulse\n");
+ return;
+ }
+@@ -945,7 +945,7 @@ void hardpps(const struct timespec *phas
+
+ hardpps_update_phase(pts_norm.nsec);
+
+- spin_unlock_irqrestore(&ntp_lock, flags);
++ raw_spin_unlock_irqrestore(&ntp_lock, flags);
+ }
+ EXPORT_SYMBOL(hardpps);
+
diff --git a/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch b/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch
new file mode 100644
index 0000000..38caa00
--- /dev/null
+++ b/patches/of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch
@@ -0,0 +1,449 @@
+Subject: OF: Convert devtree lock from rw_lock to raw spinlock
+From: Paul Gortmaker <paul.gortmaker@windriver.com>
+Date: Wed, 6 Feb 2013 15:30:56 -0500
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+With the locking cleanup in place (from "OF: Fixup recursive
+locking code paths"), we can now do the conversion from the
+rw_lock to a raw spinlock as required for preempt-rt.
+
+The previous cleanup and this conversion were originally
+separate since they predated when mainline got raw spinlock (in
+commit c2f21ce2e31286a "locking: Implement new raw_spinlock").
+
+So, at that point in time, the cleanup was considered plausible
+for mainline, but not this conversion. In any case, we've kept
+them separate as it makes for easier review and better bisection.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Grant Likely <grant.likely@secretlab.ca>
+Cc: Sam Ravnborg <sam@ravnborg.org>
+Cc: <devicetree-discuss@lists.ozlabs.org>
+Cc: Rob Herring <rob.herring@calxeda.com>
+Link: http://lkml.kernel.org/r/1360182656-15898-1-git-send-email-paul.gortmaker@windriver.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[PG: taken from preempt-rt, update subject & add a commit log]
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+---
+
+[v2: recent commit e81b329 ("powerpc+of: Add /proc device tree
+ updating to of node add/remove") added two more instances of
+ write_unlock that also needed converting to raw_spin_unlock.
+ Retested (boot) on sbc8548, defconfig builds on arm/sparc; no
+ new warnings observed.]
+
+ arch/sparc/kernel/prom_common.c | 4 -
+ drivers/of/base.c | 100 ++++++++++++++++++++++------------------
+ include/linux/of.h | 2
+ 3 files changed, 59 insertions(+), 47 deletions(-)
+
+--- a/arch/sparc/kernel/prom_common.c
++++ b/arch/sparc/kernel/prom_common.c
+@@ -64,7 +64,7 @@ int of_set_property(struct device_node *
+ err = -ENODEV;
+
+ mutex_lock(&of_set_property_mutex);
+- write_lock(&devtree_lock);
++ raw_spin_lock(&devtree_lock);
+ prevp = &dp->properties;
+ while (*prevp) {
+ struct property *prop = *prevp;
+@@ -91,7 +91,7 @@ int of_set_property(struct device_node *
+ }
+ prevp = &(*prevp)->next;
+ }
+- write_unlock(&devtree_lock);
++ raw_spin_unlock(&devtree_lock);
+ mutex_unlock(&of_set_property_mutex);
+
+ /* XXX Upate procfs if necessary... */
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -55,7 +55,7 @@ static DEFINE_MUTEX(of_aliases_mutex);
+ /* use when traversing tree through the allnext, child, sibling,
+ * or parent members of struct device_node.
+ */
+-DEFINE_RWLOCK(devtree_lock);
++DEFINE_RAW_SPINLOCK(devtree_lock);
+
+ int of_n_addr_cells(struct device_node *np)
+ {
+@@ -188,10 +188,11 @@ struct property *of_find_property(const
+ int *lenp)
+ {
+ struct property *pp;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ pp = __of_find_property(np, name, lenp);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ return pp;
+ }
+@@ -209,13 +210,13 @@ struct device_node *of_find_all_nodes(st
+ {
+ struct device_node *np;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock(&devtree_lock);
+ np = prev ? prev->allnext : of_allnodes;
+ for (; np != NULL; np = np->allnext)
+ if (of_node_get(np))
+ break;
+ of_node_put(prev);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock(&devtree_lock);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_all_nodes);
+@@ -274,11 +275,12 @@ static int __of_device_is_compatible(con
+ int of_device_is_compatible(const struct device_node *device,
+ const char *compat)
+ {
++ unsigned long flags;
+ int res;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ res = __of_device_is_compatible(device, compat);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return res;
+ }
+ EXPORT_SYMBOL(of_device_is_compatible);
+@@ -340,13 +342,14 @@ EXPORT_SYMBOL(of_device_is_available);
+ struct device_node *of_get_parent(const struct device_node *node)
+ {
+ struct device_node *np;
++ unsigned long flags;
+
+ if (!node)
+ return NULL;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = of_node_get(node->parent);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_get_parent);
+@@ -365,14 +368,15 @@ EXPORT_SYMBOL(of_get_parent);
+ struct device_node *of_get_next_parent(struct device_node *node)
+ {
+ struct device_node *parent;
++ unsigned long flags;
+
+ if (!node)
+ return NULL;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ parent = of_node_get(node->parent);
+ of_node_put(node);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return parent;
+ }
+
+@@ -388,14 +392,15 @@ struct device_node *of_get_next_child(co
+ struct device_node *prev)
+ {
+ struct device_node *next;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ next = prev ? prev->sibling : node->child;
+ for (; next; next = next->sibling)
+ if (of_node_get(next))
+ break;
+ of_node_put(prev);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return next;
+ }
+ EXPORT_SYMBOL(of_get_next_child);
+@@ -413,7 +418,7 @@ struct device_node *of_get_next_availabl
+ {
+ struct device_node *next;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock(&devtree_lock);
+ next = prev ? prev->sibling : node->child;
+ for (; next; next = next->sibling) {
+ if (!of_device_is_available(next))
+@@ -422,7 +427,7 @@ struct device_node *of_get_next_availabl
+ break;
+ }
+ of_node_put(prev);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock(&devtree_lock);
+ return next;
+ }
+ EXPORT_SYMBOL(of_get_next_available_child);
+@@ -460,14 +465,15 @@ EXPORT_SYMBOL(of_get_child_by_name);
+ struct device_node *of_find_node_by_path(const char *path)
+ {
+ struct device_node *np = of_allnodes;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ for (; np; np = np->allnext) {
+ if (np->full_name && (of_node_cmp(np->full_name, path) == 0)
+ && of_node_get(np))
+ break;
+ }
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_path);
+@@ -487,15 +493,16 @@ struct device_node *of_find_node_by_name
+ const char *name)
+ {
+ struct device_node *np;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext)
+ if (np->name && (of_node_cmp(np->name, name) == 0)
+ && of_node_get(np))
+ break;
+ of_node_put(from);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_name);
+@@ -516,15 +523,16 @@ struct device_node *of_find_node_by_type
+ const char *type)
+ {
+ struct device_node *np;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext)
+ if (np->type && (of_node_cmp(np->type, type) == 0)
+ && of_node_get(np))
+ break;
+ of_node_put(from);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_type);
+@@ -547,8 +555,9 @@ struct device_node *of_find_compatible_n
+ const char *type, const char *compatible)
+ {
+ struct device_node *np;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext) {
+ if (type
+@@ -559,7 +568,7 @@ struct device_node *of_find_compatible_n
+ break;
+ }
+ of_node_put(from);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_compatible_node);
+@@ -581,8 +590,9 @@ struct device_node *of_find_node_with_pr
+ {
+ struct device_node *np;
+ struct property *pp;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext) {
+ for (pp = np->properties; pp; pp = pp->next) {
+@@ -594,7 +604,7 @@ struct device_node *of_find_node_with_pr
+ }
+ out:
+ of_node_put(from);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_node_with_property);
+@@ -635,10 +645,11 @@ const struct of_device_id *of_match_node
+ const struct device_node *node)
+ {
+ const struct of_device_id *match;
++ unsigned long flags;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ match = __of_match_node(matches, node);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return match;
+ }
+ EXPORT_SYMBOL(of_match_node);
+@@ -661,11 +672,12 @@ struct device_node *of_find_matching_nod
+ const struct of_device_id **match)
+ {
+ struct device_node *np;
++ unsigned long flags;
+
+ if (match)
+ *match = NULL;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext) {
+ if (__of_match_node(matches, np) && of_node_get(np)) {
+@@ -675,7 +687,7 @@ struct device_node *of_find_matching_nod
+ }
+ }
+ of_node_put(from);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_matching_node_and_match);
+@@ -718,12 +730,12 @@ struct device_node *of_find_node_by_phan
+ {
+ struct device_node *np;
+
+- read_lock(&devtree_lock);
++ raw_spin_lock(&devtree_lock);
+ for (np = of_allnodes; np; np = np->allnext)
+ if (np->phandle == handle)
+ break;
+ of_node_get(np);
+- read_unlock(&devtree_lock);
++ raw_spin_unlock(&devtree_lock);
+ return np;
+ }
+ EXPORT_SYMBOL(of_find_node_by_phandle);
+@@ -1195,18 +1207,18 @@ int of_add_property(struct device_node *
+ return rc;
+
+ prop->next = NULL;
+- write_lock_irqsave(&devtree_lock, flags);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ next = &np->properties;
+ while (*next) {
+ if (strcmp(prop->name, (*next)->name) == 0) {
+ /* duplicate ! don't insert it */
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return -1;
+ }
+ next = &(*next)->next;
+ }
+ *next = prop;
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ #ifdef CONFIG_PROC_DEVICETREE
+ /* try to add to proc as well if it was initialized */
+@@ -1236,7 +1248,7 @@ int of_remove_property(struct device_nod
+ if (rc)
+ return rc;
+
+- write_lock_irqsave(&devtree_lock, flags);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ next = &np->properties;
+ while (*next) {
+ if (*next == prop) {
+@@ -1249,7 +1261,7 @@ int of_remove_property(struct device_nod
+ }
+ next = &(*next)->next;
+ }
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ if (!found)
+ return -ENODEV;
+@@ -1289,7 +1301,7 @@ int of_update_property(struct device_nod
+ if (!oldprop)
+ return of_add_property(np, newprop);
+
+- write_lock_irqsave(&devtree_lock, flags);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ next = &np->properties;
+ while (*next) {
+ if (*next == oldprop) {
+@@ -1303,7 +1315,7 @@ int of_update_property(struct device_nod
+ }
+ next = &(*next)->next;
+ }
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ if (!found)
+ return -ENODEV;
+@@ -1376,12 +1388,12 @@ int of_attach_node(struct device_node *n
+ if (rc)
+ return rc;
+
+- write_lock_irqsave(&devtree_lock, flags);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+ np->sibling = np->parent->child;
+ np->allnext = of_allnodes;
+ np->parent->child = np;
+ of_allnodes = np;
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ of_add_proc_dt_entry(np);
+ return 0;
+@@ -1424,17 +1436,17 @@ int of_detach_node(struct device_node *n
+ if (rc)
+ return rc;
+
+- write_lock_irqsave(&devtree_lock, flags);
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+
+ if (of_node_check_flag(np, OF_DETACHED)) {
+ /* someone already detached it */
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return rc;
+ }
+
+ parent = np->parent;
+ if (!parent) {
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return rc;
+ }
+
+@@ -1461,7 +1473,7 @@ int of_detach_node(struct device_node *n
+ }
+
+ of_node_set_flag(np, OF_DETACHED);
+- write_unlock_irqrestore(&devtree_lock, flags);
++ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+ of_remove_proc_dt_entry(np);
+ return rc;
+--- a/include/linux/of.h
++++ b/include/linux/of.h
+@@ -92,7 +92,7 @@ static inline void of_node_put(struct de
+ extern struct device_node *of_allnodes;
+ extern struct device_node *of_chosen;
+ extern struct device_node *of_aliases;
+-extern rwlock_t devtree_lock;
++extern raw_spinlock_t devtree_lock;
+
+ static inline bool of_have_populated_dt(void)
+ {
diff --git a/patches/of-fixup-resursive-locking-code-paths.patch b/patches/of-fixup-resursive-locking-code-paths.patch
new file mode 100644
index 0000000..9736d92
--- /dev/null
+++ b/patches/of-fixup-resursive-locking-code-paths.patch
@@ -0,0 +1,209 @@
+Subject: OF: Fixup recursive locking code paths
+From: Paul Gortmaker <paul.gortmaker@windriver.com>
+Date: Fri, 25 Jan 2013 13:21:47 -0500
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+There is no real reason to use a rwlock for devtree_lock. It even
+could be a mutex, but unfortunately it's locked from cpu hotplug
+paths which can't schedule :(
+
+So it needs to become a raw lock on rt as well. The devtree_lock would
+be the only user of a raw_rw_lock, so we are better off cleaning up the
+recursive locking paths which allows us to convert devtree_lock to a
+read_lock.
+
+Here we do the standard thing of introducing __foo() as the "raw"
+version of foo(), so that we can take better control of the locking.
+The "raw" versions are not exported and are for internal use within
+the file itself.
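+
+In isolation the pattern looks like this (foo/__foo are placeholders,
+matching the wording above; the body is condensed):
+
+  /* __foo(): does the real work, never takes devtree_lock itself */
+  static struct property *__foo(struct device_node *np)
+  {
+          return np ? np->properties : NULL;
+  }
+
+  /* foo(): exported wrapper, the only place the lock is taken */
+  struct property *foo(struct device_node *np)
+  {
+          struct property *pp;
+
+          read_lock(&devtree_lock);
+          pp = __foo(np);
+          read_unlock(&devtree_lock);
+          return pp;
+  }
+  EXPORT_SYMBOL(foo);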
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
+Cc: devicetree-discuss@lists.ozlabs.org
+Cc: Grant Likely <grant.likely@secretlab.ca>
+Cc: Rob Herring <rob.herring@calxeda.com>
+Link: http://lkml.kernel.org/r/1359138107-14159-1-git-send-email-paul.gortmaker@windriver.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+
+[This has been living in the RT tree for several releases, and I've
+ put it on top of 3.8-rc4 mainline and tested it independently there
+ on a ppc sbc8548 board as well. So it would be nice to get this in 3.9]
+
+ drivers/of/base.c | 91 +++++++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 70 insertions(+), 21 deletions(-)
+
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -164,16 +164,14 @@ void of_node_put(struct device_node *nod
+ EXPORT_SYMBOL(of_node_put);
+ #endif /* CONFIG_OF_DYNAMIC */
+
+-struct property *of_find_property(const struct device_node *np,
+- const char *name,
+- int *lenp)
++static struct property *__of_find_property(const struct device_node *np,
++ const char *name, int *lenp)
+ {
+ struct property *pp;
+
+ if (!np)
+ return NULL;
+
+- read_lock(&devtree_lock);
+ for (pp = np->properties; pp; pp = pp->next) {
+ if (of_prop_cmp(pp->name, name) == 0) {
+ if (lenp)
+@@ -181,6 +179,18 @@ struct property *of_find_property(const
+ break;
+ }
+ }
++
++ return pp;
++}
++
++struct property *of_find_property(const struct device_node *np,
++ const char *name,
++ int *lenp)
++{
++ struct property *pp;
++
++ read_lock(&devtree_lock);
++ pp = __of_find_property(np, name, lenp);
+ read_unlock(&devtree_lock);
+
+ return pp;
+@@ -214,8 +224,20 @@ EXPORT_SYMBOL(of_find_all_nodes);
+ * Find a property with a given name for a given node
+ * and return the value.
+ */
++static const void *__of_get_property(const struct device_node *np,
++ const char *name, int *lenp)
++{
++ struct property *pp = __of_find_property(np, name, lenp);
++
++ return pp ? pp->value : NULL;
++}
++
++/*
++ * Find a property with a given name for a given node
++ * and return the value.
++ */
+ const void *of_get_property(const struct device_node *np, const char *name,
+- int *lenp)
++ int *lenp)
+ {
+ struct property *pp = of_find_property(np, name, lenp);
+
+@@ -226,13 +248,13 @@ EXPORT_SYMBOL(of_get_property);
+ /** Checks if the given "compat" string matches one of the strings in
+ * the device's "compatible" property
+ */
+-int of_device_is_compatible(const struct device_node *device,
+- const char *compat)
++static int __of_device_is_compatible(const struct device_node *device,
++ const char *compat)
+ {
+ const char* cp;
+ int cplen, l;
+
+- cp = of_get_property(device, "compatible", &cplen);
++ cp = __of_get_property(device, "compatible", &cplen);
+ if (cp == NULL)
+ return 0;
+ while (cplen > 0) {
+@@ -245,6 +267,20 @@ int of_device_is_compatible(const struct
+
+ return 0;
+ }
++
++/** Checks if the given "compat" string matches one of the strings in
++ * the device's "compatible" property
++ */
++int of_device_is_compatible(const struct device_node *device,
++ const char *compat)
++{
++ int res;
++
++ read_lock(&devtree_lock);
++ res = __of_device_is_compatible(device, compat);
++ read_unlock(&devtree_lock);
++ return res;
++}
+ EXPORT_SYMBOL(of_device_is_compatible);
+
+ /**
+@@ -518,7 +554,8 @@ struct device_node *of_find_compatible_n
+ if (type
+ && !(np->type && (of_node_cmp(np->type, type) == 0)))
+ continue;
+- if (of_device_is_compatible(np, compatible) && of_node_get(np))
++ if (__of_device_is_compatible(np, compatible) &&
++ of_node_get(np))
+ break;
+ }
+ of_node_put(from);
+@@ -562,15 +599,9 @@ out:
+ }
+ EXPORT_SYMBOL(of_find_node_with_property);
+
+-/**
+- * of_match_node - Tell if an device_node has a matching of_match structure
+- * @matches: array of of device match structures to search in
+- * @node: the of device structure to match against
+- *
+- * Low level utility function used by device matching.
+- */
+-const struct of_device_id *of_match_node(const struct of_device_id *matches,
+- const struct device_node *node)
++static
++const struct of_device_id *__of_match_node(const struct of_device_id *matches,
++ const struct device_node *node)
+ {
+ if (!matches)
+ return NULL;
+@@ -584,14 +615,32 @@ const struct of_device_id *of_match_node
+ match &= node->type
+ && !strcmp(matches->type, node->type);
+ if (matches->compatible[0])
+- match &= of_device_is_compatible(node,
+- matches->compatible);
++ match &= __of_device_is_compatible(node,
++ matches->compatible);
+ if (match)
+ return matches;
+ matches++;
+ }
+ return NULL;
+ }
++
++/**
++ * of_match_node - Tell if an device_node has a matching of_match structure
++ * @matches: array of of device match structures to search in
++ * @node: the of device structure to match against
++ *
++ * Low level utility function used by device matching.
++ */
++const struct of_device_id *of_match_node(const struct of_device_id *matches,
++ const struct device_node *node)
++{
++ const struct of_device_id *match;
++
++ read_lock(&devtree_lock);
++ match = __of_match_node(matches, node);
++ read_unlock(&devtree_lock);
++ return match;
++}
+ EXPORT_SYMBOL(of_match_node);
+
+ /**
+@@ -619,7 +668,7 @@ struct device_node *of_find_matching_nod
+ read_lock(&devtree_lock);
+ np = from ? from->allnext : of_allnodes;
+ for (; np; np = np->allnext) {
+- if (of_match_node(matches, np) && of_node_get(np)) {
++ if (__of_match_node(matches, np) && of_node_get(np)) {
+ if (match)
+ *match = matches;
+ break;
diff --git a/patches/oleg-signal-rt-fix.patch b/patches/oleg-signal-rt-fix.patch
new file mode 100644
index 0000000..b69ab2b
--- /dev/null
+++ b/patches/oleg-signal-rt-fix.patch
@@ -0,0 +1,142 @@
+From: Oleg Nesterov <oleg@redhat.com>
+Subject: signal/x86: Delay calling signals in atomic
+
+On x86_64 we must disable preemption before we enable interrupts
+for stack faults, int3 and debugging, because the current task is using
+a per CPU debug stack defined by the IST. If we schedule out, another task
+can come in and use the same stack and cause the stack to be corrupted
+and crash the kernel on return.
+
+When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
+one of these is the spin lock used in signal handling.
+
+Some of the debug code (int3) causes do_trap() to send a signal.
+This function takes a spin lock that has been converted to a mutex
+and may therefore sleep. If that happens, the stack corruption
+described above becomes possible.
+
+Instead of sending the signal right away, for PREEMPT_RT and x86_64,
+the signal information is stored in the task's task_struct and
+TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
+code will send the signal when preemption is enabled.
+
+[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
+ ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+
+ arch/x86/include/asm/signal.h | 13 +++++++++++++
+ arch/x86/kernel/signal.c | 8 ++++++++
+ include/linux/sched.h | 4 ++++
+ kernel/signal.c | 37 +++++++++++++++++++++++++++++++++++--
+ 4 files changed, 60 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/signal.h
++++ b/arch/x86/include/asm/signal.h
+@@ -23,6 +23,19 @@ typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+ } sigset_t;
+
++/*
++ * Because some traps use the IST stack, we must keep preemption
++ * disabled while calling do_trap(), but do_trap() may call
++ * force_sig_info() which will grab the signal spin_locks for the
++ * task, which in PREEMPT_RT_FULL are mutexes. By defining
++ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
++ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
++ * trap.
++ */
++#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
++#define ARCH_RT_DELAYS_SIGNAL_SEND
++#endif
++
+ #ifndef CONFIG_COMPAT
+ typedef sigset_t compat_sigset_t;
+ #endif
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -808,6 +808,14 @@ do_notify_resume(struct pt_regs *regs, v
+ mce_notify_process();
+ #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+
++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++ if (unlikely(current->forced_info.si_signo)) {
++ struct task_struct *t = current;
++ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
++ t->forced_info.si_signo = 0;
++ }
++#endif
++
+ if (thread_info_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1412,6 +1412,10 @@ struct task_struct {
+ sigset_t blocked, real_blocked;
+ sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
+ struct sigpending pending;
++#ifdef CONFIG_PREEMPT_RT_FULL
++ /* TODO: move me into ->restart_block ? */
++ struct siginfo forced_info;
++#endif
+
+ unsigned long sas_ss_sp;
+ size_t sas_ss_size;
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -1302,8 +1302,8 @@ int do_send_sig_info(int sig, struct sig
+ * We don't want to have recursive SIGSEGV's etc, for example,
+ * that is why we also clear SIGNAL_UNKILLABLE.
+ */
+-int
+-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
++static int
++do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
+ {
+ unsigned long int flags;
+ int ret, blocked, ignored;
+@@ -1328,6 +1328,39 @@ force_sig_info(int sig, struct siginfo *
+ return ret;
+ }
+
++int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
++{
++/*
++ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
++ * since it can not enable preemption, and the signal code's spin_locks
++ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
++ * send the signal on exit of the trap.
++ */
++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++ if (in_atomic()) {
++ if (WARN_ON_ONCE(t != current))
++ return 0;
++ if (WARN_ON_ONCE(t->forced_info.si_signo))
++ return 0;
++
++ if (is_si_special(info)) {
++ WARN_ON_ONCE(info != SEND_SIG_PRIV);
++ t->forced_info.si_signo = sig;
++ t->forced_info.si_errno = 0;
++ t->forced_info.si_code = SI_KERNEL;
++ t->forced_info.si_pid = 0;
++ t->forced_info.si_uid = 0;
++ } else {
++ t->forced_info = *info;
++ }
++
++ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
++ return 0;
++ }
++#endif
++ return do_force_sig_info(sig, info, t);
++}
++
+ /*
+ * Nuke all other threads in the group.
+ */
diff --git a/patches/panic-disable-random-on-rt.patch b/patches/panic-disable-random-on-rt.patch
new file mode 100644
index 0000000..23d4254
--- /dev/null
+++ b/patches/panic-disable-random-on-rt.patch
@@ -0,0 +1,21 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Subject: panic: skip get_random_bytes for RT_FULL in init_oops_id
+
+---
+ kernel/panic.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -371,9 +371,11 @@ static u64 oops_id;
+
+ static int init_oops_id(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ if (!oops_id)
+ get_random_bytes(&oops_id, sizeof(oops_id));
+ else
++#endif
+ oops_id++;
+
+ return 0;
diff --git a/patches/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch b/patches/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch
new file mode 100644
index 0000000..a41bbdf
--- /dev/null
+++ b/patches/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch
@@ -0,0 +1,173 @@
+Subject: rcu: Make ksoftirqd do RCU quiescent states
+From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Date: Wed, 5 Oct 2011 11:45:18 -0700
+
+Implementing RCU-bh in terms of RCU-preempt makes the system vulnerable
+to network-based denial-of-service attacks. This patch therefore
+makes __do_softirq() invoke rcu_bh_qs(), but only when __do_softirq()
+is running in ksoftirqd context. A wrapper layer is interposed so that
+other calls to __do_softirq() avoid invoking rcu_bh_qs(). The underlying
+function __do_softirq_common() does the actual work.
+
+The reason that rcu_bh_qs() is bad in these non-ksoftirqd contexts is
+that there might be a local_bh_enable() inside an RCU-preempt read-side
+critical section. This local_bh_enable() can invoke __do_softirq()
+directly, so if __do_softirq() were to invoke rcu_bh_qs() (which just
+calls rcu_preempt_qs() in the PREEMPT_RT_FULL case), there would be
+an illegal RCU-preempt quiescent state in the middle of an RCU-preempt
+read-side critical section. Therefore, quiescent states can only happen
+in cases where __do_softirq() is invoked directly from ksoftirqd.
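+
+Condensed, the wrapper layer looks like this (a sketch of the change
+below, not additional code):
+
+        static void __do_softirq_common(int need_rcu_bh_qs);
+
+        /* ordinary callers must never report a quiescent state */
+        void __do_softirq(void)
+        {
+                __do_softirq_common(0);
+        }
+
+        /* only the ksoftirqd path passes a true value, so only it
+         * ever reaches rcu_bh_qs() from handle_pending_softirqs() */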
+
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Link: http://lkml.kernel.org/r/20111005184518.GA21601@linux.vnet.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/rcupdate.h | 6 ------
+ kernel/rcutree.c | 9 ++++++++-
+ kernel/rcutree_plugin.h | 7 ++++++-
+ kernel/softirq.c | 20 +++++++++++++-------
+ 4 files changed, 27 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -220,13 +220,7 @@ static inline int rcu_preempt_depth(void
+
+ /* Internal to kernel */
+ extern void rcu_sched_qs(int cpu);
+-
+-#ifndef CONFIG_PREEMPT_RT_FULL
+ extern void rcu_bh_qs(int cpu);
+-#else
+-static inline void rcu_bh_qs(int cpu) { }
+-#endif
+-
+ extern void rcu_check_callbacks(int cpu, int user);
+ struct notifier_block;
+ extern void rcu_idle_enter(void);
+--- a/kernel/rcutree.c
++++ b/kernel/rcutree.c
+@@ -181,7 +181,14 @@ void rcu_sched_qs(int cpu)
+ rdp->passed_quiesce = 1;
+ }
+
+-#ifndef CONFIG_PREEMPT_RT_FULL
++#ifdef CONFIG_PREEMPT_RT_FULL
++static void rcu_preempt_qs(int cpu);
++
++void rcu_bh_qs(int cpu)
++{
++ rcu_preempt_qs(cpu);
++}
++#else
+ void rcu_bh_qs(int cpu)
+ {
+ struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+--- a/kernel/rcutree_plugin.h
++++ b/kernel/rcutree_plugin.h
+@@ -1519,7 +1519,7 @@ static void __cpuinit rcu_prepare_kthrea
+
+ #endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+-#if !defined(CONFIG_RCU_FAST_NO_HZ)
++#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
+
+ /*
+ * Check to see if any future RCU-related work will need to be done
+@@ -1535,6 +1535,9 @@ int rcu_needs_cpu(int cpu, unsigned long
+ *delta_jiffies = ULONG_MAX;
+ return rcu_cpu_has_callbacks(cpu);
+ }
++#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
++
++#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+ /*
+ * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
+@@ -1651,6 +1654,7 @@ static bool rcu_cpu_has_nonlazy_callback
+ rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+@@ -1694,6 +1698,7 @@ int rcu_needs_cpu(int cpu, unsigned long
+ }
+ return 0;
+ }
++#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
+
+ /*
+ * Handler for smp_call_function_single(). The only point of this
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -142,7 +142,7 @@ static void wakeup_softirqd(void)
+ wake_up_process(tsk);
+ }
+
+-static void handle_pending_softirqs(u32 pending, int cpu)
++static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
+ {
+ struct softirq_action *h = softirq_vec;
+ unsigned int prev_count = preempt_count();
+@@ -165,7 +165,8 @@ static void handle_pending_softirqs(u32
+ prev_count, (unsigned int) preempt_count());
+ preempt_count() = prev_count;
+ }
+- rcu_bh_qs(cpu);
++ if (need_rcu_bh_qs)
++ rcu_bh_qs(cpu);
+ }
+ local_irq_disable();
+ }
+@@ -325,7 +326,7 @@ restart:
+ /* Reset the pending bitmask before enabling irqs */
+ set_softirq_pending(0);
+
+- handle_pending_softirqs(pending, cpu);
++ handle_pending_softirqs(pending, cpu, 1);
+
+ pending = local_softirq_pending();
+ if (pending && --max_restart)
+@@ -376,7 +377,12 @@ static void ksoftirqd_clr_sched_params(u
+ static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock);
+ static DEFINE_PER_CPU(struct task_struct *, local_softirq_runner);
+
+-static void __do_softirq(void);
++static void __do_softirq_common(int need_rcu_bh_qs);
++
++void __do_softirq(void)
++{
++ __do_softirq_common(0);
++}
+
+ void __init softirq_early_init(void)
+ {
+@@ -447,7 +453,7 @@ EXPORT_SYMBOL(in_serving_softirq);
+ * Called with bh and local interrupts disabled. For full RT cpu must
+ * be pinned.
+ */
+-static void __do_softirq(void)
++static void __do_softirq_common(int need_rcu_bh_qs)
+ {
+ u32 pending = local_softirq_pending();
+ int cpu = smp_processor_id();
+@@ -461,7 +467,7 @@ static void __do_softirq(void)
+
+ lockdep_softirq_enter();
+
+- handle_pending_softirqs(pending, cpu);
++ handle_pending_softirqs(pending, cpu, need_rcu_bh_qs);
+
+ pending = local_softirq_pending();
+ if (pending)
+@@ -500,7 +506,7 @@ static int __thread_do_softirq(int cpu)
+ * schedule!
+ */
+ if (local_softirq_pending())
+- __do_softirq();
++ __do_softirq_common(cpu >= 0);
+ local_unlock(local_softirq_lock);
+ unpin_current_cpu();
+ preempt_disable();
diff --git a/patches/pci-access-use-__wake_up_all_locked.patch b/patches/pci-access-use-__wake_up_all_locked.patch
new file mode 100644
index 0000000..6eff68a
--- /dev/null
+++ b/patches/pci-access-use-__wake_up_all_locked.patch
@@ -0,0 +1,25 @@
+Subject: pci: Use __wake_up_all_locked in pci_unblock_user_cfg_access()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 01 Dec 2011 00:07:16 +0100
+
+The waitqueue is protected by the pci_lock, so we can simply avoid
+taking the waitqueue lock itself. That prevents the
+might_sleep()/scheduling-while-atomic problem on RT.
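+
+The resulting pattern is roughly (names as in the hunk below; the
+lock/unlock lines are already present in pci_cfg_access_unlock()):
+
+        raw_spin_lock_irqsave(&pci_lock, flags);
+        dev->block_cfg_access = 0;
+        /* pci_lock already protects the waitqueue, so skip its lock */
+        wake_up_all_locked(&pci_cfg_wait);
+        raw_spin_unlock_irqrestore(&pci_lock, flags);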
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ drivers/pci/access.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/access.c
++++ b/drivers/pci/access.c
+@@ -465,7 +465,7 @@ void pci_cfg_access_unlock(struct pci_de
+ WARN_ON(!dev->block_cfg_access);
+
+ dev->block_cfg_access = 0;
+- wake_up_all(&pci_cfg_wait);
++ wake_up_all_locked(&pci_cfg_wait);
+ raw_spin_unlock_irqrestore(&pci_lock, flags);
+ }
+ EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
diff --git a/patches/perf-make-swevent-hrtimer-irqsafe.patch b/patches/perf-make-swevent-hrtimer-irqsafe.patch
new file mode 100644
index 0000000..2215a3e
--- /dev/null
+++ b/patches/perf-make-swevent-hrtimer-irqsafe.patch
@@ -0,0 +1,68 @@
+From: Yong Zhang <yong.zhang@windriver.com>
+Date: Wed, 11 Jul 2012 22:05:21 +0000
+Subject: perf: Make swevent hrtimer run in irq instead of softirq
+
+Otherwise we get a deadlock like below:
+
+[ 1044.042749] BUG: scheduling while atomic: ksoftirqd/21/141/0x00010003
+[ 1044.042752] INFO: lockdep is turned off.
+[ 1044.042754] Modules linked in:
+[ 1044.042757] Pid: 141, comm: ksoftirqd/21 Tainted: G W 3.4.0-rc2-rt3-23676-ga723175-dirty #29
+[ 1044.042759] Call Trace:
+[ 1044.042761] <IRQ> [<ffffffff8107d8e5>] __schedule_bug+0x65/0x80
+[ 1044.042770] [<ffffffff8168978c>] __schedule+0x83c/0xa70
+[ 1044.042775] [<ffffffff8106bdd2>] ? prepare_to_wait+0x32/0xb0
+[ 1044.042779] [<ffffffff81689a5e>] schedule+0x2e/0xa0
+[ 1044.042782] [<ffffffff81071ebd>] hrtimer_wait_for_timer+0x6d/0xb0
+[ 1044.042786] [<ffffffff8106bb30>] ? wake_up_bit+0x40/0x40
+[ 1044.042790] [<ffffffff81071f20>] hrtimer_cancel+0x20/0x40
+[ 1044.042794] [<ffffffff8111da0c>] perf_swevent_cancel_hrtimer+0x3c/0x50
+[ 1044.042798] [<ffffffff8111da31>] task_clock_event_stop+0x11/0x40
+[ 1044.042802] [<ffffffff8111da6e>] task_clock_event_del+0xe/0x10
+[ 1044.042805] [<ffffffff8111c568>] event_sched_out+0x118/0x1d0
+[ 1044.042809] [<ffffffff8111c649>] group_sched_out+0x29/0x90
+[ 1044.042813] [<ffffffff8111ed7e>] __perf_event_disable+0x18e/0x200
+[ 1044.042817] [<ffffffff8111c343>] remote_function+0x63/0x70
+[ 1044.042821] [<ffffffff810b0aae>] generic_smp_call_function_single_interrupt+0xce/0x120
+[ 1044.042826] [<ffffffff81022bc7>] smp_call_function_single_interrupt+0x27/0x40
+[ 1044.042831] [<ffffffff8168d50c>] call_function_single_interrupt+0x6c/0x80
+[ 1044.042833] <EOI> [<ffffffff811275b0>] ? perf_event_overflow+0x20/0x20
+[ 1044.042840] [<ffffffff8168b970>] ? _raw_spin_unlock_irq+0x30/0x70
+[ 1044.042844] [<ffffffff8168b976>] ? _raw_spin_unlock_irq+0x36/0x70
+[ 1044.042848] [<ffffffff810702e2>] run_hrtimer_softirq+0xc2/0x200
+[ 1044.042853] [<ffffffff811275b0>] ? perf_event_overflow+0x20/0x20
+[ 1044.042857] [<ffffffff81045265>] __do_softirq_common+0xf5/0x3a0
+[ 1044.042862] [<ffffffff81045c3d>] __thread_do_softirq+0x15d/0x200
+[ 1044.042865] [<ffffffff81045dda>] run_ksoftirqd+0xfa/0x210
+[ 1044.042869] [<ffffffff81045ce0>] ? __thread_do_softirq+0x200/0x200
+[ 1044.042873] [<ffffffff81045ce0>] ? __thread_do_softirq+0x200/0x200
+[ 1044.042877] [<ffffffff8106b596>] kthread+0xb6/0xc0
+[ 1044.042881] [<ffffffff8168b97b>] ? _raw_spin_unlock_irq+0x3b/0x70
+[ 1044.042886] [<ffffffff8168d994>] kernel_thread_helper+0x4/0x10
+[ 1044.042889] [<ffffffff8107d98c>] ? finish_task_switch+0x8c/0x110
+[ 1044.042894] [<ffffffff8168b97b>] ? _raw_spin_unlock_irq+0x3b/0x70
+[ 1044.042897] [<ffffffff8168bd5d>] ? retint_restore_args+0xe/0xe
+[ 1044.042900] [<ffffffff8106b4e0>] ? kthreadd+0x1e0/0x1e0
+[ 1044.042902] [<ffffffff8168d990>] ? gs_change+0xb/0xb
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1341476476-5666-1-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+
+---
+ kernel/events/core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5638,6 +5638,7 @@ static void perf_swevent_init_hrtimer(st
+
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hwc->hrtimer.function = perf_swevent_hrtimer;
++ hwc->hrtimer.irqsafe = 1;
+
+ /*
+ * Since hrtimers have a fixed rate, we can do a static freq->period
diff --git a/patches/perf-move-irq-work-to-softirq-in-rt.patch b/patches/perf-move-irq-work-to-softirq-in-rt.patch
new file mode 100644
index 0000000..157a8ba
--- /dev/null
+++ b/patches/perf-move-irq-work-to-softirq-in-rt.patch
@@ -0,0 +1,61 @@
+Subject: x86-no-perf-irq-work-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 13 Jul 2011 14:05:05 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/kernel/irq_work.c | 2 ++
+ kernel/irq_work.c | 2 ++
+ kernel/timer.c | 6 +++++-
+ 3 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/irq_work.c
++++ b/arch/x86/kernel/irq_work.c
+@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_re
+ irq_exit();
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ void arch_irq_work_raise(void)
+ {
+ #ifdef CONFIG_X86_LOCAL_APIC
+@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
+ apic_wait_icr_idle();
+ #endif
+ }
++#endif
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -107,8 +107,10 @@ void irq_work_run(void)
+ if (llist_empty(this_list))
+ return;
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ BUG_ON(!in_irq());
+ BUG_ON(!irqs_disabled());
++#endif
+
+ llnode = llist_del_all(this_list);
+ while (llnode != NULL) {
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -1419,7 +1419,7 @@ void update_process_times(int user_tick)
+ scheduler_tick();
+ run_local_timers();
+ rcu_check_callbacks(cpu, user_tick);
+-#ifdef CONFIG_IRQ_WORK
++#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
+ if (in_irq())
+ irq_work_run();
+ #endif
+@@ -1433,6 +1433,10 @@ static void run_timer_softirq(struct sof
+ {
+ struct tvec_base *base = __this_cpu_read(tvec_bases);
+
++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
++ irq_work_run();
++#endif
++
+ printk_tick();
+ hrtimer_run_pending();
+
diff --git a/patches/peter_zijlstra-frob-migrate_disable-2.patch b/patches/peter_zijlstra-frob-migrate_disable-2.patch
new file mode 100644
index 0000000..3c05ad9
--- /dev/null
+++ b/patches/peter_zijlstra-frob-migrate_disable-2.patch
@@ -0,0 +1,174 @@
+Subject: sched: Generic migrate_disable
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu Aug 11 15:14:58 CEST 2011
+
+Make migrate_disable() be a preempt_disable() for !rt kernels. This
+allows generic code to use it but still enforces that these code
+sections stay relatively small.
+
+A preemptible migrate_disable() accessible for general use would allow
+people to grow arbitrary per-cpu crap instead of cleaning these things
+up.
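+
+The end result is roughly the following mapping (sketch of the
+preempt.h hunk below, ignoring the RT && !SMP stubs for brevity):
+
+        #ifdef CONFIG_PREEMPT_RT_FULL
+        /* real, preemptible implementation in kernel/sched/core.c */
+        extern void migrate_disable(void);
+        extern void migrate_enable(void);
+        #else
+        /* !RT: just a short non-preemptible section */
+        # define migrate_disable()      preempt_disable()
+        # define migrate_enable()       preempt_enable()
+        #endif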
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-275i87sl8e1jcamtchmehonm@git.kernel.org
+---
+ include/linux/preempt.h | 21 +++++++++------------
+ include/linux/sched.h | 13 +++++++++++++
+ include/linux/smp.h | 9 ++-------
+ kernel/sched/core.c | 6 ++++--
+ kernel/trace/trace.c | 2 +-
+ lib/smp_processor_id.c | 2 +-
+ 6 files changed, 30 insertions(+), 23 deletions(-)
+
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -108,28 +108,25 @@ do { \
+
+ #endif /* CONFIG_PREEMPT_COUNT */
+
+-#ifdef CONFIG_SMP
+-extern void migrate_disable(void);
+-extern void migrate_enable(void);
+-#else
+-# define migrate_disable() do { } while (0)
+-# define migrate_enable() do { } while (0)
+-#endif
+-
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ # define preempt_disable_rt() preempt_disable()
+ # define preempt_enable_rt() preempt_enable()
+ # define preempt_disable_nort() do { } while (0)
+ # define preempt_enable_nort() do { } while (0)
+-# define migrate_disable_rt() migrate_disable()
+-# define migrate_enable_rt() migrate_enable()
++# ifdef CONFIG_SMP
++ extern void migrate_disable(void);
++ extern void migrate_enable(void);
++# else /* CONFIG_SMP */
++# define migrate_disable() do { } while (0)
++# define migrate_enable() do { } while (0)
++# endif /* CONFIG_SMP */
+ #else
+ # define preempt_disable_rt() do { } while (0)
+ # define preempt_enable_rt() do { } while (0)
+ # define preempt_disable_nort() preempt_disable()
+ # define preempt_enable_nort() preempt_enable()
+-# define migrate_disable_rt() do { } while (0)
+-# define migrate_enable_rt() do { } while (0)
++# define migrate_disable() preempt_disable()
++# define migrate_enable() preempt_enable()
+ #endif
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1279,7 +1279,9 @@ struct task_struct {
+ #endif
+
+ unsigned int policy;
++#ifdef CONFIG_PREEMPT_RT_FULL
+ int migrate_disable;
++#endif
+ int nr_cpus_allowed;
+ cpumask_t cpus_allowed;
+
+@@ -2810,11 +2812,22 @@ static inline void set_task_cpu(struct t
+
+ #endif /* CONFIG_SMP */
+
++static inline int __migrate_disabled(struct task_struct *p)
++{
++#ifdef CONFIG_PREEMPT_RT_FULL
++ return p->migrate_disable;
++#else
++ return 0;
++#endif
++}
++
+ /* Future-safe accessor for struct task_struct's cpus_allowed. */
+ static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
+ {
++#ifdef CONFIG_PREEMPT_RT_FULL
+ if (p->migrate_disable)
+ return cpumask_of(task_cpu(p));
++#endif
+
+ return &p->cpus_allowed;
+ }
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -218,13 +218,8 @@ static inline void kick_all_cpus_sync(vo
+ #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
+ #define put_cpu() preempt_enable()
+
+-#ifndef CONFIG_PREEMPT_RT_FULL
+-# define get_cpu_light() get_cpu()
+-# define put_cpu_light() put_cpu()
+-#else
+-# define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
+-# define put_cpu_light() migrate_enable()
+-#endif
++#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
++#define put_cpu_light() migrate_enable()
+
+ /*
+ * Callback to arch code if there's nosmp or maxcpus=0 on the
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4734,7 +4734,7 @@ void __cpuinit init_idle(struct task_str
+ #ifdef CONFIG_SMP
+ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ {
+- if (!p->migrate_disable) {
++ if (!__migrate_disabled(p)) {
+ if (p->sched_class && p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, new_mask);
+ p->nr_cpus_allowed = cpumask_weight(new_mask);
+@@ -4790,7 +4790,7 @@ int set_cpus_allowed_ptr(struct task_str
+ do_set_cpus_allowed(p, new_mask);
+
+ /* Can the task run on the task's current CPU? If so, we're done */
+- if (cpumask_test_cpu(task_cpu(p), new_mask) || p->migrate_disable)
++ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
+ goto out;
+
+ dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+@@ -4809,6 +4809,7 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+
++#ifdef CONFIG_PREEMPT_RT_FULL
+ void migrate_disable(void)
+ {
+ struct task_struct *p = current;
+@@ -4901,6 +4902,7 @@ void migrate_enable(void)
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(migrate_enable);
++#endif /* CONFIG_PREEMPT_RT_FULL */
+
+ /*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1178,7 +1178,7 @@ tracing_generic_entry_update(struct trac
+ ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
+
+- entry->migrate_disable = (tsk) ? tsk->migrate_disable & 0xFF : 0;
++ entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
+ }
+ EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
+
+--- a/lib/smp_processor_id.c
++++ b/lib/smp_processor_id.c
+@@ -41,7 +41,7 @@ notrace unsigned int debug_smp_processor
+
+ printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x %08x] "
+ "code: %s/%d\n", preempt_count() - 1,
+- current->migrate_disable, current->comm, current->pid);
++ __migrate_disabled(current), current->comm, current->pid);
+ print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+ dump_stack();
+
diff --git a/patches/peter_zijlstra-frob-migrate_disable.patch b/patches/peter_zijlstra-frob-migrate_disable.patch
new file mode 100644
index 0000000..a6bb210
--- /dev/null
+++ b/patches/peter_zijlstra-frob-migrate_disable.patch
@@ -0,0 +1,67 @@
+Subject: sched: Optimize migrate_disable
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu Aug 11 15:03:35 CEST 2011
+
+Change from task_rq_lock() to raw_spin_lock(&rq->lock) to avoid a few
+atomic ops. See comment on why it should be safe.
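+
+The change boils down to (sketch, elided bodies marked with ...; names
+as in the hunks below):
+
+        /* before: task_rq_lock() also takes p->pi_lock */
+        rq = task_rq_lock(p, &flags);
+        ...
+        task_rq_unlock(rq, p, &flags);
+
+        /* after: p is always current here, so rq->lock alone suffices */
+        rq = this_rq();
+        raw_spin_lock_irqsave(&rq->lock, flags);
+        ...
+        raw_spin_unlock_irqrestore(&rq->lock, flags);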
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-cbz6hkl5r5mvwtx5s3tor2y6@git.kernel.org
+---
+ kernel/sched/core.c | 24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4829,7 +4829,19 @@ void migrate_disable(void)
+ preempt_enable();
+ return;
+ }
+- rq = task_rq_lock(p, &flags);
++
++ /*
++ * Since this is always current we can get away with only locking
++ * rq->lock, the ->cpus_allowed value can normally only be changed
++ * while holding both p->pi_lock and rq->lock, but seeing that this
++ * is current, we cannot actually be waking up, so all code that
++ * relies on serialization against p->pi_lock is out of scope.
++ *
++ * Taking rq->lock serializes us against things like
++ * set_cpus_allowed_ptr() that can still happen concurrently.
++ */
++ rq = this_rq();
++ raw_spin_lock_irqsave(&rq->lock, flags);
+ p->migrate_disable = 1;
+ mask = tsk_cpus_allowed(p);
+
+@@ -4840,7 +4852,7 @@ void migrate_disable(void)
+ p->sched_class->set_cpus_allowed(p, mask);
+ p->nr_cpus_allowed = cpumask_weight(mask);
+ }
+- task_rq_unlock(rq, p, &flags);
++ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(migrate_disable);
+@@ -4868,7 +4880,11 @@ void migrate_enable(void)
+ return;
+ }
+
+- rq = task_rq_lock(p, &flags);
++ /*
++ * See comment in migrate_disable().
++ */
++ rq = this_rq();
++ raw_spin_lock_irqsave(&rq->lock, flags);
+ p->migrate_disable = 0;
+ mask = tsk_cpus_allowed(p);
+
+@@ -4880,7 +4896,7 @@ void migrate_enable(void)
+ p->nr_cpus_allowed = cpumask_weight(mask);
+ }
+
+- task_rq_unlock(rq, p, &flags);
++ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ unpin_current_cpu();
+ preempt_enable();
+ }
diff --git a/patches/peter_zijlstra-frob-pagefault_disable.patch b/patches/peter_zijlstra-frob-pagefault_disable.patch
new file mode 100644
index 0000000..cd63205
--- /dev/null
+++ b/patches/peter_zijlstra-frob-pagefault_disable.patch
@@ -0,0 +1,342 @@
+Subject: mm: pagefault_disabled()
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu Aug 11 15:31:31 CEST 2011
+
+Wrap the test for pagefault_disabled() into a helper; this allows us
+to remove the need for current->pagefault_disabled on !-rt kernels.
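+
+The helper itself is trivial (sketch matching the sched.h hunk below):
+
+        static inline bool pagefault_disabled(void)
+        {
+                /* cur_pf_disabled() is constant false on !-rt, so this
+                 * collapses back to the plain in_atomic() test there */
+                return in_atomic() || cur_pf_disabled();
+        }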
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-3yy517m8zsi9fpsf14xfaqkw@git.kernel.org
+---
+ arch/alpha/mm/fault.c | 2 +-
+ arch/arm/mm/fault.c | 2 +-
+ arch/avr32/mm/fault.c | 3 +--
+ arch/cris/mm/fault.c | 2 +-
+ arch/frv/mm/fault.c | 2 +-
+ arch/ia64/mm/fault.c | 2 +-
+ arch/m32r/mm/fault.c | 2 +-
+ arch/m68k/mm/fault.c | 2 +-
+ arch/microblaze/mm/fault.c | 2 +-
+ arch/mips/mm/fault.c | 2 +-
+ arch/mn10300/mm/fault.c | 2 +-
+ arch/parisc/mm/fault.c | 2 +-
+ arch/powerpc/mm/fault.c | 2 +-
+ arch/s390/mm/fault.c | 8 ++++----
+ arch/score/mm/fault.c | 2 +-
+ arch/sh/mm/fault.c | 2 +-
+ arch/sparc/mm/fault_32.c | 2 +-
+ arch/sparc/mm/fault_64.c | 2 +-
+ arch/tile/mm/fault.c | 2 +-
+ arch/um/kernel/trap.c | 2 +-
+ arch/x86/mm/fault.c | 2 +-
+ arch/xtensa/mm/fault.c | 2 +-
+ include/linux/sched.h | 14 ++++++++++++++
+ kernel/fork.c | 2 ++
+ 24 files changed, 41 insertions(+), 26 deletions(-)
+
+--- a/arch/alpha/mm/fault.c
++++ b/arch/alpha/mm/fault.c
+@@ -108,7 +108,7 @@ do_page_fault(unsigned long address, uns
+
+ /* If we're in an interrupt context, or have no user context,
+ we must not take the fault. */
+- if (!mm || in_atomic() || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ #ifdef CONFIG_ALPHA_LARGE_VMALLOC
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -279,7 +279,7 @@ do_page_fault(unsigned long addr, unsign
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ /*
+--- a/arch/avr32/mm/fault.c
++++ b/arch/avr32/mm/fault.c
+@@ -81,8 +81,7 @@ asmlinkage void do_page_fault(unsigned l
+ * If we're in an interrupt or have no user context, we must
+ * not take the fault...
+ */
+- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM) ||
+- current->pagefault_disabled)
++ if (!mm || regs->sr & SYSREG_BIT(GM) || pagefault_disabled())
+ goto no_context;
+
+ local_irq_enable();
+--- a/arch/cris/mm/fault.c
++++ b/arch/cris/mm/fault.c
+@@ -114,7 +114,7 @@ do_page_fault(unsigned long address, str
+ * user context, we must not take the fault.
+ */
+
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ retry:
+--- a/arch/frv/mm/fault.c
++++ b/arch/frv/mm/fault.c
+@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datamm
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -98,7 +98,7 @@ ia64_do_page_fault (unsigned long addres
+ /*
+ * If we're in an interrupt or have no user context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ #ifdef CONFIG_VIRTUAL_MEM_MAP
+--- a/arch/m32r/mm/fault.c
++++ b/arch/m32r/mm/fault.c
+@@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user context or are running in an
+ * atomic region then we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto bad_area_nosemaphore;
+
+ /* When running in the kernel we expect faults to occur only to
+--- a/arch/m68k/mm/fault.c
++++ b/arch/m68k/mm/fault.c
+@@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs,
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ retry:
+--- a/arch/microblaze/mm/fault.c
++++ b/arch/microblaze/mm/fault.c
+@@ -108,7 +108,7 @@ void do_page_fault(struct pt_regs *regs,
+ if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+ is_write = 0;
+
+- if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
++ if (unlikely(!mm || pagefault_disabled())) {
+ if (kernel_mode(regs))
+ goto bad_area_nosemaphore;
+
+--- a/arch/mips/mm/fault.c
++++ b/arch/mips/mm/fault.c
+@@ -89,7 +89,7 @@ asmlinkage void __kprobes do_page_fault(
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto bad_area_nosemaphore;
+
+ retry:
+--- a/arch/mn10300/mm/fault.c
++++ b/arch/mn10300/mm/fault.c
+@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ retry:
+--- a/arch/parisc/mm/fault.c
++++ b/arch/parisc/mm/fault.c
+@@ -176,7 +176,7 @@ void do_page_fault(struct pt_regs *regs,
+ unsigned long acc_type;
+ int fault;
+
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/powerpc/mm/fault.c
++++ b/arch/powerpc/mm/fault.c
+@@ -259,7 +259,7 @@ int __kprobes do_page_fault(struct pt_re
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
+- if (in_atomic() || mm == NULL || current->pagefault_disabled) {
++ if (!mm || pagefault_disabled()) {
+ if (!user_mode(regs))
+ return SIGSEGV;
+ /* in_atomic() in user mode is really bad,
+--- a/arch/s390/mm/fault.c
++++ b/arch/s390/mm/fault.c
+@@ -296,8 +296,8 @@ static inline int do_exception(struct pt
+ * user context.
+ */
+ fault = VM_FAULT_BADCONTEXT;
+- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
+- tsk->pagefault_disabled))
++ if (unlikely(!user_space_fault(trans_exc_code) ||
++ !mm || pagefault_disabled()))
+ goto out;
+
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+@@ -436,8 +436,8 @@ void __kprobes do_asce_exception(struct
+ clear_tsk_thread_flag(current, TIF_PER_TRAP);
+
+ trans_exc_code = regs->int_parm_long;
+- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
+- current->pagefault_disabled()));
++ if (unlikely(!user_space_fault(trans_exc_code) || !mm ||
++ pagefault_disabled()))
+ goto no_context;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/score/mm/fault.c
++++ b/arch/score/mm/fault.c
+@@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto bad_area_nosemaphore;
+
+ down_read(&mm->mmap_sem);
+--- a/arch/sh/mm/fault.c
++++ b/arch/sh/mm/fault.c
+@@ -440,7 +440,7 @@ asmlinkage void __kprobes do_page_fault(
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
+ */
+- if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
++ if (unlikely(!mm || pagefault_disabled())) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+--- a/arch/sparc/mm/fault_32.c
++++ b/arch/sparc/mm/fault_32.c
+@@ -200,7 +200,7 @@ asmlinkage void do_sparc_fault(struct pt
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled)
++ if (!mm || pagefault_disabled())
+ goto no_context;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/sparc/mm/fault_64.c
++++ b/arch/sparc/mm/fault_64.c
+@@ -321,7 +321,7 @@ asmlinkage void __kprobes do_sparc64_fau
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_enabled)
++ if (!mm || pagefault_disabled())
+ goto intr_or_no_mm;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+--- a/arch/tile/mm/fault.c
++++ b/arch/tile/mm/fault.c
+@@ -360,7 +360,7 @@ static int handle_page_fault(struct pt_r
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled) {
++ if (!mm || pagefault_disabled()) {
+ vma = NULL; /* happy compiler */
+ goto bad_area_nosemaphore;
+ }
+--- a/arch/um/kernel/trap.c
++++ b/arch/um/kernel/trap.c
+@@ -39,7 +39,7 @@ int handle_page_fault(unsigned long addr
+ * If the fault was during atomic operation, don't take the fault, just
+ * fail.
+ */
+- if (in_atomic() || current->pagefault_disabled)
++ if (pagefault_disabled())
+ goto out_nosemaphore;
+
+ retry:
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -1108,7 +1108,7 @@ __do_page_fault(struct pt_regs *regs, un
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
+ */
+- if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
++ if (unlikely(!mm || pagefault_disabled())) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+--- a/arch/xtensa/mm/fault.c
++++ b/arch/xtensa/mm/fault.c
+@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
+ /* If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+- if (in_atomic() || !mm || current->pagefault_disabled) {
++ if (!mm || pagefault_disabled()) {
+ bad_page_fault(regs, address, SIGSEGV);
+ return;
+ }
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -51,6 +51,7 @@ struct sched_param {
+ #include <linux/cred.h>
+ #include <linux/llist.h>
+ #include <linux/uidgid.h>
++#include <linux/hardirq.h>
+
+ #include <asm/processor.h>
+
+@@ -1452,7 +1453,9 @@ struct task_struct {
+ /* mutex deadlock detection */
+ struct mutex_waiter *blocked_on;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_FULL
+ int pagefault_disabled;
++#endif
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ unsigned int irq_events;
+ unsigned long hardirq_enable_ip;
+@@ -1628,6 +1631,17 @@ static inline void set_numabalancing_sta
+ }
+ #endif
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++static inline bool cur_pf_disabled(void) { return current->pagefault_disabled; }
++#else
++static inline bool cur_pf_disabled(void) { return false; }
++#endif
++
++static inline bool pagefault_disabled(void)
++{
++ return in_atomic() || cur_pf_disabled();
++}
++
+ /*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1285,7 +1285,9 @@ static struct task_struct *copy_process(
+ p->hardirq_context = 0;
+ p->softirq_context = 0;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_FULL
+ p->pagefault_disabled = 0;
++#endif
+ #ifdef CONFIG_LOCKDEP
+ p->lockdep_depth = 0; /* no locks held yet */
+ p->curr_chain_key = 0;
diff --git a/patches/peter_zijlstra-frob-rcu.patch b/patches/peter_zijlstra-frob-rcu.patch
new file mode 100644
index 0000000..8a26a15
--- /dev/null
+++ b/patches/peter_zijlstra-frob-rcu.patch
@@ -0,0 +1,166 @@
+Subject: rcu: Frob softirq test
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Sat Aug 13 00:23:17 CEST 2011
+
+With RT_FULL we get the below wreckage:
+
+[ 126.060484] =======================================================
+[ 126.060486] [ INFO: possible circular locking dependency detected ]
+[ 126.060489] 3.0.1-rt10+ #30
+[ 126.060490] -------------------------------------------------------
+[ 126.060492] irq/24-eth0/1235 is trying to acquire lock:
+[ 126.060495] (&(lock)->wait_lock#2){+.+...}, at: [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
+[ 126.060503]
+[ 126.060504] but task is already holding lock:
+[ 126.060506] (&p->pi_lock){-...-.}, at: [<ffffffff81074fdc>] try_to_wake_up+0x35/0x429
+[ 126.060511]
+[ 126.060511] which lock already depends on the new lock.
+[ 126.060513]
+[ 126.060514]
+[ 126.060514] the existing dependency chain (in reverse order) is:
+[ 126.060516]
+[ 126.060516] -> #1 (&p->pi_lock){-...-.}:
+[ 126.060519] [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
+[ 126.060524] [<ffffffff8150291e>] _raw_spin_lock_irqsave+0x4b/0x85
+[ 126.060527] [<ffffffff810b5aa4>] task_blocks_on_rt_mutex+0x36/0x20f
+[ 126.060531] [<ffffffff815019bb>] rt_mutex_slowlock+0xd1/0x15a
+[ 126.060534] [<ffffffff81501ae3>] rt_mutex_lock+0x2d/0x2f
+[ 126.060537] [<ffffffff810d9020>] rcu_boost+0xad/0xde
+[ 126.060541] [<ffffffff810d90ce>] rcu_boost_kthread+0x7d/0x9b
+[ 126.060544] [<ffffffff8109a760>] kthread+0x99/0xa1
+[ 126.060547] [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
+[ 126.060551]
+[ 126.060552] -> #0 (&(lock)->wait_lock#2){+.+...}:
+[ 126.060555] [<ffffffff810af1b8>] __lock_acquire+0x1157/0x1816
+[ 126.060558] [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
+[ 126.060561] [<ffffffff8150279e>] _raw_spin_lock+0x40/0x73
+[ 126.060564] [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
+[ 126.060566] [<ffffffff81501ce7>] rt_mutex_unlock+0x27/0x29
+[ 126.060569] [<ffffffff810d9f86>] rcu_read_unlock_special+0x17e/0x1c4
+[ 126.060573] [<ffffffff810da014>] __rcu_read_unlock+0x48/0x89
+[ 126.060576] [<ffffffff8106847a>] select_task_rq_rt+0xc7/0xd5
+[ 126.060580] [<ffffffff8107511c>] try_to_wake_up+0x175/0x429
+[ 126.060583] [<ffffffff81075425>] wake_up_process+0x15/0x17
+[ 126.060585] [<ffffffff81080a51>] wakeup_softirqd+0x24/0x26
+[ 126.060590] [<ffffffff81081df9>] irq_exit+0x49/0x55
+[ 126.060593] [<ffffffff8150a3bd>] smp_apic_timer_interrupt+0x8a/0x98
+[ 126.060597] [<ffffffff81509793>] apic_timer_interrupt+0x13/0x20
+[ 126.060600] [<ffffffff810d5952>] irq_forced_thread_fn+0x1b/0x44
+[ 126.060603] [<ffffffff810d582c>] irq_thread+0xde/0x1af
+[ 126.060606] [<ffffffff8109a760>] kthread+0x99/0xa1
+[ 126.060608] [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
+[ 126.060611]
+[ 126.060612] other info that might help us debug this:
+[ 126.060614]
+[ 126.060615] Possible unsafe locking scenario:
+[ 126.060616]
+[ 126.060617] CPU0 CPU1
+[ 126.060619] ---- ----
+[ 126.060620] lock(&p->pi_lock);
+[ 126.060623] lock(&(lock)->wait_lock);
+[ 126.060625] lock(&p->pi_lock);
+[ 126.060627] lock(&(lock)->wait_lock);
+[ 126.060629]
+[ 126.060629] *** DEADLOCK ***
+[ 126.060630]
+[ 126.060632] 1 lock held by irq/24-eth0/1235:
+[ 126.060633] #0: (&p->pi_lock){-...-.}, at: [<ffffffff81074fdc>] try_to_wake_up+0x35/0x429
+[ 126.060638]
+[ 126.060638] stack backtrace:
+[ 126.060641] Pid: 1235, comm: irq/24-eth0 Not tainted 3.0.1-rt10+ #30
+[ 126.060643] Call Trace:
+[ 126.060644] <IRQ> [<ffffffff810acbde>] print_circular_bug+0x289/0x29a
+[ 126.060651] [<ffffffff810af1b8>] __lock_acquire+0x1157/0x1816
+[ 126.060655] [<ffffffff810ab3aa>] ? trace_hardirqs_off_caller+0x1f/0x99
+[ 126.060658] [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
+[ 126.060661] [<ffffffff810afe9e>] lock_acquire+0x145/0x18a
+[ 126.060664] [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
+[ 126.060668] [<ffffffff8150279e>] _raw_spin_lock+0x40/0x73
+[ 126.060671] [<ffffffff81501c81>] ? rt_mutex_slowunlock+0x16/0x55
+[ 126.060674] [<ffffffff810d9655>] ? rcu_report_qs_rsp+0x87/0x8c
+[ 126.060677] [<ffffffff81501c81>] rt_mutex_slowunlock+0x16/0x55
+[ 126.060680] [<ffffffff810d9ea3>] ? rcu_read_unlock_special+0x9b/0x1c4
+[ 126.060683] [<ffffffff81501ce7>] rt_mutex_unlock+0x27/0x29
+[ 126.060687] [<ffffffff810d9f86>] rcu_read_unlock_special+0x17e/0x1c4
+[ 126.060690] [<ffffffff810da014>] __rcu_read_unlock+0x48/0x89
+[ 126.060693] [<ffffffff8106847a>] select_task_rq_rt+0xc7/0xd5
+[ 126.060696] [<ffffffff810683da>] ? select_task_rq_rt+0x27/0xd5
+[ 126.060701] [<ffffffff810a852a>] ? clockevents_program_event+0x8e/0x90
+[ 126.060704] [<ffffffff8107511c>] try_to_wake_up+0x175/0x429
+[ 126.060708] [<ffffffff810a95dc>] ? tick_program_event+0x1f/0x21
+[ 126.060711] [<ffffffff81075425>] wake_up_process+0x15/0x17
+[ 126.060715] [<ffffffff81080a51>] wakeup_softirqd+0x24/0x26
+[ 126.060718] [<ffffffff81081df9>] irq_exit+0x49/0x55
+[ 126.060721] [<ffffffff8150a3bd>] smp_apic_timer_interrupt+0x8a/0x98
+[ 126.060724] [<ffffffff81509793>] apic_timer_interrupt+0x13/0x20
+[ 126.060726] <EOI> [<ffffffff81072855>] ? migrate_disable+0x75/0x12d
+[ 126.060733] [<ffffffff81080a61>] ? local_bh_disable+0xe/0x1f
+[ 126.060736] [<ffffffff81080a70>] ? local_bh_disable+0x1d/0x1f
+[ 126.060739] [<ffffffff810d5952>] irq_forced_thread_fn+0x1b/0x44
+[ 126.060742] [<ffffffff81502ac0>] ? _raw_spin_unlock_irq+0x3b/0x59
+[ 126.060745] [<ffffffff810d582c>] irq_thread+0xde/0x1af
+[ 126.060748] [<ffffffff810d5937>] ? irq_thread_fn+0x3a/0x3a
+[ 126.060751] [<ffffffff810d574e>] ? irq_finalize_oneshot+0xd1/0xd1
+[ 126.060754] [<ffffffff810d574e>] ? irq_finalize_oneshot+0xd1/0xd1
+[ 126.060757] [<ffffffff8109a760>] kthread+0x99/0xa1
+[ 126.060761] [<ffffffff81509b14>] kernel_thread_helper+0x4/0x10
+[ 126.060764] [<ffffffff81069ed7>] ? finish_task_switch+0x87/0x10a
+[ 126.060768] [<ffffffff81502ec4>] ? retint_restore_args+0xe/0xe
+[ 126.060771] [<ffffffff8109a6c7>] ? __init_kthread_worker+0x8c/0x8c
+[ 126.060774] [<ffffffff81509b10>] ? gs_change+0xb/0xb
+
+Because irq_exit() does:
+
+void irq_exit(void)
+{
+ account_system_vtime(current);
+ trace_hardirq_exit();
+ sub_preempt_count(IRQ_EXIT_OFFSET);
+ if (!in_interrupt() && local_softirq_pending())
+ invoke_softirq();
+
+ ...
+}
+
+This triggers a wakeup, which uses RCU. Now, if the interrupted task
+has t->rcu_read_unlock_special set, the RCU usage from the wakeup will
+end up in rcu_read_unlock_special(). rcu_read_unlock_special() tests
+for in_irq(), which fails because we just decremented preempt_count
+with IRQ_EXIT_OFFSET, and for in_serving_softirq(), which for
+PREEMPT_RT_FULL reads:
+
+int in_serving_softirq(void)
+{
+ int res;
+
+ preempt_disable();
+ res = __get_cpu_var(local_softirq_runner) == current;
+ preempt_enable();
+ return res;
+}
+
+Which will thus also fail, resulting in the above wreckage.
+
+The 'somewhat' ugly solution is to open-code the preempt_count() test
+in rcu_read_unlock_special().
+
+Also, we're not at all sure how ->rcu_read_unlock_special gets set
+here... so this is very likely a bandaid and more thought is required.
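+
+Concretely, the open-coded test replaces the two helpers (sketch of the
+hunk below):
+
+        /* before: helpers that give the wrong answer at this point */
+        if (in_irq() || in_serving_softirq()) {
+                local_irq_restore(flags);
+                return;
+        }
+
+        /* after: look at the raw preempt count instead */
+        if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
+                local_irq_restore(flags);
+                return;
+        }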
+
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ kernel/rcutree_plugin.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/rcutree_plugin.h
++++ b/kernel/rcutree_plugin.h
+@@ -351,7 +351,7 @@ void rcu_read_unlock_special(struct task
+ }
+
+ /* Hardware IRQ handlers cannot block. */
+- if (in_irq() || in_serving_softirq()) {
++ if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
+ local_irq_restore(flags);
+ return;
+ }
diff --git a/patches/peterz-raw_pagefault_disable.patch b/patches/peterz-raw_pagefault_disable.patch
new file mode 100644
index 0000000..0ca7733
--- /dev/null
+++ b/patches/peterz-raw_pagefault_disable.patch
@@ -0,0 +1,147 @@
+Subject: mm: raw_pagefault_disable
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri Aug 05 17:16:58 CEST 2011
+
+Adding migrate_disable() to pagefault_disable() to preserve the
+per-cpu thing for kmap_atomic might not have been the best of choices.
+But short of adding preempt_disable/migrate_disable foo all over the
+kmap code it still seems the best way.
+
+It does however yield the below borkage as well as wrecking !-rt builds,
+since !-rt does rely on pagefault_disable() not preempting. So fix all
+that up by adding raw_pagefault_disable().
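+
+Users that must keep the plain preempt-count semantics then switch to
+the raw variants, e.g. (sketch mirroring the uaccess.h macro in the
+hunk below; retval/addr come from that macro):
+
+        raw_pagefault_disable();        /* preempt count only, even on RT */
+        ret = __copy_from_user_inatomic(&(retval),
+                        (__force typeof(retval) __user *)(addr),
+                        sizeof(retval));
+        raw_pagefault_enable();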
+
+ <NMI> [<ffffffff81076d5c>] warn_slowpath_common+0x85/0x9d
+ [<ffffffff81076e17>] warn_slowpath_fmt+0x46/0x48
+ [<ffffffff814f7fca>] ? _raw_spin_lock+0x6c/0x73
+ [<ffffffff810cac87>] ? watchdog_overflow_callback+0x9b/0xd0
+ [<ffffffff810caca3>] watchdog_overflow_callback+0xb7/0xd0
+ [<ffffffff810f51bb>] __perf_event_overflow+0x11c/0x1fe
+ [<ffffffff810f298f>] ? perf_event_update_userpage+0x149/0x151
+ [<ffffffff810f2846>] ? perf_event_task_disable+0x7c/0x7c
+ [<ffffffff810f5b7c>] perf_event_overflow+0x14/0x16
+ [<ffffffff81046e02>] x86_pmu_handle_irq+0xcb/0x108
+ [<ffffffff814f9a6b>] perf_event_nmi_handler+0x46/0x91
+ [<ffffffff814fb2ba>] notifier_call_chain+0x79/0xa6
+ [<ffffffff814fb34d>] __atomic_notifier_call_chain+0x66/0x98
+ [<ffffffff814fb2e7>] ? notifier_call_chain+0xa6/0xa6
+ [<ffffffff814fb393>] atomic_notifier_call_chain+0x14/0x16
+ [<ffffffff814fb3c3>] notify_die+0x2e/0x30
+ [<ffffffff814f8f75>] do_nmi+0x7e/0x22b
+ [<ffffffff814f8bca>] nmi+0x1a/0x2c
+ [<ffffffff814fb130>] ? sub_preempt_count+0x4b/0xaa
+ <<EOE>> <IRQ> [<ffffffff812d44cc>] delay_tsc+0xac/0xd1
+ [<ffffffff812d4399>] __delay+0xf/0x11
+ [<ffffffff812d95d9>] do_raw_spin_lock+0xd2/0x13c
+ [<ffffffff814f813e>] _raw_spin_lock_irqsave+0x6b/0x85
+ [<ffffffff8106772a>] ? task_rq_lock+0x35/0x8d
+ [<ffffffff8106772a>] task_rq_lock+0x35/0x8d
+ [<ffffffff8106fe2f>] migrate_disable+0x65/0x12c
+ [<ffffffff81114e69>] pagefault_disable+0xe/0x1f
+ [<ffffffff81039c73>] dump_trace+0x21f/0x2e2
+ [<ffffffff8103ad79>] show_trace_log_lvl+0x54/0x5d
+ [<ffffffff8103ad97>] show_trace+0x15/0x17
+ [<ffffffff814f4f5f>] dump_stack+0x77/0x80
+ [<ffffffff812d94b0>] spin_bug+0x9c/0xa3
+ [<ffffffff81067745>] ? task_rq_lock+0x50/0x8d
+ [<ffffffff812d954e>] do_raw_spin_lock+0x47/0x13c
+ [<ffffffff814f7fbe>] _raw_spin_lock+0x60/0x73
+ [<ffffffff81067745>] ? task_rq_lock+0x50/0x8d
+ [<ffffffff81067745>] task_rq_lock+0x50/0x8d
+ [<ffffffff8106fe2f>] migrate_disable+0x65/0x12c
+ [<ffffffff81114e69>] pagefault_disable+0xe/0x1f
+ [<ffffffff81039c73>] dump_trace+0x21f/0x2e2
+ [<ffffffff8104369b>] save_stack_trace+0x2f/0x4c
+ [<ffffffff810a7848>] save_trace+0x3f/0xaf
+ [<ffffffff810aa2bd>] mark_lock+0x228/0x530
+ [<ffffffff810aac27>] __lock_acquire+0x662/0x1812
+ [<ffffffff8103dad4>] ? native_sched_clock+0x37/0x6d
+ [<ffffffff810a790e>] ? trace_hardirqs_off_caller+0x1f/0x99
+ [<ffffffff810693f6>] ? sched_rt_period_timer+0xbd/0x218
+ [<ffffffff810ac403>] lock_acquire+0x145/0x18a
+ [<ffffffff810693f6>] ? sched_rt_period_timer+0xbd/0x218
+ [<ffffffff814f7f9e>] _raw_spin_lock+0x40/0x73
+ [<ffffffff810693f6>] ? sched_rt_period_timer+0xbd/0x218
+ [<ffffffff810693f6>] sched_rt_period_timer+0xbd/0x218
+ [<ffffffff8109aa39>] __run_hrtimer+0x1e4/0x347
+ [<ffffffff81069339>] ? can_migrate_task.clone.82+0x14a/0x14a
+ [<ffffffff8109b97c>] hrtimer_interrupt+0xee/0x1d6
+ [<ffffffff814fb23d>] ? add_preempt_count+0xae/0xb2
+ [<ffffffff814ffb38>] smp_apic_timer_interrupt+0x85/0x98
+ [<ffffffff814fef13>] apic_timer_interrupt+0x13/0x20
+
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/n/tip-31keae8mkjiv8esq4rl76cib@git.kernel.org
+---
+ include/linux/uaccess.h | 30 ++++++++++++++++++++++++++++--
+ mm/memory.c | 2 ++
+ 2 files changed, 30 insertions(+), 2 deletions(-)
+
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -8,8 +8,34 @@
+ * These routines enable/disable the pagefault handler in that
+ * it will not take any MM locks and go straight to the fixup table.
+ */
++static inline void raw_pagefault_disable(void)
++{
++ inc_preempt_count();
++ barrier();
++}
++
++static inline void raw_pagefault_enable(void)
++{
++ barrier();
++ dec_preempt_count();
++ barrier();
++ preempt_check_resched();
++}
++
++#ifndef CONFIG_PREEMPT_RT_FULL
++static inline void pagefault_disable(void)
++{
++ raw_pagefault_disable();
++}
++
++static inline void pagefault_enable(void)
++{
++ raw_pagefault_enable();
++}
++#else
+ extern void pagefault_disable(void);
+ extern void pagefault_enable(void);
++#endif
+
+ #ifndef ARCH_HAS_NOCACHE_UACCESS
+
+@@ -50,9 +76,9 @@ static inline unsigned long __copy_from_
+ mm_segment_t old_fs = get_fs(); \
+ \
+ set_fs(KERNEL_DS); \
+- pagefault_disable(); \
++ raw_pagefault_disable(); \
+ ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
+- pagefault_enable(); \
++ raw_pagefault_enable(); \
+ set_fs(old_fs); \
+ ret; \
+ })
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3669,6 +3669,7 @@ unlock:
+ return 0;
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
+ void pagefault_disable(void)
+ {
+ inc_preempt_count();
+@@ -3697,6 +3698,7 @@ void pagefault_enable(void)
+ preempt_check_resched();
+ }
+ EXPORT_SYMBOL(pagefault_enable);
++#endif
+
+ /*
+ * By the time we get here, we already hold the mm semaphore
diff --git a/patches/peterz-srcu-crypto-chain.patch b/patches/peterz-srcu-crypto-chain.patch
new file mode 100644
index 0000000..93d29d0
--- /dev/null
+++ b/patches/peterz-srcu-crypto-chain.patch
@@ -0,0 +1,182 @@
+Subject: crypto: Convert crypto notifier chain to SRCU
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 05 Oct 2012 09:03:24 +0100
+
+The crypto notifier deadlocks on RT. Though this can be a real deadlock
+on mainline as well due to fifo fair rwsems.
+
+The involved parties here are:
+
+[ 82.172678] swapper/0 S 0000000000000001 0 1 0 0x00000000
+[ 82.172682] ffff88042f18fcf0 0000000000000046 ffff88042f18fc80 ffffffff81491238
+[ 82.172685] 0000000000011cc0 0000000000011cc0 ffff88042f18c040 ffff88042f18ffd8
+[ 82.172688] 0000000000011cc0 0000000000011cc0 ffff88042f18ffd8 0000000000011cc0
+[ 82.172689] Call Trace:
+[ 82.172697] [<ffffffff81491238>] ? _raw_spin_unlock_irqrestore+0x6c/0x7a
+[ 82.172701] [<ffffffff8148fd3f>] schedule+0x64/0x66
+[ 82.172704] [<ffffffff8148ec6b>] schedule_timeout+0x27/0xd0
+[ 82.172708] [<ffffffff81043c0c>] ? unpin_current_cpu+0x1a/0x6c
+[ 82.172713] [<ffffffff8106e491>] ? migrate_enable+0x12f/0x141
+[ 82.172716] [<ffffffff8148fbbd>] wait_for_common+0xbb/0x11f
+[ 82.172719] [<ffffffff810709f2>] ? try_to_wake_up+0x182/0x182
+[ 82.172722] [<ffffffff8148fc96>] wait_for_completion_interruptible+0x1d/0x2e
+[ 82.172726] [<ffffffff811debfd>] crypto_wait_for_test+0x49/0x6b
+[ 82.172728] [<ffffffff811ded32>] crypto_register_alg+0x53/0x5a
+[ 82.172730] [<ffffffff811ded6c>] crypto_register_algs+0x33/0x72
+[ 82.172734] [<ffffffff81ad7686>] ? aes_init+0x12/0x12
+[ 82.172737] [<ffffffff81ad76ea>] aesni_init+0x64/0x66
+[ 82.172741] [<ffffffff81000318>] do_one_initcall+0x7f/0x13b
+[ 82.172744] [<ffffffff81ac4d34>] kernel_init+0x199/0x22c
+[ 82.172747] [<ffffffff81ac44ef>] ? loglevel+0x31/0x31
+[ 82.172752] [<ffffffff814987c4>] kernel_thread_helper+0x4/0x10
+[ 82.172755] [<ffffffff81491574>] ? retint_restore_args+0x13/0x13
+[ 82.172759] [<ffffffff81ac4b9b>] ? start_kernel+0x3ca/0x3ca
+[ 82.172761] [<ffffffff814987c0>] ? gs_change+0x13/0x13
+
+[ 82.174186] cryptomgr_test S 0000000000000001 0 41 2 0x00000000
+[ 82.174189] ffff88042c971980 0000000000000046 ffffffff81d74830 0000000000000292
+[ 82.174192] 0000000000011cc0 0000000000011cc0 ffff88042c96eb80 ffff88042c971fd8
+[ 82.174195] 0000000000011cc0 0000000000011cc0 ffff88042c971fd8 0000000000011cc0
+[ 82.174195] Call Trace:
+[ 82.174198] [<ffffffff8148fd3f>] schedule+0x64/0x66
+[ 82.174201] [<ffffffff8148ec6b>] schedule_timeout+0x27/0xd0
+[ 82.174204] [<ffffffff81043c0c>] ? unpin_current_cpu+0x1a/0x6c
+[ 82.174206] [<ffffffff8106e491>] ? migrate_enable+0x12f/0x141
+[ 82.174209] [<ffffffff8148fbbd>] wait_for_common+0xbb/0x11f
+[ 82.174212] [<ffffffff810709f2>] ? try_to_wake_up+0x182/0x182
+[ 82.174215] [<ffffffff8148fc96>] wait_for_completion_interruptible+0x1d/0x2e
+[ 82.174218] [<ffffffff811e4883>] cryptomgr_notify+0x280/0x385
+[ 82.174221] [<ffffffff814943de>] notifier_call_chain+0x6b/0x98
+[ 82.174224] [<ffffffff8108a11c>] ? rt_down_read+0x10/0x12
+[ 82.174227] [<ffffffff810677cd>] __blocking_notifier_call_chain+0x70/0x8d
+[ 82.174230] [<ffffffff810677fe>] blocking_notifier_call_chain+0x14/0x16
+[ 82.174234] [<ffffffff811dd272>] crypto_probing_notify+0x24/0x50
+[ 82.174236] [<ffffffff811dd7a1>] crypto_alg_mod_lookup+0x3e/0x74
+[ 82.174238] [<ffffffff811dd949>] crypto_alloc_base+0x36/0x8f
+[ 82.174241] [<ffffffff811e9408>] cryptd_alloc_ablkcipher+0x6e/0xb5
+[ 82.174243] [<ffffffff811dd591>] ? kzalloc.clone.5+0xe/0x10
+[ 82.174246] [<ffffffff8103085d>] ablk_init_common+0x1d/0x38
+[ 82.174249] [<ffffffff8103852a>] ablk_ecb_init+0x15/0x17
+[ 82.174251] [<ffffffff811dd8c6>] __crypto_alloc_tfm+0xc7/0x114
+[ 82.174254] [<ffffffff811e0caa>] ? crypto_lookup_skcipher+0x1f/0xe4
+[ 82.174256] [<ffffffff811e0dcf>] crypto_alloc_ablkcipher+0x60/0xa5
+[ 82.174258] [<ffffffff811e5bde>] alg_test_skcipher+0x24/0x9b
+[ 82.174261] [<ffffffff8106d96d>] ? finish_task_switch+0x3f/0xfa
+[ 82.174263] [<ffffffff811e6b8e>] alg_test+0x16f/0x1d7
+[ 82.174267] [<ffffffff811e45ac>] ? cryptomgr_probe+0xac/0xac
+[ 82.174269] [<ffffffff811e45d8>] cryptomgr_test+0x2c/0x47
+[ 82.174272] [<ffffffff81061161>] kthread+0x7e/0x86
+[ 82.174275] [<ffffffff8106d9dd>] ? finish_task_switch+0xaf/0xfa
+[ 82.174278] [<ffffffff814987c4>] kernel_thread_helper+0x4/0x10
+[ 82.174281] [<ffffffff81491574>] ? retint_restore_args+0x13/0x13
+[ 82.174284] [<ffffffff810610e3>] ? __init_kthread_worker+0x8c/0x8c
+[ 82.174287] [<ffffffff814987c0>] ? gs_change+0x13/0x13
+
+[ 82.174329] cryptomgr_probe D 0000000000000002 0 47 2 0x00000000
+[ 82.174332] ffff88042c991b70 0000000000000046 ffff88042c991bb0 0000000000000006
+[ 82.174335] 0000000000011cc0 0000000000011cc0 ffff88042c98ed00 ffff88042c991fd8
+[ 82.174338] 0000000000011cc0 0000000000011cc0 ffff88042c991fd8 0000000000011cc0
+[ 82.174338] Call Trace:
+[ 82.174342] [<ffffffff8148fd3f>] schedule+0x64/0x66
+[ 82.174344] [<ffffffff814901ad>] __rt_mutex_slowlock+0x85/0xbe
+[ 82.174347] [<ffffffff814902d2>] rt_mutex_slowlock+0xec/0x159
+[ 82.174351] [<ffffffff81089c4d>] rt_mutex_fastlock.clone.8+0x29/0x2f
+[ 82.174353] [<ffffffff81490372>] rt_mutex_lock+0x33/0x37
+[ 82.174356] [<ffffffff8108a0f2>] __rt_down_read+0x50/0x5a
+[ 82.174358] [<ffffffff8108a11c>] ? rt_down_read+0x10/0x12
+[ 82.174360] [<ffffffff8108a11c>] rt_down_read+0x10/0x12
+[ 82.174363] [<ffffffff810677b5>] __blocking_notifier_call_chain+0x58/0x8d
+[ 82.174366] [<ffffffff810677fe>] blocking_notifier_call_chain+0x14/0x16
+[ 82.174369] [<ffffffff811dd272>] crypto_probing_notify+0x24/0x50
+[ 82.174372] [<ffffffff811debd6>] crypto_wait_for_test+0x22/0x6b
+[ 82.174374] [<ffffffff811decd3>] crypto_register_instance+0xb4/0xc0
+[ 82.174377] [<ffffffff811e9b76>] cryptd_create+0x378/0x3b6
+[ 82.174379] [<ffffffff811de512>] ? __crypto_lookup_template+0x5b/0x63
+[ 82.174382] [<ffffffff811e4545>] cryptomgr_probe+0x45/0xac
+[ 82.174385] [<ffffffff811e4500>] ? crypto_alloc_pcomp+0x1b/0x1b
+[ 82.174388] [<ffffffff81061161>] kthread+0x7e/0x86
+[ 82.174391] [<ffffffff8106d9dd>] ? finish_task_switch+0xaf/0xfa
+[ 82.174394] [<ffffffff814987c4>] kernel_thread_helper+0x4/0x10
+[ 82.174398] [<ffffffff81491574>] ? retint_restore_args+0x13/0x13
+[ 82.174401] [<ffffffff810610e3>] ? __init_kthread_worker+0x8c/0x8c
+[ 82.174403] [<ffffffff814987c0>] ? gs_change+0x13/0x13
+
+cryptomgr_test spawns the cryptomgr_probe thread from the notifier
+call. The probe thread fires the same notifier as the test thread and
+deadlocks on the rwsem on RT.
+
+Now this is a potential deadlock in mainline as well, because we have
+fifo fair rwsems. If another thread blocks with a down_write() on the
+notifier chain before the probe thread issues the down_read() it will
+block the probe thread and the whole party is deadlocked.
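+
+The conversion is mechanical: the chain head and every call site move
+from the blocking_* to the srcu_* notifier API (sketch of the hunks
+below):
+
+        /* before */
+        BLOCKING_NOTIFIER_HEAD(crypto_chain);
+        ok = blocking_notifier_call_chain(&crypto_chain, val, v);
+
+        /* after: readers only enter an SRCU read-side section, so they
+         * can no longer queue up behind a writer on the rwsem */
+        SRCU_NOTIFIER_HEAD(crypto_chain);
+        ok = srcu_notifier_call_chain(&crypto_chain, val, v);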
+
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ crypto/algapi.c | 4 ++--
+ crypto/api.c | 6 +++---
+ crypto/internal.h | 4 ++--
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/crypto/algapi.c
++++ b/crypto/algapi.c
+@@ -683,13 +683,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
+
+ int crypto_register_notifier(struct notifier_block *nb)
+ {
+- return blocking_notifier_chain_register(&crypto_chain, nb);
++ return srcu_notifier_chain_register(&crypto_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(crypto_register_notifier);
+
+ int crypto_unregister_notifier(struct notifier_block *nb)
+ {
+- return blocking_notifier_chain_unregister(&crypto_chain, nb);
++ return srcu_notifier_chain_unregister(&crypto_chain, nb);
+ }
+ EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
+
+--- a/crypto/api.c
++++ b/crypto/api.c
+@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list);
+ DECLARE_RWSEM(crypto_alg_sem);
+ EXPORT_SYMBOL_GPL(crypto_alg_sem);
+
+-BLOCKING_NOTIFIER_HEAD(crypto_chain);
++SRCU_NOTIFIER_HEAD(crypto_chain);
+ EXPORT_SYMBOL_GPL(crypto_chain);
+
+ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
+@@ -237,10 +237,10 @@ int crypto_probing_notify(unsigned long
+ {
+ int ok;
+
+- ok = blocking_notifier_call_chain(&crypto_chain, val, v);
++ ok = srcu_notifier_call_chain(&crypto_chain, val, v);
+ if (ok == NOTIFY_DONE) {
+ request_module("cryptomgr");
+- ok = blocking_notifier_call_chain(&crypto_chain, val, v);
++ ok = srcu_notifier_call_chain(&crypto_chain, val, v);
+ }
+
+ return ok;
+--- a/crypto/internal.h
++++ b/crypto/internal.h
+@@ -48,7 +48,7 @@ struct crypto_larval {
+
+ extern struct list_head crypto_alg_list;
+ extern struct rw_semaphore crypto_alg_sem;
+-extern struct blocking_notifier_head crypto_chain;
++extern struct srcu_notifier_head crypto_chain;
+
+ #ifdef CONFIG_PROC_FS
+ void __init crypto_init_proc(void);
+@@ -136,7 +136,7 @@ static inline int crypto_is_moribund(str
+
+ static inline void crypto_notify(unsigned long val, void *v)
+ {
+- blocking_notifier_call_chain(&crypto_chain, val, v);
++ srcu_notifier_call_chain(&crypto_chain, val, v);
+ }
+
+ #endif /* _CRYPTO_INTERNAL_H */
diff --git a/patches/pid-h-include-atomic-h.patch b/patches/pid-h-include-atomic-h.patch
new file mode 100644
index 0000000..61dcbae
--- /dev/null
+++ b/patches/pid-h-include-atomic-h.patch
@@ -0,0 +1,19 @@
+Subject: rwsem-include-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Jul 2011 21:24:27 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/pid.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/pid.h
++++ b/include/linux/pid.h
+@@ -2,6 +2,7 @@
+ #define _LINUX_PID_H
+
+ #include <linux/rcupdate.h>
++#include <linux/atomic.h>
+
+ enum pid_type
+ {
diff --git a/patches/ping-sysrq.patch b/patches/ping-sysrq.patch
new file mode 100644
index 0000000..30ce365
--- /dev/null
+++ b/patches/ping-sysrq.patch
@@ -0,0 +1,121 @@
+Subject: net: sysrq via icmp
+From: Carsten Emde <C.Emde@osadl.org>
+Date: Tue, 19 Jul 2011 13:51:17 +0100
+
+There are (probably rare) situations where a system has crashed and the
+system console has become unresponsive but the network ICMP layer is
+still alive. Wouldn't it be wonderful if we could then submit a SysRq
+command via ping?
+
+This patch provides this facility. Please consult the updated
+documentation in Documentation/sysrq.txt for details.
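+
+As a rough illustration, a small user-space helper that prints the
+matching ping invocation for a given cookie and SysRq command key (the
+cookie value and the <target> host are placeholders):
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int cookie = 0x01020304; /* must match icmp_echo_sysrq */
+          unsigned char key = 'h';          /* SysRq-H: help */
+
+          /* ping repeats the -p pattern over the 57 byte payload, so the
+           * cookie plus command byte ends up at the offsets the kernel
+           * checks. */
+          printf("ping -c1 -s57 -p%08x%02x <target>\n", cookie, key);
+          return 0;
+  }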
+
+Signed-off-by: Carsten Emde <C.Emde@osadl.org>
+
+---
+ Documentation/sysrq.txt | 11 +++++++++--
+ include/net/netns/ipv4.h | 1 +
+ net/ipv4/icmp.c | 30 ++++++++++++++++++++++++++++++
+ net/ipv4/sysctl_net_ipv4.c | 7 +++++++
+ 4 files changed, 47 insertions(+), 2 deletions(-)
+
+--- a/Documentation/sysrq.txt
++++ b/Documentation/sysrq.txt
+@@ -57,10 +57,17 @@ On PowerPC - Press 'ALT - Print Screen (
+ On other - If you know of the key combos for other architectures, please
+ let me know so I can add them to this section.
+
+-On all - write a character to /proc/sysrq-trigger. e.g.:
+-
++On all - write a character to /proc/sysrq-trigger, e.g.:
+ echo t > /proc/sysrq-trigger
+
++On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
++ echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
++ Send an ICMP echo request with this pattern plus the particular
++ SysRq command key. Example:
++ # ping -c1 -s57 -p0102030468
++ will trigger the SysRq-H (help) command.
++
++
+ * What are the 'command' keys?
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 'b' - Will immediately reboot the system without syncing or unmounting
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -56,6 +56,7 @@ struct netns_ipv4 {
+
+ int sysctl_icmp_echo_ignore_all;
+ int sysctl_icmp_echo_ignore_broadcasts;
++ int sysctl_icmp_echo_sysrq;
+ int sysctl_icmp_ignore_bogus_error_responses;
+ int sysctl_icmp_ratelimit;
+ int sysctl_icmp_ratemask;
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -69,6 +69,7 @@
+ #include <linux/jiffies.h>
+ #include <linux/kernel.h>
+ #include <linux/fcntl.h>
++#include <linux/sysrq.h>
+ #include <linux/socket.h>
+ #include <linux/in.h>
+ #include <linux/inet.h>
+@@ -768,6 +769,30 @@ static void icmp_redirect(struct sk_buff
+ }
+
+ /*
++ * 32bit and 64bit have different timestamp length, so we check for
++ * the cookie at offset 20 and verify it is repeated at offset 50
++ */
++#define CO_POS0 20
++#define CO_POS1 50
++#define CO_SIZE sizeof(int)
++#define ICMP_SYSRQ_SIZE 57
++
++/*
++ * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
++ * pattern and if it matches send the next byte as a trigger to sysrq.
++ */
++static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
++{
++ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
++ char *p = skb->data;
++
++ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
++ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
++ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
++ handle_sysrq(p[CO_POS0 + CO_SIZE]);
++}
++
++/*
+ * Handle ICMP_ECHO ("ping") requests.
+ *
+ * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
+@@ -794,6 +819,11 @@ static void icmp_echo(struct sk_buff *sk
+ icmp_param.data_len = skb->len;
+ icmp_param.head_len = sizeof(struct icmphdr);
+ icmp_reply(&icmp_param, skb);
++
++ if (skb->len == ICMP_SYSRQ_SIZE &&
++ net->ipv4.sysctl_icmp_echo_sysrq) {
++ icmp_check_sysrq(net, skb);
++ }
+ }
+ }
+
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -815,6 +815,13 @@ static struct ctl_table ipv4_net_table[]
+ .proc_handler = proc_dointvec
+ },
+ {
++ .procname = "icmp_echo_sysrq",
++ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec
++ },
++ {
+ .procname = "icmp_ignore_bogus_error_responses",
+ .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
+ .maxlen = sizeof(int),
diff --git a/patches/posix-timers-avoid-wakeups-when-no-timers-are-active.patch b/patches/posix-timers-avoid-wakeups-when-no-timers-are-active.patch
new file mode 100644
index 0000000..c92c2f6
--- /dev/null
+++ b/patches/posix-timers-avoid-wakeups-when-no-timers-are-active.patch
@@ -0,0 +1,57 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:44 -0500
+Subject: posix-timers: Avoid wakeups when no timers are active
+
+Waking the thread even when no timers are scheduled is useless.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/posix-cpu-timers.c | 21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+--- a/kernel/posix-cpu-timers.c
++++ b/kernel/posix-cpu-timers.c
+@@ -1387,6 +1387,21 @@ wait_to_die:
+ return 0;
+ }
+
++static inline int __fastpath_timer_check(struct task_struct *tsk)
++{
++ /* tsk == current, ensure it is safe to use ->signal/sighand */
++ if (unlikely(tsk->exit_state))
++ return 0;
++
++ if (!task_cputime_zero(&tsk->cputime_expires))
++ return 1;
++
++ if (!task_cputime_zero(&tsk->signal->cputime_expires))
++ return 1;
++
++ return 0;
++}
++
+ void run_posix_cpu_timers(struct task_struct *tsk)
+ {
+ unsigned long cpu = smp_processor_id();
+@@ -1399,7 +1414,7 @@ void run_posix_cpu_timers(struct task_st
+ tasklist = per_cpu(posix_timer_tasklist, cpu);
+
+ /* check to see if we're already queued */
+- if (!tsk->posix_timer_list) {
++ if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
+ get_task_struct(tsk);
+ if (tasklist) {
+ tsk->posix_timer_list = tasklist;
+@@ -1411,9 +1426,9 @@ void run_posix_cpu_timers(struct task_st
+ tsk->posix_timer_list = tsk;
+ }
+ per_cpu(posix_timer_tasklist, cpu) = tsk;
++
++ wake_up_process(per_cpu(posix_timer_task, cpu));
+ }
+- /* XXX signal the thread somehow */
+- wake_up_process(per_cpu(posix_timer_task, cpu));
+ }
+
+ /*
diff --git a/patches/posix-timers-no-broadcast.patch b/patches/posix-timers-no-broadcast.patch
new file mode 100644
index 0000000..e0d9d76
--- /dev/null
+++ b/patches/posix-timers-no-broadcast.patch
@@ -0,0 +1,33 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:29:20 -0500
+Subject: posix-timers: Prevent broadcast signals
+
+POSIX timers should not send broadcast signals or kernel-only signals.
+Prevent it.
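+
+For illustration, a user-space timer whose sigevent passes the tightened
+check (an ordinary queueable signal; a kernel-only signal such as
+SIGKILL would now be rejected at timer_create() time):
+
+  #include <signal.h>
+  #include <stdio.h>
+  #include <time.h>
+
+  int main(void)
+  {
+          struct sigevent sev = {
+                  .sigev_notify = SIGEV_SIGNAL,
+                  .sigev_signo  = SIGRTMIN, /* neither kernel-only nor a coredump signal */
+          };
+          timer_t t;
+
+          if (timer_create(CLOCK_MONOTONIC, &sev, &t)) {
+                  perror("timer_create");
+                  return 1;
+          }
+          timer_delete(t);
+          return 0;
+  }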
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/posix-timers.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/posix-timers.c
++++ b/kernel/posix-timers.c
+@@ -439,6 +439,7 @@ static enum hrtimer_restart posix_timer_
+ static struct pid *good_sigevent(sigevent_t * event)
+ {
+ struct task_struct *rtn = current->group_leader;
++ int sig = event->sigev_signo;
+
+ if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+ (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
+@@ -447,7 +448,8 @@ static struct pid *good_sigevent(sigeven
+ return NULL;
+
+ if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
+- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
++ (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
++ sig_kernel_coredump(sig)))
+ return NULL;
+
+ return task_pid(rtn);
diff --git a/patches/posix-timers-shorten-cpu-timers-thread.patch b/patches/posix-timers-shorten-cpu-timers-thread.patch
new file mode 100644
index 0000000..52ff469
--- /dev/null
+++ b/patches/posix-timers-shorten-cpu-timers-thread.patch
@@ -0,0 +1,26 @@
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+Date: Fri, 3 Jul 2009 08:30:00 -0500
+Subject: posix-timers: Shorten posix_cpu_timers/<CPU> kernel thread names
+
+Shorten the softirq kernel thread names because they always overflow
+the limited comm length, so each one appears as "posix_cpu_timer" (once
+per CPU, with the CPU number truncated).
+
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/posix-cpu-timers.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/posix-cpu-timers.c
++++ b/kernel/posix-cpu-timers.c
+@@ -1430,7 +1430,7 @@ static int posix_cpu_thread_call(struct
+ switch (action) {
+ case CPU_UP_PREPARE:
+ p = kthread_create(posix_cpu_timers_thread, hcpu,
+- "posix_cpu_timers/%d",cpu);
++ "posixcputmr/%d",cpu);
+ if (IS_ERR(p))
+ return NOTIFY_BAD;
+ p->flags |= PF_NOFREEZE;
diff --git a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
new file mode 100644
index 0000000..91fb81c
--- /dev/null
+++ b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -0,0 +1,303 @@
+From: John Stultz <johnstul@us.ibm.com>
+Date: Fri, 3 Jul 2009 08:29:58 -0500
+Subject: posix-timers: thread posix-cpu-timers on -rt
+
+The posix-cpu-timer code takes locks which are not -rt safe in hard
+irq context. Move the processing to a per-CPU thread.
+
+[ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ]
+
+Signed-off-by: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/init_task.h | 7 +
+ include/linux/sched.h | 3
+ init/main.c | 1
+ kernel/fork.c | 3
+ kernel/posix-cpu-timers.c | 182 ++++++++++++++++++++++++++++++++++++++++++++--
+ 5 files changed, 190 insertions(+), 6 deletions(-)
+
+--- a/include/linux/init_task.h
++++ b/include/linux/init_task.h
+@@ -141,6 +141,12 @@ extern struct task_group root_task_group
+ # define INIT_PERF_EVENTS(tsk)
+ #endif
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++# define INIT_TIMER_LIST .posix_timer_list = NULL,
++#else
++# define INIT_TIMER_LIST
++#endif
++
+ #define INIT_TASK_COMM "swapper"
+
+ /*
+@@ -196,6 +202,7 @@ extern struct task_group root_task_group
+ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
+ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
+ .timer_slack_ns = 50000, /* 50 usec default slack */ \
++ INIT_TIMER_LIST \
+ .pids = { \
+ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
+ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1377,6 +1377,9 @@ struct task_struct {
+
+ struct task_cputime cputime_expires;
+ struct list_head cpu_timers[3];
++#ifdef CONFIG_PREEMPT_RT_BASE
++ struct task_struct *posix_timer_list;
++#endif
+
+ /* process credentials */
+ const struct cred __rcu *real_cred; /* objective and real subjective task
+--- a/init/main.c
++++ b/init/main.c
+@@ -70,6 +70,7 @@
+ #include <linux/perf_event.h>
+ #include <linux/file.h>
+ #include <linux/ptrace.h>
++#include <linux/posix-timers.h>
+
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1112,6 +1112,9 @@ void mm_init_owner(struct mm_struct *mm,
+ */
+ static void posix_cpu_timers_init(struct task_struct *tsk)
+ {
++#ifdef CONFIG_PREEMPT_RT_BASE
++ tsk->posix_timer_list = NULL;
++#endif
+ tsk->cputime_expires.prof_exp = 0;
+ tsk->cputime_expires.virt_exp = 0;
+ tsk->cputime_expires.sched_exp = 0;
+--- a/kernel/posix-cpu-timers.c
++++ b/kernel/posix-cpu-timers.c
+@@ -661,7 +661,7 @@ static int posix_cpu_timer_set(struct k_
+ /*
+ * Disarm any old timer after extracting its expiry time.
+ */
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+
+ ret = 0;
+ old_incr = timer->it.cpu.incr;
+@@ -1177,7 +1177,7 @@ void posix_cpu_timer_schedule(struct k_i
+ /*
+ * Now re-arm for the new expiry time.
+ */
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+ arm_timer(timer);
+ spin_unlock(&p->sighand->siglock);
+
+@@ -1241,10 +1241,11 @@ static inline int fastpath_timer_check(s
+ sig = tsk->signal;
+ if (sig->cputimer.running) {
+ struct task_cputime group_sample;
++ unsigned long flags;
+
+- raw_spin_lock(&sig->cputimer.lock);
++ raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
+ group_sample = sig->cputimer.cputime;
+- raw_spin_unlock(&sig->cputimer.lock);
++ raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
+
+ if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+ return 1;
+@@ -1258,13 +1259,13 @@ static inline int fastpath_timer_check(s
+ * already updated our counts. We need to check if any timers fire now.
+ * Interrupts are disabled.
+ */
+-void run_posix_cpu_timers(struct task_struct *tsk)
++static void __run_posix_cpu_timers(struct task_struct *tsk)
+ {
+ LIST_HEAD(firing);
+ struct k_itimer *timer, *next;
+ unsigned long flags;
+
+- BUG_ON(!irqs_disabled());
++ BUG_ON_NONRT(!irqs_disabled());
+
+ /*
+ * The fast path checks that there are no expired thread or thread
+@@ -1322,6 +1323,175 @@ void run_posix_cpu_timers(struct task_st
+ }
+ }
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++#include <linux/kthread.h>
++#include <linux/cpu.h>
++DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
++DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
++
++static int posix_cpu_timers_thread(void *data)
++{
++ int cpu = (long)data;
++
++ BUG_ON(per_cpu(posix_timer_task,cpu) != current);
++
++ while (!kthread_should_stop()) {
++ struct task_struct *tsk = NULL;
++ struct task_struct *next = NULL;
++
++ if (cpu_is_offline(cpu))
++ goto wait_to_die;
++
++ /* grab task list */
++ raw_local_irq_disable();
++ tsk = per_cpu(posix_timer_tasklist, cpu);
++ per_cpu(posix_timer_tasklist, cpu) = NULL;
++ raw_local_irq_enable();
++
++ /* its possible the list is empty, just return */
++ if (!tsk) {
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule();
++ __set_current_state(TASK_RUNNING);
++ continue;
++ }
++
++ /* Process task list */
++ while (1) {
++ /* save next */
++ next = tsk->posix_timer_list;
++
++ /* run the task timers, clear its ptr and
++ * unreference it
++ */
++ __run_posix_cpu_timers(tsk);
++ tsk->posix_timer_list = NULL;
++ put_task_struct(tsk);
++
++ /* check if this is the last on the list */
++ if (next == tsk)
++ break;
++ tsk = next;
++ }
++ }
++ return 0;
++
++wait_to_die:
++ /* Wait for kthread_stop */
++ set_current_state(TASK_INTERRUPTIBLE);
++ while (!kthread_should_stop()) {
++ schedule();
++ set_current_state(TASK_INTERRUPTIBLE);
++ }
++ __set_current_state(TASK_RUNNING);
++ return 0;
++}
++
++void run_posix_cpu_timers(struct task_struct *tsk)
++{
++ unsigned long cpu = smp_processor_id();
++ struct task_struct *tasklist;
++
++ BUG_ON(!irqs_disabled());
++ if(!per_cpu(posix_timer_task, cpu))
++ return;
++ /* get per-cpu references */
++ tasklist = per_cpu(posix_timer_tasklist, cpu);
++
++ /* check to see if we're already queued */
++ if (!tsk->posix_timer_list) {
++ get_task_struct(tsk);
++ if (tasklist) {
++ tsk->posix_timer_list = tasklist;
++ } else {
++ /*
++ * The list is terminated by a self-pointing
++ * task_struct
++ */
++ tsk->posix_timer_list = tsk;
++ }
++ per_cpu(posix_timer_tasklist, cpu) = tsk;
++ }
++ /* XXX signal the thread somehow */
++ wake_up_process(per_cpu(posix_timer_task, cpu));
++}
++
++/*
++ * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
++ * Here we can start up the necessary migration thread for the new CPU.
++ */
++static int posix_cpu_thread_call(struct notifier_block *nfb,
++ unsigned long action, void *hcpu)
++{
++ int cpu = (long)hcpu;
++ struct task_struct *p;
++ struct sched_param param;
++
++ switch (action) {
++ case CPU_UP_PREPARE:
++ p = kthread_create(posix_cpu_timers_thread, hcpu,
++ "posix_cpu_timers/%d",cpu);
++ if (IS_ERR(p))
++ return NOTIFY_BAD;
++ p->flags |= PF_NOFREEZE;
++ kthread_bind(p, cpu);
++ /* Must be high prio to avoid getting starved */
++ param.sched_priority = MAX_RT_PRIO-1;
++ sched_setscheduler(p, SCHED_FIFO, &param);
++ per_cpu(posix_timer_task,cpu) = p;
++ break;
++ case CPU_ONLINE:
++ /* Strictly unneccessary, as first user will wake it. */
++ wake_up_process(per_cpu(posix_timer_task,cpu));
++ break;
++#ifdef CONFIG_HOTPLUG_CPU
++ case CPU_UP_CANCELED:
++ /* Unbind it from offline cpu so it can run. Fall thru. */
++ kthread_bind(per_cpu(posix_timer_task, cpu),
++ cpumask_any(cpu_online_mask));
++ kthread_stop(per_cpu(posix_timer_task,cpu));
++ per_cpu(posix_timer_task,cpu) = NULL;
++ break;
++ case CPU_DEAD:
++ kthread_stop(per_cpu(posix_timer_task,cpu));
++ per_cpu(posix_timer_task,cpu) = NULL;
++ break;
++#endif
++ }
++ return NOTIFY_OK;
++}
++
++/* Register at highest priority so that task migration (migrate_all_tasks)
++ * happens before everything else.
++ */
++static struct notifier_block posix_cpu_thread_notifier = {
++ .notifier_call = posix_cpu_thread_call,
++ .priority = 10
++};
++
++static int __init posix_cpu_thread_init(void)
++{
++ void *hcpu = (void *)(long)smp_processor_id();
++ /* Start one for boot CPU. */
++ unsigned long cpu;
++
++ /* init the per-cpu posix_timer_tasklets */
++ for_each_possible_cpu(cpu)
++ per_cpu(posix_timer_tasklist, cpu) = NULL;
++
++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
++ register_cpu_notifier(&posix_cpu_thread_notifier);
++ return 0;
++}
++early_initcall(posix_cpu_thread_init);
++#else /* CONFIG_PREEMPT_RT_BASE */
++void run_posix_cpu_timers(struct task_struct *tsk)
++{
++ __run_posix_cpu_timers(tsk);
++}
++#endif /* CONFIG_PREEMPT_RT_BASE */
++
+ /*
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
+ * The tsk->sighand->siglock must be held by the caller.
diff --git a/patches/power-disable-highmem-on-rt.patch b/patches/power-disable-highmem-on-rt.patch
new file mode 100644
index 0000000..445bcf8
--- /dev/null
+++ b/patches/power-disable-highmem-on-rt.patch
@@ -0,0 +1,20 @@
+Subject: power-disable-highmem-on-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:08:34 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/powerpc/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -291,7 +291,7 @@ menu "Kernel options"
+
+ config HIGHMEM
+ bool "High memory support"
+- depends on PPC32
++ depends on PPC32 && !PREEMPT_RT_FULL
+
+ source kernel/Kconfig.hz
+ source kernel/Kconfig.preempt
diff --git a/patches/power-use-generic-rwsem-on-rt.patch b/patches/power-use-generic-rwsem-on-rt.patch
new file mode 100644
index 0000000..bb60e58
--- /dev/null
+++ b/patches/power-use-generic-rwsem-on-rt.patch
@@ -0,0 +1,23 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Subject: Powerpc: Use generic rwsem on RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/powerpc/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT
+
+ config RWSEM_GENERIC_SPINLOCK
+ bool
++ default y if PREEMPT_RT_FULL
+
+ config RWSEM_XCHGADD_ALGORITHM
+ bool
+- default y
++ default y if !PREEMPT_RT_FULL
+
+ config GENERIC_LOCKBREAK
+ bool
diff --git a/patches/powerpc-fsl-msi-use-a-different-locklcass-for-the-ca.patch b/patches/powerpc-fsl-msi-use-a-different-locklcass-for-the-ca.patch
new file mode 100644
index 0000000..982d9c6
--- /dev/null
+++ b/patches/powerpc-fsl-msi-use-a-different-locklcass-for-the-ca.patch
@@ -0,0 +1,35 @@
+From bfc2bc8577d31ad04ae2f0619e50794e7cde9536 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 10:00:28 +0100
+Subject: [PATCH 1/3] powerpc/fsl-msi: use a different lockclass for the
+ cascade interrupt
+
+lockdep thinks that the cascade handler might deadlock because it grabs
+a lock of the same class while calling generic_handle_irq(). This
+annotation will inform lockdep that it will not.
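+
+The same idea in its generic spinlock form, as a sketch (the demo_*
+names are invented; the patch below applies it to the MSI cascade irq
+descriptor via irq_set_lockdep_class()):
+
+  static struct lock_class_key demo_nested_key;
+
+  static void demo_init_nested_lock(spinlock_t *lock)
+  {
+          spin_lock_init(lock);
+          /* Separate class: taking it inside a handler that already holds
+           * a lock of the default class is no longer reported as recursion. */
+          lockdep_set_class(lock, &demo_nested_key);
+  }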
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ arch/powerpc/sysdev/fsl_msi.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/sysdev/fsl_msi.c
++++ b/arch/powerpc/sysdev/fsl_msi.c
+@@ -333,6 +333,8 @@ static int fsl_of_msi_remove(struct plat
+ return 0;
+ }
+
++static struct lock_class_key fsl_msi_irq_class;
++
+ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
+ int offset, int irq_index)
+ {
+@@ -351,7 +353,7 @@ static int fsl_msi_setup_hwirq(struct fs
+ dev_err(&dev->dev, "No memory for MSI cascade data\n");
+ return -ENOMEM;
+ }
+-
++ irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+ msi->msi_virqs[irq_index] = virt_msir;
+ cascade_data->index = offset;
+ cascade_data->msi_data = msi;
diff --git a/patches/powerpc-preempt-lazy-support.patch b/patches/powerpc-preempt-lazy-support.patch
new file mode 100644
index 0000000..1e0c13f
--- /dev/null
+++ b/patches/powerpc-preempt-lazy-support.patch
@@ -0,0 +1,166 @@
+Subject: powerpc-preempt-lazy-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 01 Nov 2012 10:14:11 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/powerpc/Kconfig | 1 +
+ arch/powerpc/include/asm/thread_info.h | 10 ++++++++--
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/entry_32.S | 17 ++++++++++++-----
+ arch/powerpc/kernel/entry_64.S | 12 +++++++++---
+ 5 files changed, 31 insertions(+), 10 deletions(-)
+
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -142,6 +142,7 @@ config PPC
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
++ select HAVE_PREEMPT_LAZY
+ select HAVE_MOD_ARCH_SPECIFIC
+ select MODULES_USE_ELF_RELA
+ select CLONE_BACKWARDS
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -43,6 +43,8 @@ struct thread_info {
+ int cpu; /* cpu we're on */
+ int preempt_count; /* 0 => preemptable,
+ <0 => BUG */
++ int preempt_lazy_count; /* 0 => preemptable,
++ <0 => BUG */
+ struct restart_block restart_block;
+ unsigned long local_flags; /* private flags for thread */
+
+@@ -97,7 +99,7 @@ static inline struct thread_info *curren
+ #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SINGLESTEP 8 /* singlestepping active */
+-#define TIF_MEMDIE 9 /* is terminating due to OOM killer */
++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
+ #define TIF_SECCOMP 10 /* secure computing */
+ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
+ #define TIF_NOERROR 12 /* Force successful syscall return */
+@@ -106,6 +108,7 @@ static inline struct thread_info *curren
+ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
+ #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
+ for stack store? */
++#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+@@ -124,12 +127,15 @@ static inline struct thread_info *curren
+ #define _TIF_UPROBE (1<<TIF_UPROBE)
+ #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+ #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
++#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
+ #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
+
+ #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+- _TIF_NOTIFY_RESUME | _TIF_UPROBE)
++ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
++ _TIF_NEED_RESCHED_LAZY)
+ #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
+ /* Bits in local_flags */
+ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -124,6 +124,7 @@ int main(void)
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
+ DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
+ DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+
+--- a/arch/powerpc/kernel/entry_32.S
++++ b/arch/powerpc/kernel/entry_32.S
+@@ -892,7 +892,14 @@ resume_kernel:
+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
+ bne restore
+ andi. r8,r8,_TIF_NEED_RESCHED
++ bne+ 1f
++ lwz r0,TI_PREEMPT_LAZY(r9)
++ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
++ bne restore
++ lwz r0,TI_FLAGS(r9)
++ andi. r0,r0,_TIF_NEED_RESCHED_LAZY
+ beq+ restore
++1:
+ lwz r3,_MSR(r1)
+ andi. r0,r3,MSR_EE /* interrupts off? */
+ beq restore /* don't schedule if so */
+@@ -903,11 +910,11 @@ resume_kernel:
+ */
+ bl trace_hardirqs_off
+ #endif
+-1: bl preempt_schedule_irq
++2: bl preempt_schedule_irq
+ CURRENT_THREAD_INFO(r9, r1)
+ lwz r3,TI_FLAGS(r9)
+- andi. r0,r3,_TIF_NEED_RESCHED
+- bne- 1b
++ andi. r0,r3,_TIF_NEED_RESCHED_MASK
++ bne- 2b
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ /* And now, to properly rebalance the above, we tell lockdep they
+ * are being turned back on, which will happen when we return
+@@ -1228,7 +1235,7 @@ global_dbcr0:
+ #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+
+ do_work: /* r10 contains MSR_KERNEL here */
+- andi. r0,r9,_TIF_NEED_RESCHED
++ andi. r0,r9,_TIF_NEED_RESCHED_MASK
+ beq do_user_signal
+
+ do_resched: /* r10 contains MSR_KERNEL here */
+@@ -1249,7 +1256,7 @@ recheck:
+ MTMSRD(r10) /* disable interrupts */
+ CURRENT_THREAD_INFO(r9, r1)
+ lwz r9,TI_FLAGS(r9)
+- andi. r0,r9,_TIF_NEED_RESCHED
++ andi. r0,r9,_TIF_NEED_RESCHED_MASK
+ bne- do_resched
+ andi. r0,r9,_TIF_USER_WORK_MASK
+ beq restore_user
+--- a/arch/powerpc/kernel/entry_64.S
++++ b/arch/powerpc/kernel/entry_64.S
+@@ -592,7 +592,7 @@ _GLOBAL(ret_from_except_lite)
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ beq restore
+
+- andi. r0,r4,_TIF_NEED_RESCHED
++ andi. r0,r4,_TIF_NEED_RESCHED_MASK
+ beq 1f
+ bl .restore_interrupts
+ bl .schedule
+@@ -642,10 +642,16 @@ resume_kernel:
+
+ #ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
++ lwz r8,TI_PREEMPT(r9)
+ andi. r0,r4,_TIF_NEED_RESCHED
++ bne+ check_count
++
++ andi. r0,r4,_TIF_NEED_RESCHED_LAZY
+ beq+ restore
++ lwz r8,TI_PREEMPT_LAZY(r9)
++
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+- lwz r8,TI_PREEMPT(r9)
++check_count:
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+@@ -662,7 +668,7 @@ resume_kernel:
+ /* Re-test flags and eventually loop */
+ CURRENT_THREAD_INFO(r9, r1)
+ ld r4,TI_FLAGS(r9)
+- andi. r0,r4,_TIF_NEED_RESCHED
++ andi. r0,r4,_TIF_NEED_RESCHED_MASK
+ bne 1b
+
+ /*
diff --git a/patches/ppc-mark-low-level-handlers-no-thread.patch b/patches/ppc-mark-low-level-handlers-no-thread.patch
new file mode 100644
index 0000000..f4471a6
--- /dev/null
+++ b/patches/ppc-mark-low-level-handlers-no-thread.patch
@@ -0,0 +1,35 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jun 2012 19:53:17 +0200
+Subject: powerpc: Mark low level irq handlers NO_THREAD
+
+These low-level handlers cannot be threaded. Mark them IRQF_NO_THREAD.
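+
+The same marking for handlers registered via request_irq(), as a sketch
+(the demo_* names are invented):
+
+  #include <linux/interrupt.h>
+
+  static irqreturn_t demo_handler(int irq, void *dev_id)
+  {
+          return IRQ_HANDLED;
+  }
+
+  static int demo_setup_irq(unsigned int virq)
+  {
+          /* IRQF_NO_THREAD keeps the handler in hard interrupt context
+           * even when forced interrupt threading is in effect. */
+          return request_irq(virq, demo_handler, IRQF_NO_THREAD,
+                             "demo", NULL);
+  }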
+
+Reported-by: leroy christophe <christophe.leroy@c-s.fr>
+Tested-by: leroy christophe <christophe.leroy@c-s.fr>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/powerpc/platforms/8xx/m8xx_setup.c | 1 +
+ arch/powerpc/sysdev/cpm1.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
++++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
+@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(in
+
+ static struct irqaction tbint_irqaction = {
+ .handler = timebase_interrupt,
++ .flags = IRQF_NO_THREAD,
+ .name = "tbint",
+ };
+
+--- a/arch/powerpc/sysdev/cpm1.c
++++ b/arch/powerpc/sysdev/cpm1.c
+@@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(i
+
+ static struct irqaction cpm_error_irqaction = {
+ .handler = cpm_error_interrupt,
++ .flags = IRQF_NO_THREAD,
+ .name = "error",
+ };
+
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
new file mode 100644
index 0000000..8a3cfa1
--- /dev/null
+++ b/patches/preempt-lazy-support.patch
@@ -0,0 +1,581 @@
+Subject: sched: Add support for lazy preemption
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 26 Oct 2012 18:50:54 +0100
+
+It has become an obsession to mitigate the determinism vs. throughput
+loss of RT. Looking at the mainline semantics of preemption points
+gives a hint why RT sucks throughput-wise for ordinary SCHED_OTHER
+tasks. One major issue is the wakeup of tasks which right away preempt
+the waking task while the waking task holds a lock on which the woken
+task will block right after having preempted the waker. In mainline
+this is prevented by the implicit preemption disable of spin/rw_lock
+held regions. On RT this is not possible due to the fully preemptible
+nature of sleeping spinlocks.
+
+Though for a SCHED_OTHER task preempting another SCHED_OTHER task this
+is really not a correctness issue. RT folks are concerned about
+SCHED_FIFO/RR task preemption and not about the purely fairness-driven
+SCHED_OTHER preemption latencies.
+
+So I introduced a lazy preemption mechanism which only applies to
+SCHED_OTHER tasks preempting another SCHED_OTHER task. Aside from the
+existing preempt_count, each task now sports a preempt_lazy_count
+which is manipulated on lock acquisition and release. This is slightly
+incorrect, as for laziness reasons I coupled this to
+migrate_disable/enable, so some other mechanisms get the same treatment
+(e.g. get_cpu_light).
+
+Now on the scheduler side, instead of setting NEED_RESCHED this sets
+NEED_RESCHED_LAZY in case of a SCHED_OTHER/SCHED_OTHER preemption and
+therefore allows the waking task to exit the lock-held region before
+the woken task preempts it. That also works better for cross-CPU
+wakeups, as the other side can stay in the adaptive spinning loop.
+
+For RT class preemption there is no change. This simply sets
+NEED_RESCHED and forgoes the lazy preemption counter.
+
+Initial tests do not expose any observable latency increase, but
+history shows that I've been proven wrong before :)
+
+The lazy preemption mode is on by default, but with
+CONFIG_SCHED_DEBUG enabled it can be disabled via:
+
+ # echo NO_PREEMPT_LAZY >/sys/kernel/debug/sched_features
+
+and re-enabled via
+
+ # echo PREEMPT_LAZY >/sys/kernel/debug/sched_features
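+
+The same toggles, done from C rather than the shell (a sketch; it
+assumes debugfs is mounted at /sys/kernel/debug and CONFIG_SCHED_DEBUG
+is set, and the set_lazy_preempt() helper name is made up):
+
+  #include <stdio.h>
+
+  static int set_lazy_preempt(int enable)
+  {
+          FILE *f = fopen("/sys/kernel/debug/sched_features", "w");
+
+          if (!f)
+                  return -1;
+          fputs(enable ? "PREEMPT_LAZY" : "NO_PREEMPT_LAZY", f);
+          return fclose(f);
+  }
+
+  int main(void)
+  {
+          return set_lazy_preempt(0) ? 1 : 0; /* disable lazy preemption */
+  }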
+
+The test results so far are very machine- and workload-dependent, but
+there is a clear trend that it enhances non-RT workload performance.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/ftrace_event.h | 1
+ include/linux/preempt.h | 38 ++++++++++++++++++++++++++-
+ include/linux/sched.h | 51 ++++++++++++++++++++++++++++++++----
+ kernel/Kconfig.preempt | 6 ++++
+ kernel/sched/core.c | 60 ++++++++++++++++++++++++++++++++++++++++++-
+ kernel/sched/fair.c | 16 +++++------
+ kernel/sched/features.h | 3 ++
+ kernel/sched/sched.h | 9 ++++++
+ kernel/trace/trace.c | 41 +++++++++++++++++------------
+ kernel/trace/trace.h | 2 +
+ kernel/trace/trace_output.c | 13 +++++++--
+ 11 files changed, 206 insertions(+), 34 deletions(-)
+
+--- a/include/linux/ftrace_event.h
++++ b/include/linux/ftrace_event.h
+@@ -51,6 +51,7 @@ struct trace_entry {
+ int pid;
+ unsigned short migrate_disable;
+ unsigned short padding;
++ unsigned char preempt_lazy_count;
+ };
+
+ #define FTRACE_MAX_EVENT \
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -23,15 +23,38 @@
+
+ #define preempt_count() (current_thread_info()->preempt_count)
+
++#ifdef CONFIG_PREEMPT_LAZY
++#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
++#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
++#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
++#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
++#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
++#else
++#define add_preempt_lazy_count(val) do { } while (0)
++#define sub_preempt_lazy_count(val) do { } while (0)
++#define inc_preempt_lazy_count() do { } while (0)
++#define dec_preempt_lazy_count() do { } while (0)
++#define preempt_lazy_count() (0)
++#endif
++
+ #ifdef CONFIG_PREEMPT
+
+ asmlinkage void preempt_schedule(void);
+
++# ifdef CONFIG_PREEMPT_LAZY
++#define preempt_check_resched() \
++do { \
++ if (unlikely(test_thread_flag(TIF_NEED_RESCHED) || \
++ test_thread_flag(TIF_NEED_RESCHED_LAZY))) \
++ preempt_schedule(); \
++} while (0)
++# else
+ #define preempt_check_resched() \
+ do { \
+- if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
++ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+ preempt_schedule(); \
+ } while (0)
++# endif
+
+ #else /* !CONFIG_PREEMPT */
+
+@@ -48,6 +71,12 @@ do { \
+ barrier(); \
+ } while (0)
+
++#define preempt_lazy_disable() \
++do { \
++ inc_preempt_lazy_count(); \
++ barrier(); \
++} while (0)
++
+ #define sched_preempt_enable_no_resched() \
+ do { \
+ barrier(); \
+@@ -68,6 +97,13 @@ do { \
+ barrier(); \
+ preempt_check_resched(); \
+ } while (0)
++
++#define preempt_lazy_enable() \
++do { \
++ dec_preempt_lazy_count(); \
++ barrier(); \
++ preempt_check_resched(); \
++} while (0)
+
+ /* For debugging and tracer internals only! */
+ #define add_preempt_count_notrace(val) \
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2691,6 +2691,52 @@ static inline int test_tsk_need_resched(
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ }
+
++#ifdef CONFIG_PREEMPT_LAZY
++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
++}
++
++static inline int need_resched_lazy(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int need_resched_now(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++static inline int need_resched(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED) ||
++ test_thread_flag(TIF_NEED_RESCHED_LAZY);
++}
++#else
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
++static inline int need_resched_lazy(void) { return 0; }
++
++static inline int need_resched_now(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++static inline int need_resched(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED);
++}
++#endif
++
+ static inline int restart_syscall(void)
+ {
+ set_tsk_thread_flag(current, TIF_SIGPENDING);
+@@ -2722,11 +2768,6 @@ static inline int signal_pending_state(l
+ return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
+ }
+
+-static inline int need_resched(void)
+-{
+- return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+-}
+-
+ /*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -6,6 +6,12 @@ config PREEMPT_RT_BASE
+ bool
+ select PREEMPT
+
++config HAVE_PREEMPT_LAZY
++ bool
++
++config PREEMPT_LAZY
++ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
++
+ choice
+ prompt "Preemption Model"
+ default PREEMPT_NONE
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -543,6 +543,37 @@ void resched_task(struct task_struct *p)
+ smp_send_reschedule(cpu);
+ }
+
++#ifdef CONFIG_PREEMPT_LAZY
++void resched_task_lazy(struct task_struct *p)
++{
++ int cpu;
++
++ if (!sched_feat(PREEMPT_LAZY)) {
++ resched_task(p);
++ return;
++ }
++
++ assert_raw_spin_locked(&task_rq(p)->lock);
++
++ if (test_tsk_need_resched(p))
++ return;
++
++ if (test_tsk_need_resched_lazy(p))
++ return;
++
++ set_tsk_need_resched_lazy(p);
++
++ cpu = task_cpu(p);
++ if (cpu == smp_processor_id())
++ return;
++
++ /* NEED_RESCHED_LAZY must be visible before we test polling */
++ smp_mb();
++ if (!tsk_is_polling(p))
++ smp_send_reschedule(cpu);
++}
++#endif
++
+ void resched_cpu(int cpu)
+ {
+ struct rq *rq = cpu_rq(cpu);
+@@ -659,6 +690,17 @@ void resched_task(struct task_struct *p)
+ assert_raw_spin_locked(&task_rq(p)->lock);
+ set_tsk_need_resched(p);
+ }
++#ifdef CONFIG_PREEMPT_LAZY
++void resched_task_lazy(struct task_struct *p)
++{
++ if (!sched_feat(PREEMPT_LAZY)) {
++ resched_task(p);
++ return;
++ }
++ assert_raw_spin_locked(&task_rq(p)->lock);
++ set_tsk_need_resched_lazy(p);
++}
++#endif
+ #endif /* CONFIG_SMP */
+
+ #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
+@@ -1718,6 +1760,9 @@ void sched_fork(struct task_struct *p)
+ /* Want to start with kernel preemption disabled. */
+ task_thread_info(p)->preempt_count = 1;
+ #endif
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++ task_thread_info(p)->preempt_lazy_count = 0;
++#endif
+ #ifdef CONFIG_SMP
+ plist_node_init(&p->pushable_tasks, MAX_PRIO);
+ #endif
+@@ -2926,6 +2971,7 @@ void migrate_disable(void)
+ return;
+ }
+
++ preempt_lazy_disable();
+ pin_current_cpu();
+ p->migrate_disable = 1;
+ preempt_enable();
+@@ -2981,6 +3027,7 @@ void migrate_enable(void)
+
+ unpin_current_cpu();
+ preempt_enable();
++ preempt_lazy_enable();
+ }
+ EXPORT_SYMBOL(migrate_enable);
+ #else
+@@ -3115,6 +3162,7 @@ need_resched:
+ put_prev_task(rq, prev);
+ next = pick_next_task(rq);
+ clear_tsk_need_resched(prev);
++ clear_tsk_need_resched_lazy(prev);
+ rq->skip_clock_update = 0;
+
+ if (likely(prev != next)) {
+@@ -3251,6 +3299,14 @@ asmlinkage void __sched notrace preempt_
+ if (likely(ti->preempt_count || irqs_disabled()))
+ return;
+
++#ifdef CONFIG_PREEMPT_LAZY
++ /*
++ * Check for lazy preemption
++ */
++ if (ti->preempt_lazy_count && !test_thread_flag(TIF_NEED_RESCHED))
++ return;
++#endif
++
+ do {
+ add_preempt_count_notrace(PREEMPT_ACTIVE);
+ /*
+@@ -4862,7 +4918,9 @@ void __cpuinit init_idle(struct task_str
+
+ /* Set the preempt count _outside_ the spinlocks! */
+ task_thread_info(idle)->preempt_count = 0;
+-
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++ task_thread_info(idle)->preempt_lazy_count = 0;
++#endif
+ /*
+ * The idle tasks have their own, simple scheduling class:
+ */
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1827,7 +1827,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
+ ideal_runtime = sched_slice(cfs_rq, curr);
+ delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+ if (delta_exec > ideal_runtime) {
+- resched_task(rq_of(cfs_rq)->curr);
++ resched_task_lazy(rq_of(cfs_rq)->curr);
+ /*
+ * The current task ran long enough, ensure it doesn't get
+ * re-elected due to buddy favours.
+@@ -1851,7 +1851,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
+ return;
+
+ if (delta > ideal_runtime)
+- resched_task(rq_of(cfs_rq)->curr);
++ resched_task_lazy(rq_of(cfs_rq)->curr);
+ }
+
+ static void
+@@ -1971,7 +1971,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc
+ * validating it and just reschedule.
+ */
+ if (queued) {
+- resched_task(rq_of(cfs_rq)->curr);
++ resched_task_lazy(rq_of(cfs_rq)->curr);
+ return;
+ }
+ /*
+@@ -2160,7 +2160,7 @@ static void __account_cfs_rq_runtime(str
+ * hierarchy can be throttled
+ */
+ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
+- resched_task(rq_of(cfs_rq)->curr);
++ resched_task_lazy(rq_of(cfs_rq)->curr);
+ }
+
+ static __always_inline
+@@ -2745,7 +2745,7 @@ static void hrtick_start_fair(struct rq
+
+ if (delta < 0) {
+ if (rq->curr == p)
+- resched_task(p);
++ resched_task_lazy(p);
+ return;
+ }
+
+@@ -3577,7 +3577,7 @@ static void check_preempt_wakeup(struct
+ return;
+
+ preempt:
+- resched_task(curr);
++ resched_task_lazy(curr);
+ /*
+ * Only set the backward buddy when the current task is still
+ * on the rq. This can happen when a wakeup gets interleaved
+@@ -5772,7 +5772,7 @@ static void task_fork_fair(struct task_s
+ * 'current' within the tree based on its new key value.
+ */
+ swap(curr->vruntime, se->vruntime);
+- resched_task(rq->curr);
++ resched_task_lazy(rq->curr);
+ }
+
+ se->vruntime -= cfs_rq->min_vruntime;
+@@ -5797,7 +5797,7 @@ prio_changed_fair(struct rq *rq, struct
+ */
+ if (rq->curr == p) {
+ if (p->prio > oldprio)
+- resched_task(rq->curr);
++ resched_task_lazy(rq->curr);
+ } else
+ check_preempt_curr(rq, p, 0);
+ }
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -65,6 +65,9 @@ SCHED_FEAT(NONTASK_POWER, true)
+ SCHED_FEAT(TTWU_QUEUE, true)
+ #else
+ SCHED_FEAT(TTWU_QUEUE, false)
++# ifdef CONFIG_PREEMPT_LAZY
++SCHED_FEAT(PREEMPT_LAZY, true)
++# endif
+ #endif
+
+ SCHED_FEAT(FORCE_SD_OVERLAP, false)
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -897,6 +897,15 @@ extern void init_sched_fair_class(void);
+ extern void resched_task(struct task_struct *p);
+ extern void resched_cpu(int cpu);
+
++#ifdef CONFIG_PREEMPT_LAZY
++extern void resched_task_lazy(struct task_struct *tsk);
++#else
++static inline void resched_task_lazy(struct task_struct *tsk)
++{
++ resched_task(tsk);
++}
++#endif
++
+ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1166,6 +1166,7 @@ tracing_generic_entry_update(struct trac
+ struct task_struct *tsk = current;
+
+ entry->preempt_count = pc & 0xff;
++ entry->preempt_lazy_count = preempt_lazy_count();
+ entry->pid = (tsk) ? tsk->pid : 0;
+ entry->padding = 0;
+ entry->flags =
+@@ -1176,7 +1177,8 @@ tracing_generic_entry_update(struct trac
+ #endif
+ ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+ ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+- (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
++ (need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
++ (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0);
+
+ entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
+ }
+@@ -2031,15 +2033,17 @@ get_total_entries(struct trace_array *tr
+
+ static void print_lat_help_header(struct seq_file *m)
+ {
+- seq_puts(m, "# _------=> CPU# \n");
+- seq_puts(m, "# / _-----=> irqs-off \n");
+- seq_puts(m, "# | / _----=> need-resched \n");
+- seq_puts(m, "# || / _---=> hardirq/softirq \n");
+- seq_puts(m, "# ||| / _--=> preempt-depth \n");
+- seq_puts(m, "# |||| / _--=> migrate-disable\n");
+- seq_puts(m, "# ||||| / delay \n");
+- seq_puts(m, "# cmd pid |||||| time | caller \n");
+- seq_puts(m, "# \\ / ||||| \\ | / \n");
++ seq_puts(m, "# _--------=> CPU# \n");
++ seq_puts(m, "# / _-------=> irqs-off \n");
++ seq_puts(m, "# | / _------=> need-resched \n");
++ seq_puts(m, "# || / _-----=> need-resched_lazy \n");
++ seq_puts(m, "# ||| / _----=> hardirq/softirq \n");
++ seq_puts(m, "# |||| / _---=> preempt-depth \n");
++ seq_puts(m, "# ||||| / _--=> preempt-lazy-depth\n");
++ seq_puts(m, "# |||||| / _-=> migrate-disable \n");
++ seq_puts(m, "# ||||||| / delay \n");
++ seq_puts(m, "# cmd pid |||||||| time | caller \n");
++ seq_puts(m, "# \\ / |||||||| \\ | / \n");
+ }
+
+ static void print_event_info(struct trace_array *tr, struct seq_file *m)
+@@ -2063,13 +2067,16 @@ static void print_func_help_header(struc
+ static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
+ {
+ print_event_info(tr, m);
+- seq_puts(m, "# _-----=> irqs-off\n");
+- seq_puts(m, "# / _----=> need-resched\n");
+- seq_puts(m, "# | / _---=> hardirq/softirq\n");
+- seq_puts(m, "# || / _--=> preempt-depth\n");
+- seq_puts(m, "# ||| / delay\n");
+- seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
+- seq_puts(m, "# | | | |||| | |\n");
++ seq_puts(m, "# _-------=> irqs-off \n");
++ seq_puts(m, "# / _------=> need-resched \n");
++ seq_puts(m, "# |/ _-----=> need-resched_lazy \n");
++ seq_puts(m, "# ||/ _----=> hardirq/softirq \n");
++ seq_puts(m, "# |||/ _---=> preempt-depth \n");
++ seq_puts(m, "# ||||/ _--=> preempt-lazy-depth\n");
++ seq_puts(m, "# ||||| / _-=> migrate-disable \n");
++ seq_puts(m, "# |||||| / delay\n");
++ seq_puts(m, "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n");
++ seq_puts(m, "# | | | ||||||| | |\n");
+ }
+
+ void
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -116,6 +116,7 @@ struct uprobe_trace_entry_head {
+ * NEED_RESCHED - reschedule is requested
+ * HARDIRQ - inside an interrupt handler
+ * SOFTIRQ - inside a softirq handler
++ * NEED_RESCHED_LAZY - lazy reschedule is requested
+ */
+ enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+@@ -123,6 +124,7 @@ enum trace_flag_type {
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20,
+ };
+
+ #define TRACE_BUF_SIZE 1024
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -564,6 +564,7 @@ int trace_print_lat_fmt(struct trace_seq
+ {
+ char hardsoft_irq;
+ char need_resched;
++ char need_resched_lazy;
+ char irqs_off;
+ int hardirq;
+ int softirq;
+@@ -578,14 +579,17 @@ int trace_print_lat_fmt(struct trace_seq
+ '.';
+ need_resched =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
++ need_resched_lazy =
++ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+ hardsoft_irq =
+ (hardirq && softirq) ? 'H' :
+ hardirq ? 'h' :
+ softirq ? 's' :
+ '.';
+
+- if (!trace_seq_printf(s, "%c%c%c",
+- irqs_off, need_resched, hardsoft_irq))
++ if (!trace_seq_printf(s, "%c%c%c%c",
++ irqs_off, need_resched, need_resched_lazy,
++ hardsoft_irq))
+ return 0;
+
+ if (entry->preempt_count)
+@@ -593,6 +597,11 @@ int trace_print_lat_fmt(struct trace_seq
+ else
+ ret = trace_seq_putc(s, '.');
+
++ if (entry->preempt_lazy_count)
++ ret = trace_seq_printf(s, "%x", entry->preempt_lazy_count);
++ else
++ ret = trace_seq_putc(s, '.');
++
+ if (entry->migrate_disable)
+ ret = trace_seq_printf(s, "%x", entry->migrate_disable);
+ else
diff --git a/patches/preempt-nort-rt-variants.patch b/patches/preempt-nort-rt-variants.patch
new file mode 100644
index 0000000..7a045c5
--- /dev/null
+++ b/patches/preempt-nort-rt-variants.patch
@@ -0,0 +1,52 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 24 Jul 2009 12:38:56 +0200
+Subject: preempt: Provide preempt_*_(no)rt variants
+
+RT needs a few preempt_disable/enable points which are not necessary
+otherwise. Implement variants to avoid #ifdeffery.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/preempt.h | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -54,11 +54,15 @@ do { \
+ dec_preempt_count(); \
+ } while (0)
+
+-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
++#ifndef CONFIG_PREEMPT_RT_BASE
++# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
++#else
++# define preempt_enable_no_resched() preempt_enable()
++#endif
+
+ #define preempt_enable() \
+ do { \
+- preempt_enable_no_resched(); \
++ sched_preempt_enable_no_resched(); \
+ barrier(); \
+ preempt_check_resched(); \
+ } while (0)
+@@ -104,6 +108,18 @@ do { \
+
+ #endif /* CONFIG_PREEMPT_COUNT */
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define preempt_disable_rt() preempt_disable()
++# define preempt_enable_rt() preempt_enable()
++# define preempt_disable_nort() do { } while (0)
++# define preempt_enable_nort() do { } while (0)
++#else
++# define preempt_disable_rt() do { } while (0)
++# define preempt_enable_rt() do { } while (0)
++# define preempt_disable_nort() preempt_disable()
++# define preempt_enable_nort() preempt_enable()
++#endif
++
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+
+ struct preempt_notifier;
diff --git a/patches/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch b/patches/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch
new file mode 100644
index 0000000..b8be161
--- /dev/null
+++ b/patches/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch
@@ -0,0 +1,35 @@
+Subject: printk: 'force_early_printk' boot param to help with debugging
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 02 Sep 2011 14:41:29 +0200
+
+Subject: printk: 'force_early_printk' boot param to help with debugging
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri Sep 02 14:29:33 CEST 2011
+
+Gives me an option to screw printk and actually see what the machine
+says.
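+
+A hedged usage sketch: assuming an early console is configured
+(CONFIG_EARLY_PRINTK), appending the valueless parameter to the kernel
+command line is enough, e.g.:
+
+  console=ttyS0,115200 earlyprintk=serial,ttyS0,115200 force_early_printk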
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/1314967289.1301.11.camel@twins
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/n/tip-ykb97nsfmobq44xketrxs977@git.kernel.org
+---
+ kernel/printk.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -779,6 +779,13 @@ asmlinkage void early_printk(const char
+ */
+ static bool __read_mostly printk_killswitch;
+
++static int __init force_early_printk_setup(char *str)
++{
++ printk_killswitch = true;
++ return 0;
++}
++early_param("force_early_printk", force_early_printk_setup);
++
+ void printk_kill(void)
+ {
+ printk_killswitch = true;
diff --git a/patches/printk-kill.patch b/patches/printk-kill.patch
new file mode 100644
index 0000000..f864fac
--- /dev/null
+++ b/patches/printk-kill.patch
@@ -0,0 +1,115 @@
+Subject: printk-kill.patch
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 22 Jul 2011 17:58:40 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/printk.h | 3 ++-
+ kernel/printk.c | 33 +++++++++++++++++++++++++++++++++
+ kernel/watchdog.c | 15 +++++++++++++--
+ 3 files changed, 48 insertions(+), 3 deletions(-)
+
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -99,9 +99,11 @@ int no_printk(const char *fmt, ...)
+ extern asmlinkage __printf(1, 2)
+ void early_printk(const char *fmt, ...);
+ void early_vprintk(const char *fmt, va_list ap);
++extern void printk_kill(void);
+ #else
+ static inline __printf(1, 2) __cold
+ void early_printk(const char *s, ...) { }
++static inline void printk_kill(void) { }
+ #endif
+
+ extern int printk_needs_cpu(int cpu);
+@@ -138,7 +140,6 @@ extern int __printk_ratelimit(const char
+ #define printk_ratelimit() __printk_ratelimit(__func__)
+ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ unsigned int interval_msec);
+-
+ extern int printk_delay_msec;
+ extern int dmesg_restrict;
+ extern int kptr_restrict;
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -770,6 +770,32 @@ asmlinkage void early_printk(const char
+ early_vprintk(fmt, ap);
+ va_end(ap);
+ }
++
++/*
++ * This is independent of any log levels - a global
++ * kill switch that turns off all of printk.
++ *
++ * Used by the NMI watchdog if early-printk is enabled.
++ */
++static bool __read_mostly printk_killswitch;
++
++void printk_kill(void)
++{
++ printk_killswitch = true;
++}
++
++static int forced_early_printk(const char *fmt, va_list ap)
++{
++ if (!printk_killswitch)
++ return 0;
++ early_vprintk(fmt, ap);
++ return 1;
++}
++#else
++static inline int forced_early_printk(const char *fmt, va_list ap)
++{
++ return 0;
++}
+ #endif
+
+ #ifdef CONFIG_BOOT_PRINTK_DELAY
+@@ -1511,6 +1537,13 @@ asmlinkage int vprintk_emit(int facility
+ int this_cpu;
+ int printed_len = 0;
+
++ /*
++ * Fall back to early_printk if a debugging subsystem has
++ * killed printk output
++ */
++ if (unlikely(forced_early_printk(fmt, args)))
++ return 1;
++
+ boot_delay_msec(level);
+ printk_delay();
+
+--- a/kernel/watchdog.c
++++ b/kernel/watchdog.c
+@@ -206,6 +206,8 @@ static int is_softlockup(unsigned long t
+
+ #ifdef CONFIG_HARDLOCKUP_DETECTOR
+
++static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
++
+ static struct perf_event_attr wd_hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+@@ -240,10 +242,19 @@ static void watchdog_overflow_callback(s
+ if (__this_cpu_read(hard_watchdog_warn) == true)
+ return;
+
+- if (hardlockup_panic)
++ /*
++ * If early-printk is enabled then make sure we do not
++ * lock up in printk() and kill console logging:
++ */
++ printk_kill();
++
++ if (hardlockup_panic) {
+ panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+- else
++ } else {
++ raw_spin_lock(&watchdog_output_lock);
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
++ raw_spin_unlock(&watchdog_output_lock);
++ }
+
+ __this_cpu_write(hard_watchdog_warn, true);
+ return;
diff --git a/patches/printk-rt-aware.patch b/patches/printk-rt-aware.patch
new file mode 100644
index 0000000..b9c221a
--- /dev/null
+++ b/patches/printk-rt-aware.patch
@@ -0,0 +1,101 @@
+Subject: printk-rt-aware.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 19 Sep 2012 14:50:37 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/printk.c | 33 +++++++++++++++++++++++++++++----
+ 1 file changed, 29 insertions(+), 4 deletions(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -1315,6 +1315,7 @@ static void call_console_drivers(int lev
+ if (!console_drivers)
+ return;
+
++ migrate_disable();
+ for_each_console(con) {
+ if (exclusive_console && con != exclusive_console)
+ continue;
+@@ -1327,6 +1328,7 @@ static void call_console_drivers(int lev
+ continue;
+ con->write(con, text, len);
+ }
++ migrate_enable();
+ }
+
+ /*
+@@ -1386,12 +1388,18 @@ static inline int can_use_console(unsign
+ * interrupts disabled. It should return with 'lockbuf_lock'
+ * released but interrupts still disabled.
+ */
+-static int console_trylock_for_printk(unsigned int cpu)
++static int console_trylock_for_printk(unsigned int cpu, unsigned long flags)
+ __releases(&logbuf_lock)
+ {
+ int retval = 0, wake = 0;
++#ifdef CONFIG_PREEMPT_RT_FULL
++ int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
++ (preempt_count() <= 1);
++#else
++ int lock = 1;
++#endif
+
+- if (console_trylock()) {
++ if (lock && console_trylock()) {
+ retval = 1;
+
+ /*
+@@ -1670,8 +1678,15 @@ asmlinkage int vprintk_emit(int facility
+ * The console_trylock_for_printk() function will release 'logbuf_lock'
+ * regardless of whether it actually gets the console semaphore or not.
+ */
+- if (console_trylock_for_printk(this_cpu))
++ if (console_trylock_for_printk(this_cpu, flags)) {
++#ifndef CONFIG_PREEMPT_RT_FULL
++ console_unlock();
++#else
++ raw_local_irq_restore(flags);
+ console_unlock();
++ raw_local_irq_save(flags);
++#endif
++ }
+
+ lockdep_on();
+ out_restore_irqs:
+@@ -2060,11 +2075,16 @@ static void console_cont_flush(char *tex
+ goto out;
+
+ len = cont_print_text(text, size);
++#ifndef CONFIG_PREEMPT_RT_FULL
+ raw_spin_unlock(&logbuf_lock);
+ stop_critical_timings();
+ call_console_drivers(cont.level, text, len);
+ start_critical_timings();
+ local_irq_restore(flags);
++#else
++ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
++ call_console_drivers(cont.level, text, len);
++#endif
+ return;
+ out:
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+@@ -2147,12 +2167,17 @@ skip:
+ console_idx = log_next(console_idx);
+ console_seq++;
+ console_prev = msg->flags;
+- raw_spin_unlock(&logbuf_lock);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++ raw_spin_unlock(&logbuf_lock);
+ stop_critical_timings(); /* don't trace print latency */
+ call_console_drivers(level, text, len);
+ start_critical_timings();
+ local_irq_restore(flags);
++#else
++ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
++ call_console_drivers(level, text, len);
++#endif
+ }
+ console_locked = 0;
+
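The migrate_disable()/migrate_enable() pair used above is the general RT replacement for "stay on this CPU" requirements: the section may be preempted and may even sleep (console ->write() handlers can take sleeping locks on RT), but the task will not move to another CPU. A minimal sketch, with demo_stay_on_cpu() as a hypothetical example, not taken from the patch:

#include <linux/preempt.h>
#include <linux/printk.h>

static void demo_stay_on_cpu(void)
{
	int cpu;

	migrate_disable();
	cpu = raw_smp_processor_id();	/* stable until migrate_enable() */
	pr_info("running pinned on CPU %d\n", cpu);
	migrate_enable();
}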
diff --git a/patches/radix-tree-rt-aware.patch b/patches/radix-tree-rt-aware.patch
new file mode 100644
index 0000000..f92a9ee
--- /dev/null
+++ b/patches/radix-tree-rt-aware.patch
@@ -0,0 +1,68 @@
+Subject: radix-tree-rt-aware.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:33:18 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/radix-tree.h | 8 +++++++-
+ lib/radix-tree.c | 5 ++++-
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -230,7 +230,13 @@ unsigned long radix_tree_next_hole(struc
+ unsigned long index, unsigned long max_scan);
+ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+ unsigned long index, unsigned long max_scan);
++
++#ifndef CONFIG_PREEMPT_RT_FULL
+ int radix_tree_preload(gfp_t gfp_mask);
++#else
++static inline int radix_tree_preload(gfp_t gm) { return 0; }
++#endif
++
+ void radix_tree_init(void);
+ void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+@@ -255,7 +261,7 @@ unsigned long radix_tree_locate_item(str
+
+ static inline void radix_tree_preload_end(void)
+ {
+- preempt_enable();
++ preempt_enable_nort();
+ }
+
+ /**
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -215,12 +215,13 @@ radix_tree_node_alloc(struct radix_tree_
+ * succeed in getting a node here (and never reach
+ * kmem_cache_alloc)
+ */
+- rtp = &__get_cpu_var(radix_tree_preloads);
++ rtp = &get_cpu_var(radix_tree_preloads);
+ if (rtp->nr) {
+ ret = rtp->nodes[rtp->nr - 1];
+ rtp->nodes[rtp->nr - 1] = NULL;
+ rtp->nr--;
+ }
++ put_cpu_var(radix_tree_preloads);
+ }
+ if (ret == NULL)
+ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+@@ -255,6 +256,7 @@ radix_tree_node_free(struct radix_tree_n
+ call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail. On
+@@ -289,6 +291,7 @@ out:
+ return ret;
+ }
+ EXPORT_SYMBOL(radix_tree_preload);
++#endif
+
+ /*
+ * Return the maximum key which can be store into a
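A hypothetical caller (demo_insert() and demo_tree_lock are made up) showing why the RT no-op preload is safe: with sleeping spinlocks there is no guarantee of staying on the preloaded CPU anyway, and insertion simply falls back to kmem_cache_alloc().

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_tree_lock);

static int demo_insert(struct radix_tree_root *root, unsigned long index,
		       void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* returns 0 on RT, no preload */
	if (err)
		return err;

	spin_lock(&demo_tree_lock);		/* sleeping lock on RT */
	err = radix_tree_insert(root, index, item); /* may hit kmem_cache_alloc() */
	spin_unlock(&demo_tree_lock);

	radix_tree_preload_end();		/* preempt_enable_nort(): no-op on RT */
	return err;
}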
diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random-make-it-work-on-rt.patch
new file mode 100644
index 0000000..825b744
--- /dev/null
+++ b/patches/random-make-it-work-on-rt.patch
@@ -0,0 +1,114 @@
+Subject: random: Make it work on rt
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Aug 2012 20:38:50 +0200
+
+Delegate the random insertion to the forced threaded interrupt
+handler. Store the return IP of the hard interrupt handler in the irq
+descriptor and feed it into the random generator as a source of
+entropy.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ drivers/char/random.c | 10 ++++++----
+ include/linux/irqdesc.h | 1 +
+ include/linux/random.h | 2 +-
+ kernel/irq/handle.c | 8 +++++++-
+ kernel/irq/manage.c | 6 ++++++
+ 5 files changed, 21 insertions(+), 6 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -742,18 +742,16 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
+
+ static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
+
+-void add_interrupt_randomness(int irq, int irq_flags)
++void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
+ {
+ struct entropy_store *r;
+ struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness);
+- struct pt_regs *regs = get_irq_regs();
+ unsigned long now = jiffies;
+ __u32 input[4], cycles = get_cycles();
+
+ input[0] = cycles ^ jiffies;
+ input[1] = irq;
+- if (regs) {
+- __u64 ip = instruction_pointer(regs);
++ if (ip) {
+ input[2] = ip;
+ input[3] = ip >> 32;
+ }
+@@ -767,7 +765,11 @@ void add_interrupt_randomness(int irq, i
+ fast_pool->last = now;
+
+ r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
++#ifndef CONFIG_PREEMPT_RT_FULL
+ __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
++#else
++ mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
++#endif
+ /*
+ * If we don't have a valid cycle counter, and we see
+ * back-to-back timer interrupts, then skip giving credit for
+--- a/include/linux/irqdesc.h
++++ b/include/linux/irqdesc.h
+@@ -52,6 +52,7 @@ struct irq_desc {
+ unsigned int irq_count; /* For detecting broken IRQs */
+ unsigned long last_unhandled; /* Aging timer for unhandled count */
+ unsigned int irqs_unhandled;
++ u64 random_ip;
+ raw_spinlock_t lock;
+ struct cpumask *percpu_enabled;
+ #ifdef CONFIG_SMP
+--- a/include/linux/random.h
++++ b/include/linux/random.h
+@@ -12,7 +12,7 @@
+ extern void add_device_randomness(const void *, unsigned int);
+ extern void add_input_randomness(unsigned int type, unsigned int code,
+ unsigned int value);
+-extern void add_interrupt_randomness(int irq, int irq_flags);
++extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
+
+ extern void get_random_bytes(void *buf, int nbytes);
+ extern void get_random_bytes_arch(void *buf, int nbytes);
+--- a/kernel/irq/handle.c
++++ b/kernel/irq/handle.c
+@@ -132,6 +132,8 @@ static void irq_wake_thread(struct irq_d
+ irqreturn_t
+ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
+ {
++ struct pt_regs *regs = get_irq_regs();
++ u64 ip = regs ? instruction_pointer(regs) : 0;
+ irqreturn_t retval = IRQ_NONE;
+ unsigned int flags = 0, irq = desc->irq_data.irq;
+
+@@ -172,7 +174,11 @@ handle_irq_event_percpu(struct irq_desc
+ action = action->next;
+ } while (action);
+
+- add_interrupt_randomness(irq, flags);
++#ifndef CONFIG_PREEMPT_RT_FULL
++ add_interrupt_randomness(irq, flags, ip);
++#else
++ desc->random_ip = ip;
++#endif
+
+ if (!noirqdebug)
+ note_interrupt(irq, desc, retval);
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -879,6 +879,12 @@ static int irq_thread(void *data)
+ if (!noirqdebug)
+ note_interrupt(action->irq, desc, action_ret);
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ migrate_disable();
++ add_interrupt_randomness(action->irq, 0,
++ desc->random_ip ^ (unsigned long) action);
++ migrate_enable();
++#endif
+ wake_threads_waitq(desc);
+ }
+
diff --git a/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch b/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch
new file mode 100644
index 0000000..02063eb
--- /dev/null
+++ b/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch
@@ -0,0 +1,24 @@
+Subject: rcu: Disable RCU_FAST_NO_HZ on RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 13:26:09 +0000
+
+This uses a timer_list timer from the irq disabled guts of the idle
+code. Disable it for now to prevent wreckage.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ init/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -579,7 +579,7 @@ config RCU_FANOUT_EXACT
+
+ config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+- depends on NO_HZ && SMP
++ depends on NO_HZ && SMP && !PREEMPT_RT_FULL
+ default n
+ help
+ This option causes RCU to attempt to accelerate grace periods in
diff --git a/patches/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch b/patches/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch
new file mode 100644
index 0000000..beff51c
--- /dev/null
+++ b/patches/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch
@@ -0,0 +1,258 @@
+Subject: rcu: Merge RCU-bh into RCU-preempt
+Date: Wed, 5 Oct 2011 11:59:38 -0700
+From: Thomas Gleixner <tglx@linutronix.de>
+
+The Linux kernel has long RCU-bh read-side critical sections that
+intolerably increase scheduling latency under mainline's RCU-bh rules,
+which include RCU-bh read-side critical sections being non-preemptible.
+This patch therefore arranges for RCU-bh to be implemented in terms of
+RCU-preempt for CONFIG_PREEMPT_RT_FULL=y.
+
+This has the downside of defeating the purpose of RCU-bh, namely,
+handling the case where the system is subjected to a network-based
+denial-of-service attack that keeps at least one CPU doing full-time
+softirq processing. This issue will be fixed by a later commit.
+
+The current commit will need some work to make it appropriate for
+mainline use; for example, it needs to be extended to cover Tiny RCU.
+
+[ paulmck: Added a useful changelog ]
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Link: http://lkml.kernel.org/r/20111005185938.GA20403@linux.vnet.ibm.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/rcupdate.h | 25 +++++++++++++++++++++++++
+ include/linux/rcutree.h | 18 ++++++++++++++++--
+ kernel/rcupdate.c | 2 ++
+ kernel/rcutree.c | 10 ++++++++++
+ 4 files changed, 53 insertions(+), 2 deletions(-)
+
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -120,6 +120,9 @@ extern void call_rcu(struct rcu_head *he
+
+ #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++#define call_rcu_bh call_rcu
++#else
+ /**
+ * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+@@ -143,6 +146,7 @@ extern void call_rcu(struct rcu_head *he
+ */
+ extern void call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
++#endif
+
+ /**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+@@ -216,7 +220,13 @@ static inline int rcu_preempt_depth(void
+
+ /* Internal to kernel */
+ extern void rcu_sched_qs(int cpu);
++
++#ifndef CONFIG_PREEMPT_RT_FULL
+ extern void rcu_bh_qs(int cpu);
++#else
++static inline void rcu_bh_qs(int cpu) { }
++#endif
++
+ extern void rcu_check_callbacks(int cpu, int user);
+ struct notifier_block;
+ extern void rcu_idle_enter(void);
+@@ -366,7 +376,14 @@ static inline int rcu_read_lock_held(voi
+ * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
+ * hell.
+ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++static inline int rcu_read_lock_bh_held(void)
++{
++ return rcu_read_lock_held();
++}
++#else
+ extern int rcu_read_lock_bh_held(void);
++#endif
+
+ /**
+ * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
+@@ -814,10 +831,14 @@ static inline void rcu_read_unlock(void)
+ static inline void rcu_read_lock_bh(void)
+ {
+ local_bh_disable();
++#ifdef CONFIG_PREEMPT_RT_FULL
++ rcu_read_lock();
++#else
+ __acquire(RCU_BH);
+ rcu_lock_acquire(&rcu_bh_lock_map);
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_lock_bh() used illegally while idle");
++#endif
+ }
+
+ /*
+@@ -827,10 +848,14 @@ static inline void rcu_read_lock_bh(void
+ */
+ static inline void rcu_read_unlock_bh(void)
+ {
++#ifdef CONFIG_PREEMPT_RT_FULL
++ rcu_read_unlock();
++#else
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_unlock_bh() used illegally while idle");
+ rcu_lock_release(&rcu_bh_lock_map);
+ __release(RCU_BH);
++#endif
+ local_bh_enable();
+ }
+
+--- a/include/linux/rcutree.h
++++ b/include/linux/rcutree.h
+@@ -45,7 +45,11 @@ static inline void rcu_virt_note_context
+ rcu_note_context_switch(cpu);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define synchronize_rcu_bh synchronize_rcu
++#else
+ extern void synchronize_rcu_bh(void);
++#endif
+ extern void synchronize_sched_expedited(void);
+ extern void synchronize_rcu_expedited(void);
+
+@@ -73,20 +77,30 @@ static inline void synchronize_rcu_bh_ex
+ }
+
+ extern void rcu_barrier(void);
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define rcu_barrier_bh rcu_barrier
++#else
+ extern void rcu_barrier_bh(void);
++#endif
+ extern void rcu_barrier_sched(void);
+
+ extern unsigned long rcutorture_testseq;
+ extern unsigned long rcutorture_vernum;
+ extern long rcu_batches_completed(void);
+-extern long rcu_batches_completed_bh(void);
+ extern long rcu_batches_completed_sched(void);
+
+ extern void rcu_force_quiescent_state(void);
+-extern void rcu_bh_force_quiescent_state(void);
+ extern void rcu_sched_force_quiescent_state(void);
+
+ extern void rcu_scheduler_starting(void);
+ extern int rcu_scheduler_active __read_mostly;
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++extern void rcu_bh_force_quiescent_state(void);
++extern long rcu_batches_completed_bh(void);
++#else
++# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
++# define rcu_batches_completed_bh rcu_batches_completed
++#endif
++
+ #endif /* __LINUX_RCUTREE_H */
+--- a/kernel/rcupdate.c
++++ b/kernel/rcupdate.c
+@@ -156,6 +156,7 @@ int debug_lockdep_rcu_enabled(void)
+ }
+ EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /**
+ * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
+ *
+@@ -182,6 +183,7 @@ int rcu_read_lock_bh_held(void)
+ return in_softirq() || irqs_disabled();
+ }
+ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
++#endif
+
+ #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+--- a/kernel/rcutree.c
++++ b/kernel/rcutree.c
+@@ -181,6 +181,7 @@ void rcu_sched_qs(int cpu)
+ rdp->passed_quiesce = 1;
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ void rcu_bh_qs(int cpu)
+ {
+ struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+@@ -189,6 +190,7 @@ void rcu_bh_qs(int cpu)
+ trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
+ rdp->passed_quiesce = 1;
+ }
++#endif
+
+ /*
+ * Note a context switch. This is a quiescent state for RCU-sched,
+@@ -242,6 +244,7 @@ long rcu_batches_completed_sched(void)
+ }
+ EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Return the number of RCU BH batches processed thus far for debug & stats.
+ */
+@@ -259,6 +262,7 @@ void rcu_bh_force_quiescent_state(void)
+ force_quiescent_state(&rcu_bh_state);
+ }
+ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
++#endif
+
+ /*
+ * Record the number of times rcutorture tests have been initiated and
+@@ -2183,6 +2187,7 @@ void call_rcu_sched(struct rcu_head *hea
+ }
+ EXPORT_SYMBOL_GPL(call_rcu_sched);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Queue an RCU callback for invocation after a quicker grace period.
+ */
+@@ -2191,6 +2196,7 @@ void call_rcu_bh(struct rcu_head *head,
+ __call_rcu(head, func, &rcu_bh_state, -1, 0);
+ }
+ EXPORT_SYMBOL_GPL(call_rcu_bh);
++#endif
+
+ /*
+ * Because a context switch is a grace period for RCU-sched and RCU-bh,
+@@ -2268,6 +2274,7 @@ void synchronize_sched(void)
+ }
+ EXPORT_SYMBOL_GPL(synchronize_sched);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+@@ -2294,6 +2301,7 @@ void synchronize_rcu_bh(void)
+ wait_rcu_gp(call_rcu_bh);
+ }
+ EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
++#endif
+
+ static int synchronize_sched_expedited_cpu_stop(void *data)
+ {
+@@ -2682,6 +2690,7 @@ static void _rcu_barrier(struct rcu_stat
+ mutex_unlock(&rsp->barrier_mutex);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /**
+ * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
+ */
+@@ -2690,6 +2699,7 @@ void rcu_barrier_bh(void)
+ _rcu_barrier(&rcu_bh_state);
+ }
+ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
++#endif
+
+ /**
+ * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
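Under CONFIG_PREEMPT_RT_FULL the bh flavour is mapped onto preemptible RCU: call_rcu_bh is #defined to call_rcu and rcu_read_lock_bh() becomes local_bh_disable() plus rcu_read_lock(), so existing users keep working unchanged. A hedged sketch with hypothetical names (demo_obj, demo_ptr, demo_read, demo_retire):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
	int value;
	struct rcu_head rcu;
};

static struct demo_obj __rcu *demo_ptr;

static int demo_read(void)
{
	struct demo_obj *p;
	int val = -1;

	rcu_read_lock_bh();		/* on RT: local_bh_disable() + rcu_read_lock() */
	p = rcu_dereference_bh(demo_ptr);
	if (p)
		val = p->value;
	rcu_read_unlock_bh();
	return val;
}

static void demo_free(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_obj, rcu));
}

static void demo_retire(struct demo_obj *old)
{
	call_rcu_bh(&old->rcu, demo_free);	/* == call_rcu() under PREEMPT_RT_FULL */
}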
diff --git a/patches/rcu-tiny-merge-bh.patch b/patches/rcu-tiny-merge-bh.patch
new file mode 100644
index 0000000..ac9c70c
--- /dev/null
+++ b/patches/rcu-tiny-merge-bh.patch
@@ -0,0 +1,24 @@
+Subject: rcu-more-fallout.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 14 Nov 2011 10:57:54 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/rcutiny.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/rcutiny.c
++++ b/kernel/rcutiny.c
+@@ -371,6 +371,7 @@ void call_rcu_sched(struct rcu_head *hea
+ }
+ EXPORT_SYMBOL_GPL(call_rcu_sched);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Post an RCU bottom-half callback to be invoked after any subsequent
+ * quiescent state.
+@@ -380,3 +381,4 @@ void call_rcu_bh(struct rcu_head *head,
+ __call_rcu(head, func, &rcu_bh_ctrlblk);
+ }
+ EXPORT_SYMBOL_GPL(call_rcu_bh);
++#endif
diff --git a/patches/rcu-tiny-solve-rt-mistery.patch b/patches/rcu-tiny-solve-rt-mistery.patch
new file mode 100644
index 0000000..097c930
--- /dev/null
+++ b/patches/rcu-tiny-solve-rt-mistery.patch
@@ -0,0 +1,42 @@
+Subject: rcu: rcutiny: Prevent RCU stall
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 16 Oct 2012 18:36:51 +0200
+
+rcu_read_unlock_special() checks in_serving_softirq() and leaves early
+when true. On RT this is wrong, because softirq processing context can
+be preempted and therefore such a task can be on the gp_tasks list.
+Leaving early here keeps the task on the list and blocks RCU processing
+forever.
+
+This cannot happen on mainline because softirq processing context
+cannot be preempted there.
+
+In fact this check looks quite questionable in general. Neither irq
+context nor softirq processing context can ever be preempted in
+mainline, so the special unlock case should never be invoked in such a
+context. The only remaining explanation might be an rcu_read_unlock()
+being interrupted, thereby leaving the rcu nest count at 0 before the
+special unlock bit has been cleared. That looks fragile. At least it's
+missing a big fat comment. Paul ????
+
+See mainline commits ec433f0c5 and 8762705a for further enlightenment.
+
+Reported-by: Kristian Lehmann <krleit00@hs-esslingen.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+
+---
+ kernel/rcutiny_plugin.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/rcutiny_plugin.h
++++ b/kernel/rcutiny_plugin.h
+@@ -560,7 +560,7 @@ void rcu_read_unlock_special(struct task
+ rcu_preempt_cpu_qs();
+
+ /* Hardware IRQ handlers cannot block. */
+- if (in_irq() || in_serving_softirq()) {
++ if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
+ local_irq_restore(flags);
+ return;
+ }
diff --git a/patches/rcutiny-use-simple-waitqueue.patch b/patches/rcutiny-use-simple-waitqueue.patch
new file mode 100644
index 0000000..a2d598b
--- /dev/null
+++ b/patches/rcutiny-use-simple-waitqueue.patch
@@ -0,0 +1,79 @@
+Subject: rcutiny: Use simple waitqueue
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 03 Dec 2012 16:25:21 +0100
+
+Simple waitqueues can be handled from interrupt-disabled contexts.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/rcutiny_plugin.h | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/kernel/rcutiny_plugin.h
++++ b/kernel/rcutiny_plugin.h
+@@ -26,6 +26,7 @@
+ #include <linux/module.h>
+ #include <linux/debugfs.h>
+ #include <linux/seq_file.h>
++#include <linux/wait-simple.h>
+
+ /* Global control variables for rcupdate callback mechanism. */
+ struct rcu_ctrlblk {
+@@ -260,7 +261,7 @@ static void show_tiny_preempt_stats(stru
+
+ /* Controls for rcu_kthread() kthread. */
+ static struct task_struct *rcu_kthread_task;
+-static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
++static DEFINE_SWAIT_HEAD(rcu_kthread_wq);
+ static unsigned long have_rcu_kthread_work;
+
+ /*
+@@ -713,7 +714,7 @@ void synchronize_rcu(void)
+ }
+ EXPORT_SYMBOL_GPL(synchronize_rcu);
+
+-static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
++static DEFINE_SWAIT_HEAD(sync_rcu_preempt_exp_wq);
+ static unsigned long sync_rcu_preempt_exp_count;
+ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+@@ -735,7 +736,7 @@ static int rcu_preempted_readers_exp(voi
+ */
+ static void rcu_report_exp_done(void)
+ {
+- wake_up(&sync_rcu_preempt_exp_wq);
++ swait_wake(&sync_rcu_preempt_exp_wq);
+ }
+
+ /*
+@@ -787,8 +788,8 @@ void synchronize_rcu_expedited(void)
+ } else {
+ rcu_initiate_boost();
+ local_irq_restore(flags);
+- wait_event(sync_rcu_preempt_exp_wq,
+- !rcu_preempted_readers_exp());
++ swait_event(sync_rcu_preempt_exp_wq,
++ !rcu_preempted_readers_exp());
+ }
+
+ /* Clean up and exit. */
+@@ -858,7 +859,7 @@ static void invoke_rcu_callbacks(void)
+ {
+ have_rcu_kthread_work = 1;
+ if (rcu_kthread_task != NULL)
+- wake_up(&rcu_kthread_wq);
++ swait_wake(&rcu_kthread_wq);
+ }
+
+ #ifdef CONFIG_RCU_TRACE
+@@ -888,8 +889,8 @@ static int rcu_kthread(void *arg)
+ unsigned long flags;
+
+ for (;;) {
+- wait_event_interruptible(rcu_kthread_wq,
+- have_rcu_kthread_work != 0);
++ swait_event_interruptible(rcu_kthread_wq,
++ have_rcu_kthread_work != 0);
+ morework = rcu_boost();
+ local_irq_save(flags);
+ work = have_rcu_kthread_work;
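A sketch of the wait-simple pattern adopted above, using the names that appear in the hunks (DEFINE_SWAIT_HEAD, swait_wake, swait_event_interruptible come from the RT series' wait-simple.h, not a mainline header); demo_wq, demo_pending and the two functions are hypothetical. The point is that the wake side may be called with interrupts disabled.

#include <linux/wait-simple.h>

static DEFINE_SWAIT_HEAD(demo_wq);
static unsigned long demo_pending;

/* Producer: may run in an IRQ-off region, which is why the patch
 * switches the RCU kthread wakeups to simple waitqueues. */
static void demo_kick(void)
{
	demo_pending = 1;
	swait_wake(&demo_wq);
}

/* Consumer thread. */
static int demo_thread(void *unused)
{
	for (;;) {
		swait_event_interruptible(demo_wq, demo_pending != 0);
		demo_pending = 0;
		/* ... do the queued work ... */
	}
	return 0;
}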
diff --git a/patches/re-migrate_disable-race-with-cpu-hotplug-3f.patch b/patches/re-migrate_disable-race-with-cpu-hotplug-3f.patch
new file mode 100644
index 0000000..7197951
--- /dev/null
+++ b/patches/re-migrate_disable-race-with-cpu-hotplug-3f.patch
@@ -0,0 +1,34 @@
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Thu, 28 Jul 2011 11:16:00 +0800
+Subject: hotplug: Reread hotplug_pcp on pin_current_cpu() retry
+
+When a retry happens, the task has likely been migrated to another cpu
+(unless the unplug failed), but it still dereferences the original
+hotplug_pcp per-cpu data.
+
+Update the pointer to hotplug_pcp in the retry path so that it points
+to the current cpu.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/20110728031600.GA338@windriver.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/cpu.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -81,9 +81,11 @@ static DEFINE_PER_CPU(struct hotplug_pcp
+ */
+ void pin_current_cpu(void)
+ {
+- struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
++ struct hotplug_pcp *hp;
+
+ retry:
++ hp = &__get_cpu_var(hotplug_pcp);
++
+ if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ hp->unplug == current || (current->flags & PF_STOMPER)) {
+ hp->refcount++;
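The same class of bug in generic form, as a hedged sketch (demo_counter and demo_bump() are hypothetical): a per-CPU pointer is only valid while migration is excluded, so it must be re-taken after any point where the task may have moved, exactly as the patch re-reads hotplug_pcp after the retry: label.

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static void demo_bump(void)
{
	unsigned long *p;

	p = &get_cpu_var(demo_counter);	/* preemption off: 'p' is valid here */
	(*p)++;
	put_cpu_var(demo_counter);	/* after this the task may migrate... */

	/* ...so 'p' must not be reused; take it again if needed. */
}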
diff --git a/patches/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch b/patches/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch
new file mode 100644
index 0000000..e0a042a
--- /dev/null
+++ b/patches/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch
@@ -0,0 +1,69 @@
+Subject: ARM: Initialize ptl->lock for vector page
+From: Frank Rowand <frank.rowand@am.sony.com>
+Date: Sat, 1 Oct 2011 18:58:13 -0700
+
+Without this patch, ARM cannot use SPLIT_PTLOCK_CPUS if
+PREEMPT_RT_FULL=y because vectors_user_mapping() creates a
+VM_ALWAYSDUMP mapping of the vector page (address 0xffff0000), but no
+ptl->lock has been allocated for the page. An attempt to coredump
+that page will result in a kernel NULL pointer dereference when
+follow_page() attempts to lock the page.
+
+The call tree to the NULL pointer dereference is:
+
+ do_notify_resume()
+ get_signal_to_deliver()
+ do_coredump()
+ elf_core_dump()
+ get_dump_page()
+ __get_user_pages()
+ follow_page()
+ pte_offset_map_lock() <----- a #define
+ ...
+ rt_spin_lock()
+
+The underlying problem is exposed by mm-shrink-the-page-frame-to-rt-size.patch.
+
+Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
+Cc: Frank <Frank_Rowand@sonyusa.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/4E87C535.2030907@am.sony.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/arm/kernel/process.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/arch/arm/kernel/process.c
++++ b/arch/arm/kernel/process.c
+@@ -459,6 +459,31 @@ unsigned long arch_randomize_brk(struct
+ }
+
+ #ifdef CONFIG_MMU
++
++/*
++ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
++ * initialized by pgtable_page_ctor() then a coredump of the vector page will
++ * fail.
++ */
++static int __init vectors_user_mapping_init_page(void)
++{
++ struct page *page;
++ unsigned long addr = 0xffff0000;
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++
++ pgd = pgd_offset_k(addr);
++ pud = pud_offset(pgd, addr);
++ pmd = pmd_offset(pud, addr);
++ page = pmd_page(*(pmd));
++
++ pgtable_page_ctor(page);
++
++ return 0;
++}
++late_initcall(vectors_user_mapping_init_page);
++
+ /*
+ * The vectors page is always readable from user space for the
+ * atomic helpers and the signal restart code. Insert it into the
diff --git a/patches/relay-fix-timer-madness.patch b/patches/relay-fix-timer-madness.patch
new file mode 100644
index 0000000..738f20d
--- /dev/null
+++ b/patches/relay-fix-timer-madness.patch
@@ -0,0 +1,52 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:44:07 -0500
+Subject: relay: fix timer madness
+
+Remove timer calls (!!!) from deep within the tracing infrastructure.
+This was totally bogus code that can cause lockups and worse. Poll
+the buffer every jiffy for now.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/relay.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+--- a/kernel/relay.c
++++ b/kernel/relay.c
+@@ -340,6 +340,10 @@ static void wakeup_readers(unsigned long
+ {
+ struct rchan_buf *buf = (struct rchan_buf *)data;
+ wake_up_interruptible(&buf->read_wait);
++ /*
++ * Stupid polling for now:
++ */
++ mod_timer(&buf->timer, jiffies + 1);
+ }
+
+ /**
+@@ -357,6 +361,7 @@ static void __relay_reset(struct rchan_b
+ init_waitqueue_head(&buf->read_wait);
+ kref_init(&buf->kref);
+ setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
++ mod_timer(&buf->timer, jiffies + 1);
+ } else
+ del_timer_sync(&buf->timer);
+
+@@ -739,15 +744,6 @@ size_t relay_switch_subbuf(struct rchan_
+ else
+ buf->early_bytes += buf->chan->subbuf_size -
+ buf->padding[old_subbuf];
+- smp_mb();
+- if (waitqueue_active(&buf->read_wait))
+- /*
+- * Calling wake_up_interruptible() from here
+- * will deadlock if we happen to be logging
+- * from the scheduler (trying to re-grab
+- * rq->lock), so defer it.
+- */
+- mod_timer(&buf->timer, jiffies + 1);
+ }
+
+ old = buf->data;
diff --git a/patches/resource-counters-use-localirq-nort.patch b/patches/resource-counters-use-localirq-nort.patch
new file mode 100644
index 0000000..e39de4d
--- /dev/null
+++ b/patches/resource-counters-use-localirq-nort.patch
@@ -0,0 +1,86 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:44:33 -0500
+Subject: core: Do not disable interrupts on RT in res_counter.c
+
+Frederic Weisbecker reported this warning:
+
+[ 45.228562] BUG: sleeping function called from invalid context at kernel/rtmutex.c:683
+[ 45.228571] in_atomic(): 0, irqs_disabled(): 1, pid: 4290, name: ntpdate
+[ 45.228576] INFO: lockdep is turned off.
+[ 45.228580] irq event stamp: 0
+[ 45.228583] hardirqs last enabled at (0): [<(null)>] (null)
+[ 45.228589] hardirqs last disabled at (0): [<ffffffff8025449d>] copy_process+0x68d/0x1500
+[ 45.228602] softirqs last enabled at (0): [<ffffffff8025449d>] copy_process+0x68d/0x1500
+[ 45.228609] softirqs last disabled at (0): [<(null)>] (null)
+[ 45.228617] Pid: 4290, comm: ntpdate Tainted: G W 2.6.29-rc4-rt1-tip #1
+[ 45.228622] Call Trace:
+[ 45.228632] [<ffffffff8027dfb0>] ? print_irqtrace_events+0xd0/0xe0
+[ 45.228639] [<ffffffff8024cd73>] __might_sleep+0x113/0x130
+[ 45.228646] [<ffffffff8077c811>] rt_spin_lock+0xa1/0xb0
+[ 45.228653] [<ffffffff80296a3d>] res_counter_charge+0x5d/0x130
+[ 45.228660] [<ffffffff802fb67f>] __mem_cgroup_try_charge+0x7f/0x180
+[ 45.228667] [<ffffffff802fc407>] mem_cgroup_charge_common+0x57/0x90
+[ 45.228674] [<ffffffff80212096>] ? ftrace_call+0x5/0x2b
+[ 45.228680] [<ffffffff802fc49d>] mem_cgroup_newpage_charge+0x5d/0x60
+[ 45.228688] [<ffffffff802d94ce>] __do_fault+0x29e/0x4c0
+[ 45.228694] [<ffffffff8077c843>] ? rt_spin_unlock+0x23/0x80
+[ 45.228700] [<ffffffff802db8b5>] handle_mm_fault+0x205/0x890
+[ 45.228707] [<ffffffff80212096>] ? ftrace_call+0x5/0x2b
+[ 45.228714] [<ffffffff8023495e>] do_page_fault+0x11e/0x2a0
+[ 45.228720] [<ffffffff8077e5a5>] page_fault+0x25/0x30
+[ 45.228727] [<ffffffff8043e1ed>] ? __clear_user+0x3d/0x70
+[ 45.228733] [<ffffffff8043e1d1>] ? __clear_user+0x21/0x70
+
+The reason is the use of raw IRQ flags in kernel/res_counter.c.
+
+The irq-flags trick there seems a bit pointless: it cannot protect the
+c->parent linkage because local_irq_save() is only per CPU.
+
+So replace it with the _nort() variants. This code needs a second look.
+
+Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/res_counter.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/kernel/res_counter.c
++++ b/kernel/res_counter.c
+@@ -49,7 +49,7 @@ static int __res_counter_charge(struct r
+
+ r = ret = 0;
+ *limit_fail_at = NULL;
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ for (c = counter; c != NULL; c = c->parent) {
+ spin_lock(&c->lock);
+ r = res_counter_charge_locked(c, val, force);
+@@ -69,7 +69,7 @@ static int __res_counter_charge(struct r
+ spin_unlock(&u->lock);
+ }
+ }
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+
+ return ret;
+ }
+@@ -103,7 +103,7 @@ u64 res_counter_uncharge_until(struct re
+ struct res_counter *c;
+ u64 ret = 0;
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ for (c = counter; c != top; c = c->parent) {
+ u64 r;
+ spin_lock(&c->lock);
+@@ -112,7 +112,7 @@ u64 res_counter_uncharge_until(struct re
+ ret = r;
+ spin_unlock(&c->lock);
+ }
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ return ret;
+ }
+
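For reference, a sketch of how the _nort() helpers used above could be defined; the actual definitions come from the local-irq patch elsewhere in this series, so treat this as an assumption rather than the real code. The idea is that on RT the per-counter spin_lock() (a sleeping lock) already provides the protection, so the IRQ-off section is dropped while 'flags' stays initialized.

#ifndef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#endif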
diff --git a/patches/rfc-printk-don-27t-call-printk_tick-in-printk_needs_cpu.patch b/patches/rfc-printk-don-27t-call-printk_tick-in-printk_needs_cpu.patch
new file mode 100644
index 0000000..771294f
--- /dev/null
+++ b/patches/rfc-printk-don-27t-call-printk_tick-in-printk_needs_cpu.patch
@@ -0,0 +1,45 @@
+Subject: printk: Don't call printk_tick in printk_needs_cpu() on RT
+From: Yong Zhang <yong.zhang0@gmail.com>
+Date: Sun, 16 Oct 2011 18:56:45 +0800
+
+printk_tick() can't be called in atomic context when RT is enabled;
+otherwise the warning below shows up:
+
+[ 117.597095] BUG: sleeping function called from invalid context at kernel/rtmutex.c:645
+[ 117.597102] in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: kworker/0:0
+[ 117.597111] Pid: 0, comm: kworker/0:0 Not tainted 3.0.6-rt17-00284-gb76d419-dirty #7
+[ 117.597116] Call Trace:
+[ 117.597131] [<c06e3b61>] ? printk+0x1d/0x24
+[ 117.597142] [<c01390b6>] __might_sleep+0xe6/0x110
+[ 117.597151] [<c06e634c>] rt_spin_lock+0x1c/0x30
+[ 117.597158] [<c0142f26>] __wake_up+0x26/0x60
+[ 117.597166] [<c014c78e>] printk_tick+0x3e/0x40
+[ 117.597173] [<c014c7b4>] printk_needs_cpu+0x24/0x30
+[ 117.597181] [<c017ecc8>] tick_nohz_stop_sched_tick+0x2e8/0x410
+[ 117.597191] [<c017305a>] ? sched_clock_idle_wakeup_event+0x1a/0x20
+[ 117.597201] [<c010182a>] cpu_idle+0x4a/0xb0
+[ 117.597209] [<c06e0b97>] start_secondary+0xd3/0xd7
+
+Now this is a really rare case and it's very unlikely that we starve
+a logbuf waiter that way.
+
+Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
+Link: http://lkml.kernel.org/r/1318762607-2261-4-git-send-email-yong.zhang0@gmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/printk.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/printk.c
++++ b/kernel/printk.c
+@@ -2030,8 +2030,8 @@ void printk_tick(void)
+
+ int printk_needs_cpu(int cpu)
+ {
+- if (cpu_is_offline(cpu))
+- printk_tick();
++ if (unlikely(cpu_is_offline(cpu)))
++ __this_cpu_write(printk_pending, 0);
+ return __this_cpu_read(printk_pending);
+ }
+
diff --git a/patches/rfc-sched-rt-fix-wait_task_interactive-to-test-rt_spin_lock-state.patch b/patches/rfc-sched-rt-fix-wait_task_interactive-to-test-rt_spin_lock-state.patch
new file mode 100644
index 0000000..76166cb
--- /dev/null
+++ b/patches/rfc-sched-rt-fix-wait_task_interactive-to-test-rt_spin_lock-state.patch
@@ -0,0 +1,46 @@
+Subject: sched/rt: Fix wait_task_inactive() to test rt_spin_lock state
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 01 Mar 2012 13:55:33 -0500
+
+wait_task_inactive() has a task sleep while waiting for another task
+to reach a certain state. But it ignores the rt_spin_lock state and
+can return an incorrect result if the task it is waiting for is
+blocked on an rt_spin_lock() and is waking up.
+
+The rt_spin_locks save the task's state in the saved_state field and
+wait_task_inactive() must also test that state.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Carsten Emde <C.Emde@osadl.org>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <clark.williams@gmail.com>
+Cc: stable-rt@vger.kernel.org
+Link: http://lkml.kernel.org/r/20120301190345.979435764@goodmis.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1041,7 +1041,8 @@ unsigned long wait_task_inactive(struct
+ * is actually now running somewhere else!
+ */
+ while (task_running(rq, p)) {
+- if (match_state && unlikely(p->state != match_state))
++ if (match_state && unlikely(p->state != match_state)
++ && unlikely(p->saved_state != match_state))
+ return 0;
+ cpu_relax();
+ }
+@@ -1056,7 +1057,8 @@ unsigned long wait_task_inactive(struct
+ running = task_running(rq, p);
+ on_rq = p->on_rq;
+ ncsw = 0;
+- if (!match_state || p->state == match_state)
++ if (!match_state || p->state == match_state
++ || p->saved_state == match_state)
+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ task_rq_unlock(rq, p, &flags);
+
diff --git a/patches/rt-add-rt-locks.patch b/patches/rt-add-rt-locks.patch
new file mode 100644
index 0000000..072cd99
--- /dev/null
+++ b/patches/rt-add-rt-locks.patch
@@ -0,0 +1,894 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 26 Jul 2009 19:39:56 +0200
+Subject: rt: Add the preempt-rt lock replacement APIs
+
+Map spinlocks, rwlocks, rw_semaphores and semaphores to the rt_mutex
+based locking functions for preempt-rt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/rwlock_rt.h | 123 ++++++++++
+ include/linux/spinlock.h | 12 -
+ include/linux/spinlock_api_smp.h | 4
+ include/linux/spinlock_rt.h | 158 +++++++++++++
+ kernel/Makefile | 9
+ kernel/rt.c | 442 +++++++++++++++++++++++++++++++++++++++
+ kernel/spinlock.c | 7
+ lib/spinlock_debug.c | 5
+ 8 files changed, 756 insertions(+), 4 deletions(-)
+
+--- /dev/null
++++ b/include/linux/rwlock_rt.h
+@@ -0,0 +1,123 @@
++#ifndef __LINUX_RWLOCK_RT_H
++#define __LINUX_RWLOCK_RT_H
++
++#ifndef __LINUX_SPINLOCK_H
++#error Do not include directly. Use spinlock.h
++#endif
++
++#define rwlock_init(rwl) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(rwl)->lock); \
++ __rt_rwlock_init(rwl, #rwl, &__key); \
++} while (0)
++
++extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
++extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
++extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
++extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
++extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
++extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
++extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
++extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
++extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
++extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
++
++#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
++#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
++
++#define write_trylock_irqsave(lock, flags) \
++ __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
++
++#define read_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ migrate_disable(); \
++ flags = rt_read_lock_irqsave(lock); \
++ } while (0)
++
++#define write_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ migrate_disable(); \
++ flags = rt_write_lock_irqsave(lock); \
++ } while (0)
++
++#define read_lock(lock) \
++ do { \
++ migrate_disable(); \
++ rt_read_lock(lock); \
++ } while (0)
++
++#define read_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ migrate_disable(); \
++ rt_read_lock(lock); \
++ } while (0)
++
++#define read_lock_irq(lock) read_lock(lock)
++
++#define write_lock(lock) \
++ do { \
++ migrate_disable(); \
++ rt_write_lock(lock); \
++ } while (0)
++
++#define write_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ migrate_disable(); \
++ rt_write_lock(lock); \
++ } while (0)
++
++#define write_lock_irq(lock) write_lock(lock)
++
++#define read_unlock(lock) \
++ do { \
++ rt_read_unlock(lock); \
++ migrate_enable(); \
++ } while (0)
++
++#define read_unlock_bh(lock) \
++ do { \
++ rt_read_unlock(lock); \
++ migrate_enable(); \
++ local_bh_enable(); \
++ } while (0)
++
++#define read_unlock_irq(lock) read_unlock(lock)
++
++#define write_unlock(lock) \
++ do { \
++ rt_write_unlock(lock); \
++ migrate_enable(); \
++ } while (0)
++
++#define write_unlock_bh(lock) \
++ do { \
++ rt_write_unlock(lock); \
++ migrate_enable(); \
++ local_bh_enable(); \
++ } while (0)
++
++#define write_unlock_irq(lock) write_unlock(lock)
++
++#define read_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ rt_read_unlock(lock); \
++ migrate_enable(); \
++ } while (0)
++
++#define write_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ rt_write_unlock(lock); \
++ migrate_enable(); \
++ } while (0)
++
++#endif
+--- a/include/linux/spinlock.h
++++ b/include/linux/spinlock.h
+@@ -254,7 +254,11 @@ static inline void do_raw_spin_unlock(ra
+ #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
+
+ /* Include rwlock functions */
+-#include <linux/rwlock.h>
++#ifdef CONFIG_PREEMPT_RT_FULL
++# include <linux/rwlock_rt.h>
++#else
++# include <linux/rwlock.h>
++#endif
+
+ /*
+ * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
+@@ -265,6 +269,10 @@ static inline void do_raw_spin_unlock(ra
+ # include <linux/spinlock_api_up.h>
+ #endif
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# include <linux/spinlock_rt.h>
++#else /* PREEMPT_RT_FULL */
++
+ /*
+ * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
+ */
+@@ -394,4 +402,6 @@ extern int _atomic_dec_and_lock(atomic_t
+ #define atomic_dec_and_lock(atomic, lock) \
+ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
+
++#endif /* !PREEMPT_RT_FULL */
++
+ #endif /* __LINUX_SPINLOCK_H */
+--- a/include/linux/spinlock_api_smp.h
++++ b/include/linux/spinlock_api_smp.h
+@@ -191,6 +191,8 @@ static inline int __raw_spin_trylock_bh(
+ return 0;
+ }
+
+-#include <linux/rwlock_api_smp.h>
++#ifndef CONFIG_PREEMPT_RT_FULL
++# include <linux/rwlock_api_smp.h>
++#endif
+
+ #endif /* __LINUX_SPINLOCK_API_SMP_H */
+--- /dev/null
++++ b/include/linux/spinlock_rt.h
+@@ -0,0 +1,158 @@
++#ifndef __LINUX_SPINLOCK_RT_H
++#define __LINUX_SPINLOCK_RT_H
++
++#ifndef __LINUX_SPINLOCK_H
++#error Do not include directly. Use spinlock.h
++#endif
++
++#include <linux/bug.h>
++
++extern void
++__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
++
++#define spin_lock_init(slock) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(slock)->lock); \
++ __rt_spin_lock_init(slock, #slock, &__key); \
++} while (0)
++
++extern void __lockfunc rt_spin_lock(spinlock_t *lock);
++extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
++extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
++extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
++extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
++extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
++extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
++extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
++
++/*
++ * lockdep-less calls, for derived types like rwlock:
++ * (for trylock they can use rt_mutex_trylock() directly).
++ */
++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
++
++#define spin_lock_local(lock) rt_spin_lock(lock)
++#define spin_unlock_local(lock) rt_spin_unlock(lock)
++
++#define spin_lock(lock) \
++ do { \
++ migrate_disable(); \
++ rt_spin_lock(lock); \
++ } while (0)
++
++#define spin_lock_bh(lock) \
++ do { \
++ local_bh_disable(); \
++ migrate_disable(); \
++ rt_spin_lock(lock); \
++ } while (0)
++
++#define spin_lock_irq(lock) spin_lock(lock)
++
++#define spin_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
++
++#ifdef CONFIG_LOCKDEP
++# define spin_lock_nested(lock, subclass) \
++ do { \
++ migrate_disable(); \
++ rt_spin_lock_nested(lock, subclass); \
++ } while (0)
++
++# define spin_lock_irqsave_nested(lock, flags, subclass) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ migrate_disable(); \
++ rt_spin_lock_nested(lock, subclass); \
++ } while (0)
++#else
++# define spin_lock_nested(lock, subclass) spin_lock(lock)
++
++# define spin_lock_irqsave_nested(lock, flags, subclass) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ spin_lock(lock); \
++ } while (0)
++#endif
++
++#define spin_lock_irqsave(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ flags = 0; \
++ spin_lock(lock); \
++ } while (0)
++
++static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
++{
++ unsigned long flags = 0;
++#ifdef CONFIG_TRACE_IRQFLAGS
++ flags = rt_spin_lock_trace_flags(lock);
++#else
++ spin_lock(lock); /* lock_local */
++#endif
++ return flags;
++}
++
++/* FIXME: we need rt_spin_lock_nest_lock */
++#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
++
++#define spin_unlock(lock) \
++ do { \
++ rt_spin_unlock(lock); \
++ migrate_enable(); \
++ } while (0)
++
++#define spin_unlock_bh(lock) \
++ do { \
++ rt_spin_unlock(lock); \
++ migrate_enable(); \
++ local_bh_enable(); \
++ } while (0)
++
++#define spin_unlock_irq(lock) spin_unlock(lock)
++
++#define spin_unlock_irqrestore(lock, flags) \
++ do { \
++ typecheck(unsigned long, flags); \
++ (void) flags; \
++ spin_unlock(lock); \
++ } while (0)
++
++#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
++#define spin_trylock_irq(lock) spin_trylock(lock)
++
++#define spin_trylock_irqsave(lock, flags) \
++ rt_spin_trylock_irqsave(lock, &(flags))
++
++#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
++
++#ifdef CONFIG_GENERIC_LOCKBREAK
++# define spin_is_contended(lock) ((lock)->break_lock)
++#else
++# define spin_is_contended(lock) (((void)(lock), 0))
++#endif
++
++static inline int spin_can_lock(spinlock_t *lock)
++{
++ return !rt_mutex_is_locked(&lock->lock);
++}
++
++static inline int spin_is_locked(spinlock_t *lock)
++{
++ return rt_mutex_is_locked(&lock->lock);
++}
++
++static inline void assert_spin_locked(spinlock_t *lock)
++{
++ BUG_ON(!spin_is_locked(lock));
++}
++
++#define atomic_dec_and_lock(atomic, lock) \
++ atomic_dec_and_spin_lock(atomic, lock)
++
++#endif
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -7,8 +7,8 @@ obj-y = fork.o exec_domain.o panic.o
+ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
+ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
+ rcupdate.o extable.o params.o posix-timers.o \
+- kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
+- hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
++ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \
++ hrtimer.o nsproxy.o srcu.o semaphore.o \
+ notifier.o ksysfs.o cred.o \
+ async.o range.o groups.o lglock.o smpboot.o
+
+@@ -32,7 +32,11 @@ obj-$(CONFIG_FREEZER) += freezer.o
+ obj-$(CONFIG_PROFILING) += profile.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
++ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
++obj-y += mutex.o
+ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
++obj-y += rwsem.o
++endif
+ obj-$(CONFIG_LOCKDEP) += lockdep.o
+ ifeq ($(CONFIG_PROC_FS),y)
+ obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
+@@ -44,6 +48,7 @@ endif
+ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
+ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
+ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
++obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += smp.o
+ ifneq ($(CONFIG_SMP),y)
+--- /dev/null
++++ b/kernel/rt.c
+@@ -0,0 +1,442 @@
++/*
++ * kernel/rt.c
++ *
++ * Real-Time Preemption Support
++ *
++ * started by Ingo Molnar:
++ *
++ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
++ *
++ * historic credit for proving that Linux spinlocks can be implemented via
++ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
++ * and others) who prototyped it on 2.4 and did lots of comparative
++ * research and analysis; TimeSys, for proving that you can implement a
++ * fully preemptible kernel via the use of IRQ threading and mutexes;
++ * Bill Huey for persuasively arguing on lkml that the mutex model is the
++ * right one; and to MontaVista, who ported pmutexes to 2.6.
++ *
++ * This code is a from-scratch implementation and is not based on pmutexes,
++ * but the idea of converting spinlocks to mutexes is used here too.
++ *
++ * lock debugging, locking tree, deadlock detection:
++ *
++ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
++ * Released under the General Public License (GPL).
++ *
++ * Includes portions of the generic R/W semaphore implementation from:
++ *
++ * Copyright (c) 2001 David Howells (dhowells@redhat.com).
++ * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
++ * - Derived also from comments by Linus
++ *
++ * Pending ownership of locks and ownership stealing:
++ *
++ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
++ *
++ * (also by Steven Rostedt)
++ * - Converted single pi_lock to individual task locks.
++ *
++ * By Esben Nielsen:
++ * Doing priority inheritance with help of the scheduler.
++ *
++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
++ * - major rework based on Esben Nielsens initial patch
++ * - replaced thread_info references by task_struct refs
++ * - removed task->pending_owner dependency
++ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
++ * in the scheduler return path as discussed with Steven Rostedt
++ *
++ * Copyright (C) 2006, Kihon Technologies Inc.
++ * Steven Rostedt <rostedt@goodmis.org>
++ * - debugged and patched Thomas Gleixner's rework.
++ * - added back the cmpxchg to the rework.
++ * - turned atomic require back on for SMP.
++ */
++
++#include <linux/spinlock.h>
++#include <linux/rtmutex.h>
++#include <linux/sched.h>
++#include <linux/delay.h>
++#include <linux/module.h>
++#include <linux/kallsyms.h>
++#include <linux/syscalls.h>
++#include <linux/interrupt.h>
++#include <linux/plist.h>
++#include <linux/fs.h>
++#include <linux/futex.h>
++#include <linux/hrtimer.h>
++
++#include "rtmutex_common.h"
++
++/*
++ * struct mutex functions
++ */
++void __mutex_do_init(struct mutex *mutex, const char *name,
++ struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
++ lockdep_init_map(&mutex->dep_map, name, key, 0);
++#endif
++ mutex->lock.save_state = 0;
++}
++EXPORT_SYMBOL(__mutex_do_init);
++
++void __lockfunc _mutex_lock(struct mutex *lock)
++{
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ rt_mutex_lock(&lock->lock);
++}
++EXPORT_SYMBOL(_mutex_lock);
++
++int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ ret = rt_mutex_lock_interruptible(&lock->lock, 0);
++ if (ret)
++ mutex_release(&lock->dep_map, 1, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_interruptible);
++
++int __lockfunc _mutex_lock_killable(struct mutex *lock)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++ ret = rt_mutex_lock_killable(&lock->lock, 0);
++ if (ret)
++ mutex_release(&lock->dep_map, 1, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_killable);
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
++{
++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
++ rt_mutex_lock(&lock->lock);
++}
++EXPORT_SYMBOL(_mutex_lock_nested);
++
++void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
++{
++ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
++ rt_mutex_lock(&lock->lock);
++}
++EXPORT_SYMBOL(_mutex_lock_nest_lock);
++
++int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
++{
++ int ret;
++
++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
++ ret = rt_mutex_lock_interruptible(&lock->lock, 0);
++ if (ret)
++ mutex_release(&lock->dep_map, 1, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
++
++int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
++{
++ int ret;
++
++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++ ret = rt_mutex_lock_killable(&lock->lock, 0);
++ if (ret)
++ mutex_release(&lock->dep_map, 1, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_lock_killable_nested);
++#endif
++
++int __lockfunc _mutex_trylock(struct mutex *lock)
++{
++ int ret = rt_mutex_trylock(&lock->lock);
++
++ if (ret)
++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++
++ return ret;
++}
++EXPORT_SYMBOL(_mutex_trylock);
++
++void __lockfunc _mutex_unlock(struct mutex *lock)
++{
++ mutex_release(&lock->dep_map, 1, _RET_IP_);
++ rt_mutex_unlock(&lock->lock);
++}
++EXPORT_SYMBOL(_mutex_unlock);
++
++/*
++ * rwlock_t functions
++ */
++int __lockfunc rt_write_trylock(rwlock_t *rwlock)
++{
++ int ret = rt_mutex_trylock(&rwlock->lock);
++
++ migrate_disable();
++ if (ret)
++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
++ else
++ migrate_enable();
++
++ return ret;
++}
++EXPORT_SYMBOL(rt_write_trylock);
++
++int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
++{
++ int ret;
++
++ *flags = 0;
++ migrate_disable();
++ ret = rt_write_trylock(rwlock);
++ if (!ret)
++ migrate_enable();
++ return ret;
++}
++EXPORT_SYMBOL(rt_write_trylock_irqsave);
++
++int __lockfunc rt_read_trylock(rwlock_t *rwlock)
++{
++ struct rt_mutex *lock = &rwlock->lock;
++ int ret = 1;
++
++ /*
++ * recursive read locks succeed when current owns the lock,
++ * but not when read_depth == 0 which means that the lock is
++ * write locked.
++ */
++ migrate_disable();
++ if (rt_mutex_owner(lock) != current)
++ ret = rt_mutex_trylock(lock);
++ else if (!rwlock->read_depth)
++ ret = 0;
++
++ if (ret) {
++ rwlock->read_depth++;
++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
++ } else
++ migrate_enable();
++
++ return ret;
++}
++EXPORT_SYMBOL(rt_read_trylock);
++
++void __lockfunc rt_write_lock(rwlock_t *rwlock)
++{
++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
++ __rt_spin_lock(&rwlock->lock);
++}
++EXPORT_SYMBOL(rt_write_lock);
++
++void __lockfunc rt_read_lock(rwlock_t *rwlock)
++{
++ struct rt_mutex *lock = &rwlock->lock;
++
++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
++
++ /*
++ * recursive read locks succeed when current owns the lock
++ */
++ if (rt_mutex_owner(lock) != current)
++ __rt_spin_lock(lock);
++ rwlock->read_depth++;
++}
++
++EXPORT_SYMBOL(rt_read_lock);
++
++void __lockfunc rt_write_unlock(rwlock_t *rwlock)
++{
++ /* NOTE: we always pass in '1' for nested, for simplicity */
++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
++ __rt_spin_unlock(&rwlock->lock);
++}
++EXPORT_SYMBOL(rt_write_unlock);
++
++void __lockfunc rt_read_unlock(rwlock_t *rwlock)
++{
++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
++
++ /* Release the lock only when read_depth is down to 0 */
++ if (--rwlock->read_depth == 0)
++ __rt_spin_unlock(&rwlock->lock);
++}
++EXPORT_SYMBOL(rt_read_unlock);
++
++unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
++{
++ rt_write_lock(rwlock);
++
++ return 0;
++}
++EXPORT_SYMBOL(rt_write_lock_irqsave);
++
++unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
++{
++ rt_read_lock(rwlock);
++
++ return 0;
++}
++EXPORT_SYMBOL(rt_read_lock_irqsave);
++
++void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
++ lockdep_init_map(&rwlock->dep_map, name, key, 0);
++#endif
++ rwlock->lock.save_state = 1;
++ rwlock->read_depth = 0;
++}
++EXPORT_SYMBOL(__rt_rwlock_init);
++
++/*
++ * rw_semaphores
++ */
++
++void rt_up_write(struct rw_semaphore *rwsem)
++{
++ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
++ rt_mutex_unlock(&rwsem->lock);
++}
++EXPORT_SYMBOL(rt_up_write);
++
++void rt_up_read(struct rw_semaphore *rwsem)
++{
++ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
++ if (--rwsem->read_depth == 0)
++ rt_mutex_unlock(&rwsem->lock);
++}
++EXPORT_SYMBOL(rt_up_read);
++
++/*
++ * downgrade a write lock into a read lock
++ * - just wake up any readers at the front of the queue
++ */
++void rt_downgrade_write(struct rw_semaphore *rwsem)
++{
++ BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
++ rwsem->read_depth = 1;
++}
++EXPORT_SYMBOL(rt_downgrade_write);
++
++int rt_down_write_trylock(struct rw_semaphore *rwsem)
++{
++ int ret = rt_mutex_trylock(&rwsem->lock);
++
++ if (ret)
++ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
++ return ret;
++}
++EXPORT_SYMBOL(rt_down_write_trylock);
++
++void rt_down_write(struct rw_semaphore *rwsem)
++{
++ rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
++ rt_mutex_lock(&rwsem->lock);
++}
++EXPORT_SYMBOL(rt_down_write);
++
++void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
++{
++ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
++ rt_mutex_lock(&rwsem->lock);
++}
++EXPORT_SYMBOL(rt_down_write_nested);
++
++int rt_down_read_trylock(struct rw_semaphore *rwsem)
++{
++ struct rt_mutex *lock = &rwsem->lock;
++ int ret = 1;
++
++ /*
++ * recursive read locks succeed when current owns the rwsem,
++ * but not when read_depth == 0 which means that the rwsem is
++ * write locked.
++ */
++ if (rt_mutex_owner(lock) != current)
++ ret = rt_mutex_trylock(&rwsem->lock);
++ else if (!rwsem->read_depth)
++ ret = 0;
++
++ if (ret) {
++ rwsem->read_depth++;
++ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
++ }
++ return ret;
++}
++EXPORT_SYMBOL(rt_down_read_trylock);
++
++static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
++{
++ struct rt_mutex *lock = &rwsem->lock;
++
++ rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
++
++ if (rt_mutex_owner(lock) != current)
++ rt_mutex_lock(&rwsem->lock);
++ rwsem->read_depth++;
++}
++
++void rt_down_read(struct rw_semaphore *rwsem)
++{
++ __rt_down_read(rwsem, 0);
++}
++EXPORT_SYMBOL(rt_down_read);
++
++void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
++{
++ __rt_down_read(rwsem, subclass);
++}
++EXPORT_SYMBOL(rt_down_read_nested);
++
++void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name,
++ struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
++ lockdep_init_map(&rwsem->dep_map, name, key, 0);
++#endif
++ rwsem->read_depth = 0;
++ rwsem->lock.save_state = 0;
++}
++EXPORT_SYMBOL(__rt_rwsem_init);
++
++/**
++ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
++ * @cnt: the atomic which we are to dec
++ * @lock: the mutex to return holding if we dec to 0
++ *
++ * return true and hold lock if we dec to 0, return false otherwise
++ */
++int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
++{
++ /* dec if we can't possibly hit 0 */
++ if (atomic_add_unless(cnt, -1, 1))
++ return 0;
++ /* we might hit 0, so take the lock */
++ mutex_lock(lock);
++ if (!atomic_dec_and_test(cnt)) {
++ /* when we actually did the dec, we didn't hit 0 */
++ mutex_unlock(lock);
++ return 0;
++ }
++ /* we hit 0, and we hold the lock */
++ return 1;
++}
++EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
+--- a/kernel/spinlock.c
++++ b/kernel/spinlock.c
+@@ -110,8 +110,11 @@ void __lockfunc __raw_##op##_lock_bh(loc
+ * __[spin|read|write]_lock_bh()
+ */
+ BUILD_LOCK_OPS(spin, raw_spinlock);
++
++#ifndef CONFIG_PREEMPT_RT_FULL
+ BUILD_LOCK_OPS(read, rwlock);
+ BUILD_LOCK_OPS(write, rwlock);
++#endif
+
+ #endif
+
+@@ -195,6 +198,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_
+ EXPORT_SYMBOL(_raw_spin_unlock_bh);
+ #endif
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++
+ #ifndef CONFIG_INLINE_READ_TRYLOCK
+ int __lockfunc _raw_read_trylock(rwlock_t *lock)
+ {
+@@ -339,6 +344,8 @@ void __lockfunc _raw_write_unlock_bh(rwl
+ EXPORT_SYMBOL(_raw_write_unlock_bh);
+ #endif
+
++#endif /* !PREEMPT_RT_FULL */
++
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
+--- a/lib/spinlock_debug.c
++++ b/lib/spinlock_debug.c
+@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t
+
+ EXPORT_SYMBOL(__raw_spin_lock_init);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ void __rwlock_init(rwlock_t *lock, const char *name,
+ struct lock_class_key *key)
+ {
+@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const
+ }
+
+ EXPORT_SYMBOL(__rwlock_init);
++#endif
+
+ static void spin_dump(raw_spinlock_t *lock, const char *msg)
+ {
+@@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *
+ arch_spin_unlock(&lock->raw_lock);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ static void rwlock_bug(rwlock_t *lock, const char *msg)
+ {
+ if (!debug_locks_off())
+@@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock)
+ debug_write_unlock(lock);
+ arch_write_unlock(&lock->raw_lock);
+ }
++
++#endif
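
A minimal usage sketch of the atomic_dec_and_mutex_lock() helper shown above; the cache_entry object, cache_lock and the list handling are hypothetical and only stand in for the usual dec-and-test-under-lock teardown pattern:

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct cache_entry {                    /* hypothetical refcounted object */
	atomic_t refcnt;
	struct list_head node;
};

static DEFINE_MUTEX(cache_lock);        /* hypothetical lock protecting the list */

static void cache_entry_put(struct cache_entry *e)
{
	/* Takes cache_lock only when the reference count drops to zero */
	if (atomic_dec_and_mutex_lock(&e->refcnt, &cache_lock)) {
		list_del(&e->node);
		mutex_unlock(&cache_lock);
		kfree(e);
	}
}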
diff --git a/patches/rt-add-rt-spinlock-to-headers.patch b/patches/rt-add-rt-spinlock-to-headers.patch
new file mode 100644
index 0000000..40ea9a5
--- /dev/null
+++ b/patches/rt-add-rt-spinlock-to-headers.patch
@@ -0,0 +1,118 @@
+Subject: rt-add-rt-spinlocks.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 19:43:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rwlock_types_rt.h | 33 +++++++++++++++++++++++++
+ include/linux/spinlock_types.h | 11 ++++++--
+ include/linux/spinlock_types_rt.h | 49 ++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 90 insertions(+), 3 deletions(-)
+
+--- /dev/null
++++ b/include/linux/rwlock_types_rt.h
+@@ -0,0 +1,33 @@
++#ifndef __LINUX_RWLOCK_TYPES_RT_H
++#define __LINUX_RWLOCK_TYPES_RT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++/*
++ * rwlocks - rtmutex which allows single reader recursion
++ */
++typedef struct {
++ struct rt_mutex lock;
++ int read_depth;
++ unsigned int break_lock;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++} rwlock_t;
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
++#else
++# define RW_DEP_MAP_INIT(lockname)
++#endif
++
++#define __RW_LOCK_UNLOCKED(name) \
++ { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
++ RW_DEP_MAP_INIT(name) }
++
++#define DEFINE_RWLOCK(name) \
++ rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
++
++#endif
+--- a/include/linux/spinlock_types.h
++++ b/include/linux/spinlock_types.h
+@@ -11,8 +11,13 @@
+
+ #include <linux/spinlock_types_raw.h>
+
+-#include <linux/spinlock_types_nort.h>
+-
+-#include <linux/rwlock_types.h>
++#ifndef CONFIG_PREEMPT_RT_FULL
++# include <linux/spinlock_types_nort.h>
++# include <linux/rwlock_types.h>
++#else
++# include <linux/rtmutex.h>
++# include <linux/spinlock_types_rt.h>
++# include <linux/rwlock_types_rt.h>
++#endif
+
+ #endif /* __LINUX_SPINLOCK_TYPES_H */
+--- /dev/null
++++ b/include/linux/spinlock_types_rt.h
+@@ -0,0 +1,49 @@
++#ifndef __LINUX_SPINLOCK_TYPES_RT_H
++#define __LINUX_SPINLOCK_TYPES_RT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++/*
++ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
++ */
++typedef struct spinlock {
++ struct rt_mutex lock;
++ unsigned int break_lock;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++} spinlock_t;
++
++#ifdef CONFIG_DEBUG_RT_MUTEXES
++# define __RT_SPIN_INITIALIZER(name) \
++ { \
++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
++ .save_state = 1, \
++ .file = __FILE__, \
++ .line = __LINE__ , \
++ }
++#else
++# define __RT_SPIN_INITIALIZER(name) \
++ { \
++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
++ .save_state = 1, \
++ }
++#endif
++
++/*
++.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
++*/
++
++#define __SPIN_LOCK_UNLOCKED(name) \
++ { .lock = __RT_SPIN_INITIALIZER(name.lock), \
++ SPIN_DEP_MAP_INIT(name) }
++
++#define __DEFINE_SPINLOCK(name) \
++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
++
++#define DEFINE_SPINLOCK(name) \
++ spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
++
++#endif
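
As a rough illustration of what the headers above change: with CONFIG_PREEMPT_RT_FULL the spinlock_t defined here wraps an rt_mutex, while callers keep the ordinary API. A minimal sketch, with stats_lock and stats_count as hypothetical names:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(stats_lock);     /* rt_mutex-backed under PREEMPT_RT_FULL */
static unsigned long stats_count;       /* hypothetical shared counter */

static void stats_inc(void)
{
	/*
	 * Same source as on mainline; on RT the critical section is
	 * preemptible and the lock may sleep under contention.
	 */
	spin_lock(&stats_lock);
	stats_count++;
	spin_unlock(&stats_lock);
}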
diff --git a/patches/rt-add-rt-to-mutex-headers.patch b/patches/rt-add-rt-to-mutex-headers.patch
new file mode 100644
index 0000000..0d843f0
--- /dev/null
+++ b/patches/rt-add-rt-to-mutex-headers.patch
@@ -0,0 +1,141 @@
+Subject: rt-add-rt-to-mutex-headers.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 20:56:22 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/mutex.h | 21 +++++++----
+ include/linux/mutex_rt.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 98 insertions(+), 7 deletions(-)
+
+--- a/include/linux/mutex.h
++++ b/include/linux/mutex.h
+@@ -17,6 +17,17 @@
+
+ #include <linux/atomic.h>
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
++ , .dep_map = { .name = #lockname }
++#else
++# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
++#endif
++
++#ifdef CONFIG_PREEMPT_RT_FULL
++# include <linux/mutex_rt.h>
++#else
++
+ /*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+@@ -95,13 +106,6 @@ do { \
+ static inline void mutex_destroy(struct mutex *lock) {}
+ #endif
+
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
+- , .dep_map = { .name = #lockname }
+-#else
+-# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
+-#endif
+-
+ #define __MUTEX_INITIALIZER(lockname) \
+ { .count = ATOMIC_INIT(1) \
+ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
+@@ -167,6 +171,9 @@ extern int __must_check mutex_lock_killa
+ */
+ extern int mutex_trylock(struct mutex *lock);
+ extern void mutex_unlock(struct mutex *lock);
++
++#endif /* !PREEMPT_RT_FULL */
++
+ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+ #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
+--- /dev/null
++++ b/include/linux/mutex_rt.h
+@@ -0,0 +1,84 @@
++#ifndef __LINUX_MUTEX_RT_H
++#define __LINUX_MUTEX_RT_H
++
++#ifndef __LINUX_MUTEX_H
++#error "Please include mutex.h"
++#endif
++
++#include <linux/rtmutex.h>
++
++/* FIXME: Just for __lockfunc */
++#include <linux/spinlock.h>
++
++struct mutex {
++ struct rt_mutex lock;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++};
++
++#define __MUTEX_INITIALIZER(mutexname) \
++ { \
++ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
++ }
++
++#define DEFINE_MUTEX(mutexname) \
++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
++
++extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
++extern void __lockfunc _mutex_lock(struct mutex *lock);
++extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
++extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
++extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
++extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
++extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
++extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
++extern int __lockfunc _mutex_trylock(struct mutex *lock);
++extern void __lockfunc _mutex_unlock(struct mutex *lock);
++
++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
++#define mutex_lock(l) _mutex_lock(l)
++#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
++#define mutex_lock_killable(l) _mutex_lock_killable(l)
++#define mutex_trylock(l) _mutex_trylock(l)
++#define mutex_unlock(l) _mutex_unlock(l)
++#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
++# define mutex_lock_interruptible_nested(l, s) \
++ _mutex_lock_interruptible_nested(l, s)
++# define mutex_lock_killable_nested(l, s) \
++ _mutex_lock_killable_nested(l, s)
++
++# define mutex_lock_nest_lock(lock, nest_lock) \
++do { \
++ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
++ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
++} while (0)
++
++#else
++# define mutex_lock_nested(l, s) _mutex_lock(l)
++# define mutex_lock_interruptible_nested(l, s) \
++ _mutex_lock_interruptible(l)
++# define mutex_lock_killable_nested(l, s) \
++ _mutex_lock_killable(l)
++# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
++#endif
++
++# define mutex_init(mutex) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(mutex)->lock); \
++ __mutex_do_init((mutex), #mutex, &__key); \
++} while (0)
++
++# define __mutex_init(mutex, name, key) \
++do { \
++ rt_mutex_init(&(mutex)->lock); \
++ __mutex_do_init((mutex), name, key); \
++} while (0)
++
++#endif
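
A minimal sketch of the unchanged caller view: DEFINE_MUTEX() and the mutex_*() wrappers above simply map onto rt_mutex operations when PREEMPT_RT_FULL is set. The cfg_mutex and cfg_value names are hypothetical:

#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(cfg_mutex);         /* expands to the rt_mutex based initializer on RT */
static int cfg_value;                   /* hypothetical data protected by cfg_mutex */

static int cfg_set(int val)
{
	if (!mutex_trylock(&cfg_mutex)) /* -> _mutex_trylock() -> rt_mutex_trylock() */
		return -EBUSY;
	cfg_value = val;
	mutex_unlock(&cfg_mutex);       /* -> _mutex_unlock() -> rt_mutex_unlock() */
	return 0;
}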
diff --git a/patches/rt-introduce-cpu-chill.patch b/patches/rt-introduce-cpu-chill.patch
new file mode 100644
index 0000000..02864a0
--- /dev/null
+++ b/patches/rt-introduce-cpu-chill.patch
@@ -0,0 +1,28 @@
+Subject: rt: Introduce cpu_chill()
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 07 Mar 2012 20:51:03 +0100
+
+Retry loops on RT might loop forever when the modifying side was
+preempted. Add cpu_chill() to replace cpu_relax(). cpu_chill()
+defaults to cpu_relax() for non-RT. On RT it puts the looping task to
+sleep for a tick so the preempted task can make progress.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/delay.h | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/include/linux/delay.h
++++ b/include/linux/delay.h
+@@ -52,4 +52,10 @@ static inline void ssleep(unsigned int s
+ msleep(seconds * 1000);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define cpu_chill() msleep(1)
++#else
++# define cpu_chill() cpu_relax()
++#endif
++
+ #endif /* defined(_LINUX_DELAY_H) */
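
A minimal sketch of the retry-loop conversion the changelog describes, assuming a hypothetical FOO_BUSY flag that a possibly preempted updater clears when it is done:

#include <linux/bitops.h>
#include <linux/delay.h>

struct foo {                            /* hypothetical object */
	unsigned long flags;
};

#define FOO_BUSY	0               /* hypothetical flag bit */

/* Wait until a concurrent updater has cleared the busy flag */
static void foo_wait_for_update(struct foo *f)
{
	while (test_bit(FOO_BUSY, &f->flags))
		cpu_chill();            /* msleep(1) on RT, cpu_relax() otherwise */
}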
diff --git a/patches/rt-local-irq-lock.patch b/patches/rt-local-irq-lock.patch
new file mode 100644
index 0000000..d7e8093
--- /dev/null
+++ b/patches/rt-local-irq-lock.patch
@@ -0,0 +1,250 @@
+Subject: rt-local-irq-lock.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 20 Jun 2011 09:03:47 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/locallock.h | 238 ++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 238 insertions(+)
+
+--- /dev/null
++++ b/include/linux/locallock.h
+@@ -0,0 +1,238 @@
++#ifndef _LINUX_LOCALLOCK_H
++#define _LINUX_LOCALLOCK_H
++
++#include <linux/spinlock.h>
++
++#ifdef CONFIG_PREEMPT_RT_BASE
++
++#ifdef CONFIG_DEBUG_SPINLOCK
++# define LL_WARN(cond) WARN_ON(cond)
++#else
++# define LL_WARN(cond) do { } while (0)
++#endif
++
++/*
++ * per cpu lock based substitute for local_irq_*()
++ */
++struct local_irq_lock {
++ spinlock_t lock;
++ struct task_struct *owner;
++ int nestcnt;
++ unsigned long flags;
++};
++
++#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
++ DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
++ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
++
++#define local_irq_lock_init(lvar) \
++ do { \
++ int __cpu; \
++ for_each_possible_cpu(__cpu) \
++ spin_lock_init(&per_cpu(lvar, __cpu).lock); \
++ } while (0)
++
++static inline void __local_lock(struct local_irq_lock *lv)
++{
++ if (lv->owner != current) {
++ spin_lock(&lv->lock);
++ LL_WARN(lv->owner);
++ LL_WARN(lv->nestcnt);
++ lv->owner = current;
++ }
++ lv->nestcnt++;
++}
++
++#define local_lock(lvar) \
++ do { __local_lock(&get_local_var(lvar)); } while (0)
++
++static inline int __local_trylock(struct local_irq_lock *lv)
++{
++ if (lv->owner != current && spin_trylock(&lv->lock)) {
++ LL_WARN(lv->owner);
++ LL_WARN(lv->nestcnt);
++ lv->owner = current;
++ lv->nestcnt = 1;
++ return 1;
++ }
++ return 0;
++}
++
++#define local_trylock(lvar) \
++ ({ \
++ int __locked; \
++ __locked = __local_trylock(&get_local_var(lvar)); \
++ if (!__locked) \
++ put_local_var(lvar); \
++ __locked; \
++ })
++
++static inline void __local_unlock(struct local_irq_lock *lv)
++{
++ LL_WARN(lv->nestcnt == 0);
++ LL_WARN(lv->owner != current);
++ if (--lv->nestcnt)
++ return;
++
++ lv->owner = NULL;
++ spin_unlock(&lv->lock);
++}
++
++#define local_unlock(lvar) \
++ do { \
++ __local_unlock(&__get_cpu_var(lvar)); \
++ put_local_var(lvar); \
++ } while (0)
++
++static inline void __local_lock_irq(struct local_irq_lock *lv)
++{
++ spin_lock_irqsave(&lv->lock, lv->flags);
++ LL_WARN(lv->owner);
++ LL_WARN(lv->nestcnt);
++ lv->owner = current;
++ lv->nestcnt = 1;
++}
++
++#define local_lock_irq(lvar) \
++ do { __local_lock_irq(&get_local_var(lvar)); } while (0)
++
++#define local_lock_irq_on(lvar, cpu) \
++ do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
++
++static inline void __local_unlock_irq(struct local_irq_lock *lv)
++{
++ LL_WARN(!lv->nestcnt);
++ LL_WARN(lv->owner != current);
++ lv->owner = NULL;
++ lv->nestcnt = 0;
++ spin_unlock_irq(&lv->lock);
++}
++
++#define local_unlock_irq(lvar) \
++ do { \
++ __local_unlock_irq(&__get_cpu_var(lvar)); \
++ put_local_var(lvar); \
++ } while (0)
++
++#define local_unlock_irq_on(lvar, cpu) \
++ do { \
++ __local_unlock_irq(&per_cpu(lvar, cpu)); \
++ } while (0)
++
++static inline int __local_lock_irqsave(struct local_irq_lock *lv)
++{
++ if (lv->owner != current) {
++ __local_lock_irq(lv);
++ return 0;
++ } else {
++ lv->nestcnt++;
++ return 1;
++ }
++}
++
++#define local_lock_irqsave(lvar, _flags) \
++ do { \
++ if (__local_lock_irqsave(&get_local_var(lvar))) \
++ put_local_var(lvar); \
++ _flags = __get_cpu_var(lvar).flags; \
++ } while (0)
++
++static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
++ unsigned long flags)
++{
++ LL_WARN(!lv->nestcnt);
++ LL_WARN(lv->owner != current);
++ if (--lv->nestcnt)
++ return 0;
++
++ lv->owner = NULL;
++ spin_unlock_irqrestore(&lv->lock, lv->flags);
++ return 1;
++}
++
++#define local_unlock_irqrestore(lvar, flags) \
++ do { \
++ if (__local_unlock_irqrestore(&__get_cpu_var(lvar), flags)) \
++ put_local_var(lvar); \
++ } while (0)
++
++#define local_spin_trylock_irq(lvar, lock) \
++ ({ \
++ int __locked; \
++ local_lock_irq(lvar); \
++ __locked = spin_trylock(lock); \
++ if (!__locked) \
++ local_unlock_irq(lvar); \
++ __locked; \
++ })
++
++#define local_spin_lock_irq(lvar, lock) \
++ do { \
++ local_lock_irq(lvar); \
++ spin_lock(lock); \
++ } while (0)
++
++#define local_spin_unlock_irq(lvar, lock) \
++ do { \
++ spin_unlock(lock); \
++ local_unlock_irq(lvar); \
++ } while (0)
++
++#define local_spin_lock_irqsave(lvar, lock, flags) \
++ do { \
++ local_lock_irqsave(lvar, flags); \
++ spin_lock(lock); \
++ } while (0)
++
++#define local_spin_unlock_irqrestore(lvar, lock, flags) \
++ do { \
++ spin_unlock(lock); \
++ local_unlock_irqrestore(lvar, flags); \
++ } while (0)
++
++#define get_locked_var(lvar, var) \
++ (*({ \
++ local_lock(lvar); \
++ &__get_cpu_var(var); \
++ }))
++
++#define put_locked_var(lvar, var) local_unlock(lvar)
++
++#define local_lock_cpu(lvar) \
++ ({ \
++ local_lock(lvar); \
++ smp_processor_id(); \
++ })
++
++#define local_unlock_cpu(lvar) local_unlock(lvar)
++
++#else /* PREEMPT_RT_BASE */
++
++#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
++
++static inline void local_irq_lock_init(int lvar) { }
++
++#define local_lock(lvar) preempt_disable()
++#define local_unlock(lvar) preempt_enable()
++#define local_lock_irq(lvar) local_irq_disable()
++#define local_unlock_irq(lvar) local_irq_enable()
++#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
++#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
++
++#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
++#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
++#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
++#define local_spin_lock_irqsave(lvar, lock, flags) \
++ spin_lock_irqsave(lock, flags)
++#define local_spin_unlock_irqrestore(lvar, lock, flags) \
++ spin_unlock_irqrestore(lock, flags)
++
++#define get_locked_var(lvar, var) get_cpu_var(var)
++#define put_locked_var(lvar, var) put_cpu_var(var)
++
++#define local_lock_cpu(lvar) get_cpu()
++#define local_unlock_cpu(lvar) put_cpu()
++
++#endif
++
++#endif
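
A minimal sketch of converting a local_irq_save() protected per-CPU section to the API above; acct_lock and acct_count are hypothetical. On non-RT the #else block collapses this back to local_irq_save()/local_irq_restore():

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(acct_lock);                /* hypothetical */
static DEFINE_PER_CPU(unsigned long, acct_count);       /* hypothetical */

static void acct_event(void)
{
	unsigned long flags;

	/* RT: per-CPU sleeping lock; non-RT: plain local_irq_save() */
	local_lock_irqsave(acct_lock, flags);
	__get_cpu_var(acct_count)++;
	local_unlock_irqrestore(acct_lock, flags);
}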
diff --git a/patches/rt-mutex-add-sleeping-spinlocks-support.patch b/patches/rt-mutex-add-sleeping-spinlocks-support.patch
new file mode 100644
index 0000000..a3783cf
--- /dev/null
+++ b/patches/rt-mutex-add-sleeping-spinlocks-support.patch
@@ -0,0 +1,619 @@
+Subject: rt-mutex-add-sleeping-spinlocks-support.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 10 Jun 2011 11:21:25 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rtmutex.h | 35 +++-
+ kernel/futex.c | 3
+ kernel/rtmutex.c | 384 +++++++++++++++++++++++++++++++++++++++++++++---
+ kernel/rtmutex_common.h | 9 +
+ 4 files changed, 404 insertions(+), 27 deletions(-)
+
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -29,9 +29,10 @@ struct rt_mutex {
+ raw_spinlock_t wait_lock;
+ struct plist_head wait_list;
+ struct task_struct *owner;
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+ int save_state;
+- const char *name, *file;
++#ifdef CONFIG_DEBUG_RT_MUTEXES
++ const char *file;
++ const char *name;
+ int line;
+ void *magic;
+ #endif
+@@ -56,19 +57,39 @@ struct hrtimer_sleeper;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+ , .name = #mutexname, .file = __FILE__, .line = __LINE__
+-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
++
++# define rt_mutex_init(mutex) \
++ do { \
++ raw_spin_lock_init(&(mutex)->wait_lock); \
++ __rt_mutex_init(mutex, #mutex); \
++ } while (0)
++
+ extern void rt_mutex_debug_task_free(struct task_struct *tsk);
+ #else
+ # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
+-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
++
++# define rt_mutex_init(mutex) \
++ do { \
++ raw_spin_lock_init(&(mutex)->wait_lock); \
++ __rt_mutex_init(mutex, #mutex); \
++ } while (0)
++
+ # define rt_mutex_debug_task_free(t) do { } while (0)
+ #endif
+
+-#define __RT_MUTEX_INITIALIZER(mutexname) \
+- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
++#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
+ , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list) \
+ , .owner = NULL \
+- __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
++ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
++
++
++#define __RT_MUTEX_INITIALIZER(mutexname) \
++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
++
++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
++ , .save_state = 1 }
+
+ #define DEFINE_RT_MUTEX(mutexname) \
+ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2323,8 +2323,7 @@ static int futex_wait_requeue_pi(u32 __u
+ * The waiter is allocated on our stack, manipulated by the requeue
+ * code while we sleep on uaddr.
+ */
+- debug_rt_mutex_init_waiter(&rt_waiter);
+- rt_waiter.task = NULL;
++ rt_mutex_init_waiter(&rt_waiter, false);
+
+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+ if (unlikely(ret != 0))
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -8,6 +8,12 @@
+ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
+ * Copyright (C) 2006 Esben Nielsen
+ *
++ * Adaptive Spinlocks:
++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
++ * and Peter Morreale,
++ * Adaptive Spinlocks simplification:
++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
++ *
+ * See Documentation/rt-mutex-design.txt for details.
+ */
+ #include <linux/spinlock.h>
+@@ -96,6 +102,12 @@ static inline void mark_rt_mutex_waiters
+ }
+ #endif
+
++static inline void init_lists(struct rt_mutex *lock)
++{
++ if (unlikely(!lock->wait_list.node_list.prev))
++ plist_head_init(&lock->wait_list);
++}
++
+ /*
+ * Calculate task priority from the waiter list priority
+ *
+@@ -142,6 +154,14 @@ static void rt_mutex_adjust_prio(struct
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ }
+
++static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
++{
++ if (waiter->savestate)
++ wake_up_lock_sleeper(waiter->task);
++ else
++ wake_up_process(waiter->task);
++}
++
+ /*
+ * Max number of times we'll walk the boosting chain:
+ */
+@@ -253,13 +273,15 @@ static int rt_mutex_adjust_prio_chain(st
+ /* Release the task */
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!rt_mutex_owner(lock)) {
++ struct rt_mutex_waiter *lock_top_waiter;
++
+ /*
+ * If the requeue above changed the top waiter, then we need
+ * to wake the new top waiter up to try to get the lock.
+ */
+-
+- if (top_waiter != rt_mutex_top_waiter(lock))
+- wake_up_process(rt_mutex_top_waiter(lock)->task);
++ lock_top_waiter = rt_mutex_top_waiter(lock);
++ if (top_waiter != lock_top_waiter)
++ rt_mutex_wake_waiter(lock_top_waiter);
+ raw_spin_unlock(&lock->wait_lock);
+ goto out_put_task;
+ }
+@@ -304,6 +326,25 @@ static int rt_mutex_adjust_prio_chain(st
+ return ret;
+ }
+
++
++#define STEAL_NORMAL 0
++#define STEAL_LATERAL 1
++
++/*
++ * Note that RT tasks are excluded from lateral-steals to prevent the
++ * introduction of an unbounded latency
++ */
++static inline int lock_is_stealable(struct task_struct *task,
++ struct task_struct *pendowner, int mode)
++{
++ if (mode == STEAL_NORMAL || rt_task(task)) {
++ if (task->prio >= pendowner->prio)
++ return 0;
++ } else if (task->prio > pendowner->prio)
++ return 0;
++ return 1;
++}
++
+ /*
+ * Try to take an rt-mutex
+ *
+@@ -313,8 +354,9 @@ static int rt_mutex_adjust_prio_chain(st
+ * @task: the task which wants to acquire the lock
+ * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
+ */
+-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+- struct rt_mutex_waiter *waiter)
++static int
++__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
++ struct rt_mutex_waiter *waiter, int mode)
+ {
+ /*
+ * We have to be careful here if the atomic speedups are
+@@ -347,12 +389,14 @@ static int try_to_take_rt_mutex(struct r
+ * 3) it is top waiter
+ */
+ if (rt_mutex_has_waiters(lock)) {
+- if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
+- if (!waiter || waiter != rt_mutex_top_waiter(lock))
+- return 0;
+- }
++ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
++
++ if (task != pown && !lock_is_stealable(task, pown, mode))
++ return 0;
+ }
+
++ /* We got the lock. */
++
+ if (waiter || rt_mutex_has_waiters(lock)) {
+ unsigned long flags;
+ struct rt_mutex_waiter *top;
+@@ -377,7 +421,6 @@ static int try_to_take_rt_mutex(struct r
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ }
+
+- /* We got the lock. */
+ debug_rt_mutex_lock(lock);
+
+ rt_mutex_set_owner(lock, task);
+@@ -387,6 +430,13 @@ static int try_to_take_rt_mutex(struct r
+ return 1;
+ }
+
++static inline int
++try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
++ struct rt_mutex_waiter *waiter)
++{
++ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
++}
++
+ /*
+ * Task blocks on lock.
+ *
+@@ -501,7 +551,7 @@ static void wakeup_next_waiter(struct rt
+
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+
+- wake_up_process(waiter->task);
++ rt_mutex_wake_waiter(waiter);
+ }
+
+ /*
+@@ -580,18 +630,315 @@ void rt_mutex_adjust_pi(struct task_stru
+ return;
+ }
+
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+-
+ /* gets dropped in rt_mutex_adjust_prio_chain()! */
+ get_task_struct(task);
++ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * preemptible spin_lock functions:
++ */
++static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
++ void (*slowfn)(struct rt_mutex *lock))
++{
++ might_sleep();
++
++ if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
++ rt_mutex_deadlock_account_lock(lock, current);
++ else
++ slowfn(lock);
++}
++
++static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
++ void (*slowfn)(struct rt_mutex *lock))
++{
++ if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
++ rt_mutex_deadlock_account_unlock(current);
++ else
++ slowfn(lock);
++}
++
++#ifdef CONFIG_SMP
++/*
++ * Note that owner is a speculative pointer and dereferencing relies
++ * on rcu_read_lock() and the check against the lock owner.
++ */
++static int adaptive_wait(struct rt_mutex *lock,
++ struct task_struct *owner)
++{
++ int res = 0;
++
++ rcu_read_lock();
++ for (;;) {
++ if (owner != rt_mutex_owner(lock))
++ break;
++ /*
++ * Ensure that owner->on_cpu is dereferenced _after_
++ * checking the above to be valid.
++ */
++ barrier();
++ if (!owner->on_cpu) {
++ res = 1;
++ break;
++ }
++ cpu_relax();
++ }
++ rcu_read_unlock();
++ return res;
++}
++#else
++static int adaptive_wait(struct rt_mutex *lock,
++ struct task_struct *orig_owner)
++{
++ return 1;
++}
++#endif
++
++# define pi_lock(lock) raw_spin_lock_irq(lock)
++# define pi_unlock(lock) raw_spin_unlock_irq(lock)
++
++/*
++ * Slow path lock function spin_lock style: this variant is very
++ * careful not to miss any non-lock wakeups.
++ *
++ * We store the current state under p->pi_lock in p->saved_state and
++ * the try_to_wake_up() code handles this accordingly.
++ */
++static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
++{
++ struct task_struct *lock_owner, *self = current;
++ struct rt_mutex_waiter waiter, *top_waiter;
++ int ret;
++
++ rt_mutex_init_waiter(&waiter, true);
++
++ raw_spin_lock(&lock->wait_lock);
++ init_lists(lock);
++
++ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
++ raw_spin_unlock(&lock->wait_lock);
++ return;
++ }
++
++ BUG_ON(rt_mutex_owner(lock) == self);
++
++ /*
++ * We save whatever state the task is in and we'll restore it
++ * after acquiring the lock taking real wakeups into account
++ * as well. We are serialized via pi_lock against wakeups. See
++ * try_to_wake_up().
++ */
++ pi_lock(&self->pi_lock);
++ self->saved_state = self->state;
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ pi_unlock(&self->pi_lock);
++
++ ret = task_blocks_on_rt_mutex(lock, &waiter, self, 0);
++ BUG_ON(ret);
++
++ for (;;) {
++ /* Try to acquire the lock again. */
++ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
++ break;
++
++ top_waiter = rt_mutex_top_waiter(lock);
++ lock_owner = rt_mutex_owner(lock);
++
++ raw_spin_unlock(&lock->wait_lock);
++
++ debug_rt_mutex_print_deadlock(&waiter);
++
++ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
++ schedule_rt_mutex(lock);
++
++ raw_spin_lock(&lock->wait_lock);
++
++ pi_lock(&self->pi_lock);
++ __set_current_state(TASK_UNINTERRUPTIBLE);
++ pi_unlock(&self->pi_lock);
++ }
++
++ /*
++ * Restore the task state to current->saved_state. We set it
++ * to the original state above and the try_to_wake_up() code
++ * has possibly updated it when a real (non-rtmutex) wakeup
++ * happened while we were blocked. Clear saved_state so
++ * try_to_wake_up() does not get confused.
++ */
++ pi_lock(&self->pi_lock);
++ __set_current_state(self->saved_state);
++ self->saved_state = TASK_RUNNING;
++ pi_unlock(&self->pi_lock);
++
++ /*
++ * try_to_take_rt_mutex() sets the waiter bit
++ * unconditionally. We might have to fix that up:
++ */
++ fixup_rt_mutex_waiters(lock);
++
++ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
++ BUG_ON(!plist_node_empty(&waiter.list_entry));
++
++ raw_spin_unlock(&lock->wait_lock);
++
++ debug_rt_mutex_free_waiter(&waiter);
++}
++
++/*
++ * Slow path to release a rt_mutex spin_lock style
++ */
++static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
++{
++ raw_spin_lock(&lock->wait_lock);
++
++ debug_rt_mutex_unlock(lock);
++
++ rt_mutex_deadlock_account_unlock(current);
++
++ if (!rt_mutex_has_waiters(lock)) {
++ lock->owner = NULL;
++ raw_spin_unlock(&lock->wait_lock);
++ return;
++ }
++
++ wakeup_next_waiter(lock);
++
++ raw_spin_unlock(&lock->wait_lock);
++
++ /* Undo pi boosting when necessary */
++ rt_mutex_adjust_prio(current);
++}
++
++void __lockfunc rt_spin_lock(spinlock_t *lock)
++{
++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
++}
++EXPORT_SYMBOL(rt_spin_lock);
++
++void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
++{
++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
++}
++EXPORT_SYMBOL(__rt_spin_lock);
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
++{
++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++}
++EXPORT_SYMBOL(rt_spin_lock_nested);
++#endif
++
++void __lockfunc rt_spin_unlock(spinlock_t *lock)
++{
++ /* NOTE: we always pass in '1' for nested, for simplicity */
++ spin_release(&lock->dep_map, 1, _RET_IP_);
++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
++}
++EXPORT_SYMBOL(rt_spin_unlock);
++
++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
++{
++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
++}
++EXPORT_SYMBOL(__rt_spin_unlock);
++
++/*
++ * Wait for the lock to get unlocked: instead of polling for an unlock
++ * (like raw spinlocks do), we lock and unlock, to force the kernel to
++ * schedule if there's contention:
++ */
++void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
++{
++ spin_lock(lock);
++ spin_unlock(lock);
++}
++EXPORT_SYMBOL(rt_spin_unlock_wait);
++
++int __lockfunc rt_spin_trylock(spinlock_t *lock)
++{
++ int ret;
++
++ migrate_disable();
++ ret = rt_mutex_trylock(&lock->lock);
++ if (ret)
++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ else
++ migrate_enable();
++
++ return ret;
++}
++EXPORT_SYMBOL(rt_spin_trylock);
++
++int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
++{
++ int ret;
++
++ local_bh_disable();
++ ret = rt_mutex_trylock(&lock->lock);
++ if (ret) {
++ migrate_disable();
++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ } else
++ local_bh_enable();
++ return ret;
++}
++EXPORT_SYMBOL(rt_spin_trylock_bh);
++
++int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
++{
++ int ret;
++
++ *flags = 0;
++ migrate_disable();
++ ret = rt_mutex_trylock(&lock->lock);
++ if (ret)
++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
++ else
++ migrate_enable();
++ return ret;
++}
++EXPORT_SYMBOL(rt_spin_trylock_irqsave);
++
++int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
++{
++ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
++ if (atomic_add_unless(atomic, -1, 1))
++ return 0;
++ migrate_disable();
++ rt_spin_lock(lock);
++ if (atomic_dec_and_test(atomic))
++ return 1;
++ rt_spin_unlock(lock);
++ migrate_enable();
++ return 0;
++}
++EXPORT_SYMBOL(atomic_dec_and_spin_lock);
++
++void
++__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
++{
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ /*
++ * Make sure we are not reinitializing a held lock:
++ */
++ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
++ lockdep_init_map(&lock->dep_map, name, key, 0);
++#endif
++}
++EXPORT_SYMBOL(__rt_spin_lock_init);
++
++#endif /* PREEMPT_RT_FULL */
++
+ /**
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock: the rt_mutex to take
+ * @state: the state the task should block in (TASK_INTERRUPTIBLE
+- * or TASK_UNINTERRUPTIBLE)
++ * or TASK_UNINTERRUPTIBLE)
+ * @timeout: the pre-initialized and started timer, or NULL for none
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+@@ -647,9 +994,10 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+ struct rt_mutex_waiter waiter;
+ int ret = 0;
+
+- debug_rt_mutex_init_waiter(&waiter);
++ rt_mutex_init_waiter(&waiter, false);
+
+ raw_spin_lock(&lock->wait_lock);
++ init_lists(lock);
+
+ /* Try to acquire the lock again: */
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+@@ -702,6 +1050,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lo
+ int ret = 0;
+
+ raw_spin_lock(&lock->wait_lock);
++ init_lists(lock);
+
+ if (likely(rt_mutex_owner(lock) != current)) {
+
+@@ -934,12 +1283,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+ void __rt_mutex_init(struct rt_mutex *lock, const char *name)
+ {
+ lock->owner = NULL;
+- raw_spin_lock_init(&lock->wait_lock);
+ plist_head_init(&lock->wait_list);
+
+ debug_rt_mutex_init(lock, name);
+ }
+-EXPORT_SYMBOL_GPL(__rt_mutex_init);
++EXPORT_SYMBOL(__rt_mutex_init);
+
+ /**
+ * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
+@@ -954,7 +1302,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
+ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner)
+ {
+- __rt_mutex_init(lock, NULL);
++ rt_mutex_init(lock);
+ debug_rt_mutex_proxy_lock(lock, proxy_owner);
+ rt_mutex_set_owner(lock, proxy_owner);
+ rt_mutex_deadlock_account_lock(lock, proxy_owner);
+--- a/kernel/rtmutex_common.h
++++ b/kernel/rtmutex_common.h
+@@ -49,6 +49,7 @@ struct rt_mutex_waiter {
+ struct plist_node pi_list_entry;
+ struct task_struct *task;
+ struct rt_mutex *lock;
++ bool savestate;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ unsigned long ip;
+ struct pid *deadlock_task_pid;
+@@ -126,4 +127,12 @@ extern int rt_mutex_finish_proxy_lock(st
+ # include "rtmutex.h"
+ #endif
+
++static inline void
++rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
++{
++ debug_rt_mutex_init_waiter(waiter);
++ waiter->task = NULL;
++ waiter->savestate = savestate;
++}
++
+ #endif
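
A usage sketch of the new atomic_dec_and_spin_lock() helper, mirroring atomic_dec_and_lock(); the conn object, conn_list_lock and list handling are hypothetical. When it returns 1 the lock is held and migration is disabled, and the subsequent spin_unlock() is expected to undo both:

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(conn_list_lock); /* hypothetical */

struct conn {                           /* hypothetical refcounted object */
	atomic_t refcnt;
	struct list_head node;
};

static void conn_put(struct conn *c)
{
	/* Takes conn_list_lock only when the reference count drops to zero */
	if (atomic_dec_and_spin_lock(&c->refcnt, &conn_list_lock)) {
		list_del(&c->node);
		spin_unlock(&conn_list_lock);
		kfree(c);
	}
}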
diff --git a/patches/rt-preempt-base-config.patch b/patches/rt-preempt-base-config.patch
new file mode 100644
index 0000000..3c6d7e1
--- /dev/null
+++ b/patches/rt-preempt-base-config.patch
@@ -0,0 +1,49 @@
+Subject: rt-preempt-base-config.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 17 Jun 2011 12:39:57 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/Kconfig.preempt | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -1,3 +1,10 @@
++config PREEMPT
++ bool
++ select PREEMPT_COUNT
++
++config PREEMPT_RT_BASE
++ bool
++ select PREEMPT
+
+ choice
+ prompt "Preemption Model"
+@@ -33,9 +40,9 @@ config PREEMPT_VOLUNTARY
+
+ Select this if you are building a kernel for a desktop system.
+
+-config PREEMPT
++config PREEMPT__LL
+ bool "Preemptible Kernel (Low-Latency Desktop)"
+- select PREEMPT_COUNT
++ select PREEMPT
+ select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+ help
+ This option reduces the latency of the kernel by making
+@@ -52,6 +59,14 @@ config PREEMPT
+ embedded system with latency requirements in the milliseconds
+ range.
+
++config PREEMPT_RTB
++ bool "Preemptible Kernel (Basic RT)"
++ select PREEMPT_RT_BASE
++ help
++ This option is basically the same as (Low-Latency Desktop) but
++ enables changes which are preliminary for the full preemptible
++ RT kernel.
++
+ endchoice
+
+ config PREEMPT_COUNT
diff --git a/patches/rt-rw-lockdep-annotations.patch b/patches/rt-rw-lockdep-annotations.patch
new file mode 100644
index 0000000..ba99b86
--- /dev/null
+++ b/patches/rt-rw-lockdep-annotations.patch
@@ -0,0 +1,133 @@
+Subject: rt: rwsem/rwlock: lockdep annotations
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Sep 2012 10:49:42 +0100
+
+rwlocks and rwsems on RT do not allow multiple readers. Annotate the
+lockdep acquire functions accordingly.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/rt.c | 53 ++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 32 insertions(+), 21 deletions(-)
+
+--- a/kernel/rt.c
++++ b/kernel/rt.c
+@@ -216,15 +216,17 @@ int __lockfunc rt_read_trylock(rwlock_t
+ * write locked.
+ */
+ migrate_disable();
+- if (rt_mutex_owner(lock) != current)
++ if (rt_mutex_owner(lock) != current) {
+ ret = rt_mutex_trylock(lock);
+- else if (!rwlock->read_depth)
++ if (ret)
++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
++ } else if (!rwlock->read_depth) {
+ ret = 0;
++ }
+
+- if (ret) {
++ if (ret)
+ rwlock->read_depth++;
+- rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
+- } else
++ else
+ migrate_enable();
+
+ return ret;
+@@ -242,13 +244,13 @@ void __lockfunc rt_read_lock(rwlock_t *r
+ {
+ struct rt_mutex *lock = &rwlock->lock;
+
+- rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+-
+ /*
+ * recursive read locks succeed when current owns the lock
+ */
+- if (rt_mutex_owner(lock) != current)
++ if (rt_mutex_owner(lock) != current) {
++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ __rt_spin_lock(lock);
++ }
+ rwlock->read_depth++;
+ }
+
+@@ -264,11 +266,11 @@ EXPORT_SYMBOL(rt_write_unlock);
+
+ void __lockfunc rt_read_unlock(rwlock_t *rwlock)
+ {
+- rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+-
+ /* Release the lock only when read_depth is down to 0 */
+- if (--rwlock->read_depth == 0)
++ if (--rwlock->read_depth == 0) {
++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
+ __rt_spin_unlock(&rwlock->lock);
++ }
+ }
+ EXPORT_SYMBOL(rt_read_unlock);
+
+@@ -315,9 +317,10 @@ EXPORT_SYMBOL(rt_up_write);
+
+ void rt_up_read(struct rw_semaphore *rwsem)
+ {
+- rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+- if (--rwsem->read_depth == 0)
++ if (--rwsem->read_depth == 0) {
++ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+ rt_mutex_unlock(&rwsem->lock);
++ }
+ }
+ EXPORT_SYMBOL(rt_up_read);
+
+@@ -356,6 +359,13 @@ void rt_down_write_nested(struct rw_sem
+ }
+ EXPORT_SYMBOL(rt_down_write_nested);
+
++void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
++ struct lockdep_map *nest)
++{
++ rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
++ rt_mutex_lock(&rwsem->lock);
++}
++
+ int rt_down_read_trylock(struct rw_semaphore *rwsem)
+ {
+ struct rt_mutex *lock = &rwsem->lock;
+@@ -366,15 +376,16 @@ int rt_down_read_trylock(struct rw_sema
+ * but not when read_depth == 0 which means that the rwsem is
+ * write locked.
+ */
+- if (rt_mutex_owner(lock) != current)
++ if (rt_mutex_owner(lock) != current) {
+ ret = rt_mutex_trylock(&rwsem->lock);
+- else if (!rwsem->read_depth)
++ if (ret)
++ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
++ } else if (!rwsem->read_depth) {
+ ret = 0;
++ }
+
+- if (ret) {
++ if (ret)
+ rwsem->read_depth++;
+- rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
+- }
+ return ret;
+ }
+ EXPORT_SYMBOL(rt_down_read_trylock);
+@@ -383,10 +394,10 @@ static void __rt_down_read(struct rw_sem
+ {
+ struct rt_mutex *lock = &rwsem->lock;
+
+- rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
+-
+- if (rt_mutex_owner(lock) != current)
++ if (rt_mutex_owner(lock) != current) {
++ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
+ rt_mutex_lock(&rwsem->lock);
++ }
+ rwsem->read_depth++;
+ }
+
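
A minimal sketch of the reader semantics the annotation change encodes: on RT only the owning task may nest read acquisitions, so lockdep now records them as a single exclusive acquire. map_lock is hypothetical:

#include <linux/spinlock.h>

static DEFINE_RWLOCK(map_lock);         /* hypothetical */

static void map_walk_nested(void)
{
	read_lock(&map_lock);
	/* Same-owner recursion: read_depth goes to 2, no second lockdep acquire */
	read_lock(&map_lock);
	read_unlock(&map_lock);
	read_unlock(&map_lock);         /* released once read_depth drops to 0 */
}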
diff --git a/patches/rt-sched-do-not-compare-cpu-masks-in-scheduler.patch b/patches/rt-sched-do-not-compare-cpu-masks-in-scheduler.patch
new file mode 100644
index 0000000..b7c97df
--- /dev/null
+++ b/patches/rt-sched-do-not-compare-cpu-masks-in-scheduler.patch
@@ -0,0 +1,38 @@
+Subject: sched: Do not compare cpu masks in scheduler
+Date: Tue, 27 Sep 2011 08:40:24 -0400
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <williams@redhat.com>
+Link: http://lkml.kernel.org/r/20110927124423.128129033@goodmis.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/sched/core.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2876,16 +2876,12 @@ static inline void update_migrate_disabl
+ */
+ mask = tsk_cpus_allowed(p);
+
+- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
+
+- if (!cpumask_equal(&p->cpus_allowed, mask)) {
+- if (p->sched_class->set_cpus_allowed)
+- p->sched_class->set_cpus_allowed(p, mask);
+- p->nr_cpus_allowed = cpumask_weight(mask);
+-
+- /* Let migrate_enable know to fix things back up */
+- p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
+- }
++ /* Let migrate_enable know to fix things back up */
++ p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
+ }
+
+ void migrate_disable(void)
diff --git a/patches/rt-sched-have-migrate_disable-ignore-bounded-threads.patch b/patches/rt-sched-have-migrate_disable-ignore-bounded-threads.patch
new file mode 100644
index 0000000..8a59012
--- /dev/null
+++ b/patches/rt-sched-have-migrate_disable-ignore-bounded-threads.patch
@@ -0,0 +1,68 @@
+Subject: sched: Have migrate_disable ignore bounded threads
+Date: Tue, 27 Sep 2011 08:40:25 -0400
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <williams@redhat.com>
+Link: http://lkml.kernel.org/r/20110927124423.567944215@goodmis.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/sched/core.c | 23 +++++++++--------------
+ 1 file changed, 9 insertions(+), 14 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2888,7 +2888,7 @@ void migrate_disable(void)
+ {
+ struct task_struct *p = current;
+
+- if (in_atomic()) {
++ if (in_atomic() || p->flags & PF_THREAD_BOUND) {
+ #ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+ #endif
+@@ -2919,7 +2919,7 @@ void migrate_enable(void)
+ unsigned long flags;
+ struct rq *rq;
+
+- if (in_atomic()) {
++ if (in_atomic() || p->flags & PF_THREAD_BOUND) {
+ #ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+ #endif
+@@ -2940,26 +2940,21 @@ void migrate_enable(void)
+
+ if (unlikely(migrate_disabled_updated(p))) {
+ /*
+- * See comment in update_migrate_disable() about locking.
++ * Undo whatever update_migrate_disable() did, also see there
++ * about locking.
+ */
+ rq = this_rq();
+ raw_spin_lock_irqsave(&rq->lock, flags);
+- mask = tsk_cpus_allowed(p);
++
+ /*
+ * Clearing migrate_disable causes tsk_cpus_allowed to
+ * show the tasks original cpu affinity.
+ */
+ p->migrate_disable = 0;
+-
+- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+-
+- if (unlikely(!cpumask_equal(&p->cpus_allowed, mask))) {
+- /* Get the mask now that migration is enabled */
+- mask = tsk_cpus_allowed(p);
+- if (p->sched_class->set_cpus_allowed)
+- p->sched_class->set_cpus_allowed(p, mask);
+- p->nr_cpus_allowed = cpumask_weight(mask);
+- }
++ mask = tsk_cpus_allowed(p);
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ } else
+ p->migrate_disable = 0;
diff --git a/patches/rt-sched-postpone-actual-migration-disalbe-to-schedule.patch b/patches/rt-sched-postpone-actual-migration-disalbe-to-schedule.patch
new file mode 100644
index 0000000..7ccbdea
--- /dev/null
+++ b/patches/rt-sched-postpone-actual-migration-disalbe-to-schedule.patch
@@ -0,0 +1,305 @@
+Subject: sched: Postpone actual migration disable to schedule
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 27 Sep 2011 08:40:23 -0400
+
+migrate_disable() can add a bit of overhead to the RT kernel, as
+changing the affinity is expensive to do at every lock encountered.
+Since a running task cannot migrate, the actual disabling of migration
+does not need to occur until the task is about to schedule out.
+
+In most cases, a task that disables migration will enable it before
+it schedules, which makes this change improve performance tremendously.
+
+[ Frank Rowand: UP compile fix ]
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Clark Williams <williams@redhat.com>
+Link: http://lkml.kernel.org/r/20110927124422.779693167@goodmis.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/sched/core.c | 251 +++++++++++++++++++++++++++-------------------------
+ 1 file changed, 132 insertions(+), 119 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2848,6 +2848,135 @@ static inline void schedule_debug(struct
+ schedstat_inc(this_rq(), sched_count);
+ }
+
++#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
++#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
++#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
++#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
++
++static inline void update_migrate_disable(struct task_struct *p)
++{
++ const struct cpumask *mask;
++
++ if (likely(!p->migrate_disable))
++ return;
++
++ /* Did we already update affinity? */
++ if (unlikely(migrate_disabled_updated(p)))
++ return;
++
++ /*
++ * Since this is always current we can get away with only locking
++ * rq->lock, the ->cpus_allowed value can normally only be changed
++ * while holding both p->pi_lock and rq->lock, but seeing that this
++ * is current, we cannot actually be waking up, so all code that
++ * relies on serialization against p->pi_lock is out of scope.
++ *
++ * Having rq->lock serializes us against things like
++ * set_cpus_allowed_ptr() that can still happen concurrently.
++ */
++ mask = tsk_cpus_allowed(p);
++
++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
++
++ if (!cpumask_equal(&p->cpus_allowed, mask)) {
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
++
++ /* Let migrate_enable know to fix things back up */
++ p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
++ }
++}
++
++void migrate_disable(void)
++{
++ struct task_struct *p = current;
++
++ if (in_atomic()) {
++#ifdef CONFIG_SCHED_DEBUG
++ p->migrate_disable_atomic++;
++#endif
++ return;
++ }
++
++#ifdef CONFIG_SCHED_DEBUG
++ WARN_ON_ONCE(p->migrate_disable_atomic);
++#endif
++
++ preempt_disable();
++ if (p->migrate_disable) {
++ p->migrate_disable++;
++ preempt_enable();
++ return;
++ }
++
++ pin_current_cpu();
++ p->migrate_disable = 1;
++ preempt_enable();
++}
++EXPORT_SYMBOL(migrate_disable);
++
++void migrate_enable(void)
++{
++ struct task_struct *p = current;
++ const struct cpumask *mask;
++ unsigned long flags;
++ struct rq *rq;
++
++ if (in_atomic()) {
++#ifdef CONFIG_SCHED_DEBUG
++ p->migrate_disable_atomic--;
++#endif
++ return;
++ }
++
++#ifdef CONFIG_SCHED_DEBUG
++ WARN_ON_ONCE(p->migrate_disable_atomic);
++#endif
++ WARN_ON_ONCE(p->migrate_disable <= 0);
++
++ preempt_disable();
++ if (migrate_disable_count(p) > 1) {
++ p->migrate_disable--;
++ preempt_enable();
++ return;
++ }
++
++ if (unlikely(migrate_disabled_updated(p))) {
++ /*
++ * See comment in update_migrate_disable() about locking.
++ */
++ rq = this_rq();
++ raw_spin_lock_irqsave(&rq->lock, flags);
++ mask = tsk_cpus_allowed(p);
++ /*
++ * Clearing migrate_disable causes tsk_cpus_allowed to
++ * show the tasks original cpu affinity.
++ */
++ p->migrate_disable = 0;
++
++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
++
++ if (unlikely(!cpumask_equal(&p->cpus_allowed, mask))) {
++ /* Get the mask now that migration is enabled */
++ mask = tsk_cpus_allowed(p);
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
++ }
++ raw_spin_unlock_irqrestore(&rq->lock, flags);
++ } else
++ p->migrate_disable = 0;
++
++ unpin_current_cpu();
++ preempt_enable();
++}
++EXPORT_SYMBOL(migrate_enable);
++#else
++static inline void update_migrate_disable(struct task_struct *p) { }
++#define migrate_disabled_updated(p) 0
++#endif
++
+ static void put_prev_task(struct rq *rq, struct task_struct *prev)
+ {
+ if (prev->on_rq || rq->skip_clock_update < 0)
+@@ -2941,6 +3070,8 @@ need_resched:
+
+ raw_spin_lock_irq(&rq->lock);
+
++ update_migrate_disable(prev);
++
+ switch_count = &prev->nivcsw;
+ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+ if (unlikely(signal_pending_state(prev->state, prev))) {
+@@ -4734,7 +4865,7 @@ void __cpuinit init_idle(struct task_str
+ #ifdef CONFIG_SMP
+ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ {
+- if (!__migrate_disabled(p)) {
++ if (!migrate_disabled_updated(p)) {
+ if (p->sched_class && p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, new_mask);
+ p->nr_cpus_allowed = cpumask_weight(new_mask);
+@@ -4809,124 +4940,6 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+
+-#ifdef CONFIG_PREEMPT_RT_FULL
+-void migrate_disable(void)
+-{
+- struct task_struct *p = current;
+- const struct cpumask *mask;
+- unsigned long flags;
+- struct rq *rq;
+-
+- if (in_atomic()) {
+-#ifdef CONFIG_SCHED_DEBUG
+- p->migrate_disable_atomic++;
+-#endif
+- return;
+- }
+-
+-#ifdef CONFIG_SCHED_DEBUG
+- WARN_ON_ONCE(p->migrate_disable_atomic);
+-#endif
+-
+- preempt_disable();
+- if (p->migrate_disable) {
+- p->migrate_disable++;
+- preempt_enable();
+- return;
+- }
+-
+- pin_current_cpu();
+- if (unlikely(!scheduler_running)) {
+- p->migrate_disable = 1;
+- preempt_enable();
+- return;
+- }
+-
+- /*
+- * Since this is always current we can get away with only locking
+- * rq->lock, the ->cpus_allowed value can normally only be changed
+- * while holding both p->pi_lock and rq->lock, but seeing that this
+- * it current, we cannot actually be waking up, so all code that
+- * relies on serialization against p->pi_lock is out of scope.
+- *
+- * Taking rq->lock serializes us against things like
+- * set_cpus_allowed_ptr() that can still happen concurrently.
+- */
+- rq = this_rq();
+- raw_spin_lock_irqsave(&rq->lock, flags);
+- p->migrate_disable = 1;
+- mask = tsk_cpus_allowed(p);
+-
+- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+-
+- if (!cpumask_equal(&p->cpus_allowed, mask)) {
+- if (p->sched_class->set_cpus_allowed)
+- p->sched_class->set_cpus_allowed(p, mask);
+- p->nr_cpus_allowed = cpumask_weight(mask);
+- }
+- raw_spin_unlock_irqrestore(&rq->lock, flags);
+- preempt_enable();
+-}
+-EXPORT_SYMBOL(migrate_disable);
+-
+-void migrate_enable(void)
+-{
+- struct task_struct *p = current;
+- const struct cpumask *mask;
+- unsigned long flags;
+- struct rq *rq;
+-
+- if (in_atomic()) {
+-#ifdef CONFIG_SCHED_DEBUG
+- p->migrate_disable_atomic--;
+-#endif
+- return;
+- }
+-
+-#ifdef CONFIG_SCHED_DEBUG
+- WARN_ON_ONCE(p->migrate_disable_atomic);
+-#endif
+- WARN_ON_ONCE(p->migrate_disable <= 0);
+-
+- preempt_disable();
+- if (p->migrate_disable > 1) {
+- p->migrate_disable--;
+- preempt_enable();
+- return;
+- }
+-
+- if (unlikely(!scheduler_running)) {
+- p->migrate_disable = 0;
+- unpin_current_cpu();
+- preempt_enable();
+- return;
+- }
+-
+- /*
+- * See comment in migrate_disable().
+- */
+- rq = this_rq();
+- raw_spin_lock_irqsave(&rq->lock, flags);
+- mask = tsk_cpus_allowed(p);
+- p->migrate_disable = 0;
+-
+- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+-
+- if (!cpumask_equal(&p->cpus_allowed, mask)) {
+- /* Get the mask now that migration is enabled */
+- mask = tsk_cpus_allowed(p);
+- if (p->sched_class->set_cpus_allowed)
+- p->sched_class->set_cpus_allowed(p, mask);
+- p->nr_cpus_allowed = cpumask_weight(mask);
+- }
+-
+- raw_spin_unlock_irqrestore(&rq->lock, flags);
+- unpin_current_cpu();
+- preempt_enable();
+-}
+-EXPORT_SYMBOL(migrate_enable);
+-#endif /* CONFIG_PREEMPT_RT_FULL */
+-
+ /*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+ * this because either it can't run here any more (set_cpus_allowed()
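
A caller-level sketch of what the reordering above changes: migrate_disable() now only pins the CPU and bumps a counter, the expensive cpumask update is deferred to update_migrate_disable() in schedule(), and migrate_enable() only has to undo it if a schedule actually happened. do_per_cpu_work() is hypothetical, and the migrate_disable()/migrate_enable() declarations come from the RT tree:

#include <linux/smp.h>

static void do_per_cpu_work(int cpu)
{
	/* hypothetical per-CPU work; may block and schedule */
}

static void touch_this_cpu_state(void)
{
	int cpu;

	migrate_disable();              /* cheap: pin the CPU, bump p->migrate_disable */
	cpu = smp_processor_id();       /* stable until migrate_enable() */
	do_per_cpu_work(cpu);
	migrate_enable();               /* affinity restored only if schedule() updated it */
}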
diff --git a/patches/rt-serial-warn-fix.patch b/patches/rt-serial-warn-fix.patch
new file mode 100644
index 0000000..ac091b2
--- /dev/null
+++ b/patches/rt-serial-warn-fix.patch
@@ -0,0 +1,37 @@
+Subject: rt: Improve the serial console PASS_LIMIT
+From: Ingo Molnar <mingo@elte.hu>
+Date: Wed Dec 14 13:05:54 CET 2011
+
+Beyond the warning:
+
+ drivers/tty/serial/8250/8250.c:1613:6: warning: unused variable ‘pass_counter’ [-Wunused-variable]
+
+the solution of just looping infinitely was ugly - up it to 1 million to
+give it a chance to continue in some really ugly situation.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/tty/serial/8250/8250.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/serial/8250/8250.c
++++ b/drivers/tty/serial/8250/8250.c
+@@ -80,7 +80,16 @@ static unsigned int skip_txen_test; /* f
+ #define DEBUG_INTR(fmt...) do { } while (0)
+ #endif
+
+-#define PASS_LIMIT 512
++/*
++ * On -rt we can have more delays, and legitimately
++ * so - so don't drop work spuriously and spam the
++ * syslog:
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define PASS_LIMIT 1000000
++#else
++# define PASS_LIMIT 512
++#endif
+
+ #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
+
diff --git a/patches/rt-tracing-show-padding-as-unsigned-short.patch b/patches/rt-tracing-show-padding-as-unsigned-short.patch
new file mode 100644
index 0000000..c27cc66
--- /dev/null
+++ b/patches/rt-tracing-show-padding-as-unsigned-short.patch
@@ -0,0 +1,46 @@
+Subject: tracing: Show padding as unsigned short
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Wed, 16 Nov 2011 13:19:35 -0500
+
+RT added a two-byte migrate-disable counter to the trace events and
+used two bytes of the padding to make the change. The structures and
+all were updated correctly, but the display in the event formats was
+not:
+
+cat /debug/tracing/events/sched/sched_switch/format
+
+name: sched_switch
+ID: 51
+format:
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:unsigned short common_migrate_disable; offset:8; size:2; signed:0;
+ field:int common_padding; offset:10; size:2; signed:0;
+
+
+The field for common_padding has the correct size and offset, but the
+use of "int" might confuse some parsers (and people that are reading
+it). This needs to be changed to "unsigned short".
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1321467575.4181.36.camel@frodo
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/trace/trace_events.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -117,7 +117,7 @@ static int trace_define_common_fields(vo
+ __common_field(unsigned char, preempt_count);
+ __common_field(int, pid);
+ __common_field(unsigned short, migrate_disable);
+- __common_field(int, padding);
++ __common_field(unsigned short, padding);
+
+ return ret;
+ }
diff --git a/patches/rtmutex-avoid-include-hell.patch b/patches/rtmutex-avoid-include-hell.patch
new file mode 100644
index 0000000..f0bbc60
--- /dev/null
+++ b/patches/rtmutex-avoid-include-hell.patch
@@ -0,0 +1,20 @@
+Subject: rtmutex-avoid-include-hell.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 20:06:39 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rtmutex.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -14,7 +14,7 @@
+
+ #include <linux/linkage.h>
+ #include <linux/plist.h>
+-#include <linux/spinlock_types.h>
++#include <linux/spinlock_types_raw.h>
+
+ extern int max_lock_depth; /* for sysctl */
+
diff --git a/patches/rtmutex-futex-prepare-rt.patch b/patches/rtmutex-futex-prepare-rt.patch
new file mode 100644
index 0000000..d474b54
--- /dev/null
+++ b/patches/rtmutex-futex-prepare-rt.patch
@@ -0,0 +1,215 @@
+Subject: rtmutex-futex-prepare-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 10 Jun 2011 11:04:15 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 77 ++++++++++++++++++++++++++++++++++++++----------
+ kernel/rtmutex.c | 31 ++++++++++++++++---
+ kernel/rtmutex_common.h | 2 +
+ 3 files changed, 91 insertions(+), 19 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1442,6 +1442,16 @@ retry_private:
+ requeue_pi_wake_futex(this, &key2, hb2);
+ drop_count++;
+ continue;
++ } else if (ret == -EAGAIN) {
++ /*
++ * Waiter was woken by timeout or
++ * signal and has set pi_blocked_on to
++ * PI_WAKEUP_INPROGRESS before we
++ * tried to enqueue it on the rtmutex.
++ */
++ this->pi_state = NULL;
++ free_pi_state(pi_state);
++ continue;
+ } else if (ret) {
+ /* -EDEADLK */
+ this->pi_state = NULL;
+@@ -2286,7 +2296,7 @@ static int futex_wait_requeue_pi(u32 __u
+ struct hrtimer_sleeper timeout, *to = NULL;
+ struct rt_mutex_waiter rt_waiter;
+ struct rt_mutex *pi_mutex = NULL;
+- struct futex_hash_bucket *hb;
++ struct futex_hash_bucket *hb, *hb2;
+ union futex_key key2 = FUTEX_KEY_INIT;
+ struct futex_q q = futex_q_init;
+ int res, ret;
+@@ -2333,20 +2343,55 @@ static int futex_wait_requeue_pi(u32 __u
+ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
+ futex_wait_queue_me(hb, &q, to);
+
+- spin_lock(&hb->lock);
+- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+- spin_unlock(&hb->lock);
+- if (ret)
+- goto out_put_keys;
++ /*
++ * On RT we must avoid races with requeue and trying to block
++ * on two mutexes (hb->lock and uaddr2's rtmutex) by
++ * serializing access to pi_blocked_on with pi_lock.
++ */
++ raw_spin_lock_irq(&current->pi_lock);
++ if (current->pi_blocked_on) {
++ /*
++ * We have been requeued or are in the process of
++ * being requeued.
++ */
++ raw_spin_unlock_irq(&current->pi_lock);
++ } else {
++ /*
++ * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
++ * prevents a concurrent requeue from moving us to the
++ * uaddr2 rtmutex. After that we can safely acquire
++ * (and possibly block on) hb->lock.
++ */
++ current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
++ raw_spin_unlock_irq(&current->pi_lock);
++
++ spin_lock(&hb->lock);
++
++ /*
++ * Clean up pi_blocked_on. We might leak it otherwise
++ * when we succeeded with the hb->lock in the fast
++ * path.
++ */
++ raw_spin_lock_irq(&current->pi_lock);
++ current->pi_blocked_on = NULL;
++ raw_spin_unlock_irq(&current->pi_lock);
++
++ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
++ spin_unlock(&hb->lock);
++ if (ret)
++ goto out_put_keys;
++ }
+
+ /*
+- * In order for us to be here, we know our q.key == key2, and since
+- * we took the hb->lock above, we also know that futex_requeue() has
+- * completed and we no longer have to concern ourselves with a wakeup
+- * race with the atomic proxy lock acquisition by the requeue code. The
+- * futex_requeue dropped our key1 reference and incremented our key2
+- * reference count.
++ * In order to be here, we have either been requeued, are in
++ * the process of being requeued, or requeue successfully
++ * acquired uaddr2 on our behalf. If pi_blocked_on was
++ * non-null above, we may be racing with a requeue. Do not
++ * rely on q->lock_ptr to be hb2->lock until after blocking on
++ * hb->lock or hb2->lock. The futex_requeue dropped our key1
++ * reference and incremented our key2 reference count.
+ */
++ hb2 = hash_futex(&key2);
+
+ /* Check if the requeue code acquired the second futex for us. */
+ if (!q.rt_waiter) {
+@@ -2355,9 +2400,10 @@ static int futex_wait_requeue_pi(u32 __u
+ * did a lock-steal - fix up the PI-state in that case.
+ */
+ if (q.pi_state && (q.pi_state->owner != current)) {
+- spin_lock(q.lock_ptr);
++ spin_lock(&hb2->lock);
++ BUG_ON(&hb2->lock != q.lock_ptr);
+ ret = fixup_pi_state_owner(uaddr2, &q, current);
+- spin_unlock(q.lock_ptr);
++ spin_unlock(&hb2->lock);
+ }
+ } else {
+ /*
+@@ -2370,7 +2416,8 @@ static int futex_wait_requeue_pi(u32 __u
+ ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+ debug_rt_mutex_free_waiter(&rt_waiter);
+
+- spin_lock(q.lock_ptr);
++ spin_lock(&hb2->lock);
++ BUG_ON(&hb2->lock != q.lock_ptr);
+ /*
+ * Fixup the pi_state owner and possibly acquire the lock if we
+ * haven't already.
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -67,6 +67,11 @@ static void fixup_rt_mutex_waiters(struc
+ clear_rt_mutex_waiters(lock);
+ }
+
++static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
++{
++ return waiter && waiter != PI_WAKEUP_INPROGRESS;
++}
++
+ /*
+ * We can speed up the acquire/release, if the architecture
+ * supports cmpxchg and if there's no debugging state to be set up
+@@ -196,7 +201,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * reached or the state of the chain has changed while we
+ * dropped the locks.
+ */
+- if (!waiter)
++ if (!rt_mutex_real_waiter(waiter))
+ goto out_unlock_pi;
+
+ /*
+@@ -399,6 +404,23 @@ static int task_blocks_on_rt_mutex(struc
+ int chain_walk = 0, res;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
++
++ /*
++ * In the case of futex requeue PI, this will be a proxy
++	 * lock. The task will wake unaware that it is enqueued on
++ * this lock. Avoid blocking on two locks and corrupting
++ * pi_blocked_on via the PI_WAKEUP_INPROGRESS
++ * flag. futex_wait_requeue_pi() sets this when it wakes up
++ * before requeue (due to a signal or timeout). Do not enqueue
++ * the task if PI_WAKEUP_INPROGRESS is set.
++ */
++ if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
++ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++ return -EAGAIN;
++ }
++
++ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
++
+ __rt_mutex_adjust_prio(task);
+ waiter->task = task;
+ waiter->lock = lock;
+@@ -423,7 +445,7 @@ static int task_blocks_on_rt_mutex(struc
+ plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
+
+ __rt_mutex_adjust_prio(owner);
+- if (owner->pi_blocked_on)
++ if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ chain_walk = 1;
+ raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+ }
+@@ -517,7 +539,7 @@ static void remove_waiter(struct rt_mute
+ }
+ __rt_mutex_adjust_prio(owner);
+
+- if (owner->pi_blocked_on)
++ if (rt_mutex_real_waiter(owner->pi_blocked_on))
+ chain_walk = 1;
+
+ raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+@@ -551,7 +573,8 @@ void rt_mutex_adjust_pi(struct task_stru
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || waiter->list_entry.prio == task->prio) {
++ if (!rt_mutex_real_waiter(waiter) ||
++ waiter->list_entry.prio == task->prio) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+--- a/kernel/rtmutex_common.h
++++ b/kernel/rtmutex_common.h
+@@ -103,6 +103,8 @@ static inline struct task_struct *rt_mut
+ /*
+ * PI-futex support (proxy locking functions, etc.):
+ */
++#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
++
+ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+ struct task_struct *proxy_owner);
diff --git a/patches/rtmutex-lock-killable.patch b/patches/rtmutex-lock-killable.patch
new file mode 100644
index 0000000..d068299
--- /dev/null
+++ b/patches/rtmutex-lock-killable.patch
@@ -0,0 +1,80 @@
+Subject: rtmutex-lock-killable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 09 Jun 2011 11:43:52 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rtmutex.h | 1 +
+ kernel/rtmutex.c | 33 +++++++++++++++++++++++++++------
+ 2 files changed, 28 insertions(+), 6 deletions(-)
+
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -90,6 +90,7 @@ extern void rt_mutex_destroy(struct rt_m
+ extern void rt_mutex_lock(struct rt_mutex *lock);
+ extern int rt_mutex_lock_interruptible(struct rt_mutex *lock,
+ int detect_deadlock);
++extern int rt_mutex_lock_killable(struct rt_mutex *lock, int detect_deadlock);
+ extern int rt_mutex_timed_lock(struct rt_mutex *lock,
+ struct hrtimer_sleeper *timeout,
+ int detect_deadlock);
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -815,12 +815,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+ /**
+ * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
+ *
+- * @lock: the rt_mutex to be locked
++ * @lock: the rt_mutex to be locked
+ * @detect_deadlock: deadlock detection on/off
+ *
+ * Returns:
+- * 0 on success
+- * -EINTR when interrupted by a signal
++ * 0 on success
++ * -EINTR when interrupted by a signal
+ * -EDEADLK when the lock would deadlock (when deadlock detection is on)
+ */
+ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
+@@ -834,17 +834,38 @@ int __sched rt_mutex_lock_interruptible(
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+
+ /**
++ * rt_mutex_lock_killable - lock a rt_mutex killable
++ *
++ * @lock: the rt_mutex to be locked
++ * @detect_deadlock: deadlock detection on/off
++ *
++ * Returns:
++ * 0 on success
++ * -EINTR when interrupted by a signal
++ * -EDEADLK when the lock would deadlock (when deadlock detection is on)
++ */
++int __sched rt_mutex_lock_killable(struct rt_mutex *lock,
++ int detect_deadlock)
++{
++ might_sleep();
++
++ return rt_mutex_fastlock(lock, TASK_KILLABLE,
++ detect_deadlock, rt_mutex_slowlock);
++}
++EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
++
++/**
+ * rt_mutex_timed_lock - lock a rt_mutex interruptible
+ * the timeout structure is provided
+ * by the caller
+ *
+- * @lock: the rt_mutex to be locked
++ * @lock: the rt_mutex to be locked
+ * @timeout: timeout structure or NULL (no timeout)
+ * @detect_deadlock: deadlock detection on/off
+ *
+ * Returns:
+- * 0 on success
+- * -EINTR when interrupted by a signal
++ * 0 on success
++ * -EINTR when interrupted by a signal
+ * -ETIMEDOUT when the timeout expired
+ * -EDEADLK when the lock would deadlock (when deadlock detection is on)
+ */
diff --git a/patches/rwsem-add-rt-variant.patch b/patches/rwsem-add-rt-variant.patch
new file mode 100644
index 0000000..04e16dc
--- /dev/null
+++ b/patches/rwsem-add-rt-variant.patch
@@ -0,0 +1,170 @@
+Subject: rwsem-add-rt-variant.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 21:02:53 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rwsem.h | 6 ++
+ include/linux/rwsem_rt.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++
+ lib/Makefile | 3 +
+ 3 files changed, 130 insertions(+)
+
+--- a/include/linux/rwsem.h
++++ b/include/linux/rwsem.h
+@@ -16,6 +16,10 @@
+
+ #include <linux/atomic.h>
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++#include <linux/rwsem_rt.h>
++#else /* PREEMPT_RT_FULL */
++
+ struct rw_semaphore;
+
+ #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+@@ -139,4 +143,6 @@ do { \
+ # define down_write_nested(sem, subclass) down_write(sem)
+ #endif
+
++#endif /* !PREEMPT_RT_FULL */
++
+ #endif /* _LINUX_RWSEM_H */
+--- /dev/null
++++ b/include/linux/rwsem_rt.h
+@@ -0,0 +1,121 @@
++#ifndef _LINUX_RWSEM_RT_H
++#define _LINUX_RWSEM_RT_H
++
++#ifndef _LINUX_RWSEM_H
++#error "Include rwsem.h"
++#endif
++
++/*
++ * RW-semaphores are a spinlock plus a reader-depth count.
++ *
++ * Note that the semantics are different from the usual
++ * Linux rw-sems: in PREEMPT_RT mode we do not allow
++ * multiple readers to hold the lock at once, we only allow
++ * a read-lock owner to read-lock recursively. This is
++ * better for latency, makes the implementation inherently
++ * fair and makes it simpler as well.
++ */
++
++#include <linux/rtmutex.h>
++
++struct rw_semaphore {
++ struct rt_mutex lock;
++ int read_depth;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++};
++
++#define __RWSEM_INITIALIZER(name) \
++ { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
++ RW_DEP_MAP_INIT(name) }
++
++#define DECLARE_RWSEM(lockname) \
++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
++
++extern void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name,
++ struct lock_class_key *key);
++
++# define rt_init_rwsem(sem) \
++do { \
++ static struct lock_class_key __key; \
++ \
++ rt_mutex_init(&(sem)->lock); \
++ __rt_rwsem_init((sem), #sem, &__key); \
++} while (0)
++
++extern void rt_down_write(struct rw_semaphore *rwsem);
++extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
++extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
++extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
++ struct lockdep_map *nest);
++extern void rt_down_read(struct rw_semaphore *rwsem);
++extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
++extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
++extern void rt_up_read(struct rw_semaphore *rwsem);
++extern void rt_up_write(struct rw_semaphore *rwsem);
++extern void rt_downgrade_write(struct rw_semaphore *rwsem);
++
++#define init_rwsem(sem) rt_init_rwsem(sem)
++#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
++
++static inline void down_read(struct rw_semaphore *sem)
++{
++ rt_down_read(sem);
++}
++
++static inline int down_read_trylock(struct rw_semaphore *sem)
++{
++ return rt_down_read_trylock(sem);
++}
++
++static inline void down_write(struct rw_semaphore *sem)
++{
++ rt_down_write(sem);
++}
++
++static inline int down_write_trylock(struct rw_semaphore *sem)
++{
++ return rt_down_write_trylock(sem);
++}
++
++static inline void up_read(struct rw_semaphore *sem)
++{
++ rt_up_read(sem);
++}
++
++static inline void up_write(struct rw_semaphore *sem)
++{
++ rt_up_write(sem);
++}
++
++static inline void downgrade_write(struct rw_semaphore *sem)
++{
++ rt_downgrade_write(sem);
++}
++
++static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
++{
++ return rt_down_read_nested(sem, subclass);
++}
++
++static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
++{
++ rt_down_write_nested(sem, subclass);
++}
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++static inline void down_write_nest_lock(struct rw_semaphore *sem,
++ struct rw_semaphore *nest_lock)
++{
++ rt_down_write_nested_lock(sem, &nest_lock->dep_map);
++}
++
++#else
++
++static inline void down_write_nest_lock(struct rw_semaphore *sem,
++ struct rw_semaphore *nest_lock)
++{
++ rt_down_write_nested_lock(sem, NULL);
++}
++#endif
++#endif
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -38,9 +38,12 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o
+ obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
+ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
++
++ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
+ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+ lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
++endif
+
+ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+ obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
diff --git a/patches/sched-adjust-reset-on-fork-always.patch b/patches/sched-adjust-reset-on-fork-always.patch
new file mode 100644
index 0000000..8b7d6d5
--- /dev/null
+++ b/patches/sched-adjust-reset-on-fork-always.patch
@@ -0,0 +1,31 @@
+Subject: sched: Adjust sched_reset_on_fork when nothing else changes
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 20 Dec 2012 14:58:00 +0100
+
+If the policy and priority remain unchanged a possible modification of
+sched_reset_on_fork gets lost in the early exit path.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/sched/core.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4117,10 +4117,13 @@ recheck:
+ }
+
+ /*
+- * If not changing anything there's no need to proceed further:
++ * If not changing anything there's no need to proceed
++ * further, but store a possible modification of
++ * reset_on_fork.
+ */
+ if (unlikely(policy == p->policy && (!rt_policy(policy) ||
+ param->sched_priority == p->rt_priority))) {
++ p->sched_reset_on_fork = reset_on_fork;
+ task_rq_unlock(rq, p, &flags);
+ return 0;
+ }
diff --git a/patches/sched-better-debug-output-for-might-sleep.patch b/patches/sched-better-debug-output-for-might-sleep.patch
new file mode 100644
index 0000000..d563e26
--- /dev/null
+++ b/patches/sched-better-debug-output-for-might-sleep.patch
@@ -0,0 +1,71 @@
+Subject: sched: Better debug output for might sleep
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 05 Oct 2012 08:56:15 +0100
+
+might_sleep() can tell us where interrupts have been disabled, but we
+have no idea what disabled preemption. Add some debug infrastructure.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 3 +++
+ kernel/sched/core.c | 23 +++++++++++++++++++++--
+ 2 files changed, 24 insertions(+), 2 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1642,6 +1642,9 @@ struct task_struct {
+ pte_t kmap_pte[KM_TYPE_NR];
+ # endif
+ #endif
++#ifdef CONFIG_DEBUG_PREEMPT
++ unsigned long preempt_disable_ip;
++#endif
+ };
+
+ #ifdef CONFIG_NUMA_BALANCING
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2789,8 +2789,13 @@ void __kprobes add_preempt_count(int val
+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
+ PREEMPT_MASK - 10);
+ #endif
+- if (preempt_count() == val)
+- trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
++ if (preempt_count() == val) {
++ unsigned long ip = get_parent_ip(CALLER_ADDR1);
++#ifdef CONFIG_DEBUG_PREEMPT
++ current->preempt_disable_ip = ip;
++#endif
++ trace_preempt_off(CALLER_ADDR0, ip);
++ }
+ }
+ EXPORT_SYMBOL(add_preempt_count);
+
+@@ -2833,6 +2838,13 @@ static noinline void __schedule_bug(stru
+ print_modules();
+ if (irqs_disabled())
+ print_irqtrace_events(prev);
++#ifdef CONFIG_DEBUG_PREEMPT
++ if (in_atomic_preempt_off()) {
++ pr_err("Preemption disabled at:");
++ print_ip_sym(current->preempt_disable_ip);
++ pr_cont("\n");
++ }
++#endif
+ dump_stack();
+ add_taint(TAINT_WARN);
+ }
+@@ -7308,6 +7320,13 @@ void __might_sleep(const char *file, int
+ debug_show_held_locks(current);
+ if (irqs_disabled())
+ print_irqtrace_events(current);
++#ifdef CONFIG_DEBUG_PREEMPT
++ if (!preempt_count_equals(preempt_offset)) {
++ pr_err("Preemption disabled at:");
++ print_ip_sym(current->preempt_disable_ip);
++ pr_cont("\n");
++ }
++#endif
+ dump_stack();
+ }
+ EXPORT_SYMBOL(__might_sleep);
diff --git a/patches/sched-clear-pf-thread-bound-on-fallback-rq.patch b/patches/sched-clear-pf-thread-bound-on-fallback-rq.patch
new file mode 100644
index 0000000..ebcd586
--- /dev/null
+++ b/patches/sched-clear-pf-thread-bound-on-fallback-rq.patch
@@ -0,0 +1,24 @@
+Subject: sched-clear-pf-thread-bound-on-fallback-rq.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 04 Nov 2011 20:48:36 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1192,6 +1192,12 @@ out:
+ }
+ }
+
++ /*
++	 * Clear PF_THREAD_BOUND, otherwise we wreck
++ * migrate_disable/enable. See optimization for
++ * PF_THREAD_BOUND tasks there.
++ */
++ p->flags &= ~PF_THREAD_BOUND;
+ return dest_cpu;
+ }
+
diff --git a/patches/sched-cond-resched.patch b/patches/sched-cond-resched.patch
new file mode 100644
index 0000000..fb436a6
--- /dev/null
+++ b/patches/sched-cond-resched.patch
@@ -0,0 +1,32 @@
+Subject: sched-cond-resched.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 07 Jun 2011 11:25:03 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4315,9 +4315,17 @@ static inline int should_resched(void)
+
+ static void __cond_resched(void)
+ {
+- add_preempt_count(PREEMPT_ACTIVE);
+- __schedule();
+- sub_preempt_count(PREEMPT_ACTIVE);
++ do {
++ add_preempt_count(PREEMPT_ACTIVE);
++ __schedule();
++ sub_preempt_count(PREEMPT_ACTIVE);
++ /*
++ * Check again in case we missed a preemption
++ * opportunity between schedule and now.
++ */
++ barrier();
++
++ } while (need_resched());
+ }
+
+ int __sched _cond_resched(void)
diff --git a/patches/sched-consider-pi-boosting-in-setscheduler.patch b/patches/sched-consider-pi-boosting-in-setscheduler.patch
new file mode 100644
index 0000000..064cc44
--- /dev/null
+++ b/patches/sched-consider-pi-boosting-in-setscheduler.patch
@@ -0,0 +1,161 @@
+Subject: sched: Consider pi boosting in setscheduler
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 20 Dec 2012 15:13:49 +0100
+
+If a PI boosted task policy/priority is modified by a setscheduler()
+call we unconditionally dequeue and requeue the task if it is on the
+runqueue even if the new priority is lower than the current effective
+boosted priority. This can result in undesired reordering of the
+priority bucket list.
+
+If the new priority is less than or equal to the current effective one
+we just store the new parameters in the task struct and leave the
+scheduler class and the runqueue untouched. This is handled when the
+task deboosts itself. Only if the new priority is higher than the
+effective boosted priority do we apply the change immediately.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Cc: stable-rt@vger.kernel.org
+---
+ include/linux/sched.h | 5 +++++
+ kernel/rtmutex.c | 12 ++++++++++++
+ kernel/sched/core.c | 40 +++++++++++++++++++++++++++++++---------
+ 3 files changed, 48 insertions(+), 9 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2175,6 +2175,7 @@ extern unsigned int sysctl_sched_cfs_ban
+ #ifdef CONFIG_RT_MUTEXES
+ extern int rt_mutex_getprio(struct task_struct *p);
+ extern void rt_mutex_setprio(struct task_struct *p, int prio);
++extern int rt_mutex_check_prio(struct task_struct *task, int newprio);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+ {
+@@ -2185,6 +2186,10 @@ static inline int rt_mutex_getprio(struc
+ {
+ return p->normal_prio;
+ }
++static inline int rt_mutex_check_prio(struct task_struct *task, int newprio)
++{
++ return 0;
++}
+ # define rt_mutex_adjust_pi(p) do { } while (0)
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+ {
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -124,6 +124,18 @@ int rt_mutex_getprio(struct task_struct
+ }
+
+ /*
++ * Called by sched_setscheduler() to check whether the priority change
++ * is overruled by a possible priority boosting.
++ */
++int rt_mutex_check_prio(struct task_struct *task, int newprio)
++{
++ if (!task_has_pi_waiters(task))
++ return 0;
++
++ return task_top_pi_waiter(task)->pi_list_entry.prio <= newprio;
++}
++
++/*
+ * Adjust the priority of a task, after its pi_waiters got modified.
+ *
+ * This can be both boosting and unboosting. task->pi_lock must be held.
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3764,7 +3764,8 @@ EXPORT_SYMBOL(sleep_on_timeout);
+ * This function changes the 'effective' priority of a task. It does
+ * not touch ->normal_prio like __setscheduler().
+ *
+- * Used by the rt_mutex code to implement priority inheritance logic.
++ * Used by the rt_mutex code to implement priority inheritance
++ * logic. The call site only calls this if the task's priority changed.
+ */
+ void rt_mutex_setprio(struct task_struct *p, int prio)
+ {
+@@ -3987,20 +3988,25 @@ static struct task_struct *find_process_
+ return pid ? find_task_by_vpid(pid) : current;
+ }
+
+-/* Actually do priority change: must hold rq lock. */
+-static void
+-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
++static void __setscheduler_params(struct task_struct *p, int policy, int prio)
+ {
+ p->policy = policy;
+ p->rt_priority = prio;
+ p->normal_prio = normal_prio(p);
++ set_load_weight(p);
++}
++
++/* Actually do priority change: must hold rq lock. */
++static void
++__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
++{
++ __setscheduler_params(p, policy, prio);
+ /* we are holding p->pi_lock already */
+ p->prio = rt_mutex_getprio(p);
+ if (rt_prio(p->prio))
+ p->sched_class = &rt_sched_class;
+ else
+ p->sched_class = &fair_sched_class;
+- set_load_weight(p);
+ }
+
+ /*
+@@ -4022,6 +4028,7 @@ static bool check_same_owner(struct task
+ static int __sched_setscheduler(struct task_struct *p, int policy,
+ const struct sched_param *param, bool user)
+ {
++ int newprio = MAX_RT_PRIO - 1 - param->sched_priority;
+ int retval, oldprio, oldpolicy = -1, on_rq, running;
+ unsigned long flags;
+ const struct sched_class *prev_class;
+@@ -4149,6 +4156,25 @@ recheck:
+ task_rq_unlock(rq, p, &flags);
+ goto recheck;
+ }
++
++ p->sched_reset_on_fork = reset_on_fork;
++ oldprio = p->prio;
++
++ /*
++ * Special case for priority boosted tasks.
++ *
++	 * If the new priority is lower than or equal (user space view)
++	 * to the current (boosted) priority, we just store the new
++	 * normal parameters and do not touch the scheduler class and
++	 * the runqueue. This will be done when the task deboosts
++ * itself.
++ */
++ if (rt_mutex_check_prio(p, newprio)) {
++ __setscheduler_params(p, policy, param->sched_priority);
++ task_rq_unlock(rq, p, &flags);
++ return 0;
++ }
++
+ on_rq = p->on_rq;
+ running = task_current(rq, p);
+ if (on_rq)
+@@ -4156,9 +4182,6 @@ recheck:
+ if (running)
+ p->sched_class->put_prev_task(rq, p);
+
+- p->sched_reset_on_fork = reset_on_fork;
+-
+- oldprio = p->prio;
+ prev_class = p->sched_class;
+ __setscheduler(rq, p, policy, param->sched_priority);
+
+@@ -4171,7 +4194,6 @@ recheck:
+ */
+ enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
+ }
+-
+ check_class_changed(rq, p, prev_class, oldprio);
+ task_rq_unlock(rq, p, &flags);
+
diff --git a/patches/sched-delay-put-task.patch b/patches/sched-delay-put-task.patch
new file mode 100644
index 0000000..38ffa56
--- /dev/null
+++ b/patches/sched-delay-put-task.patch
@@ -0,0 +1,78 @@
+Subject: sched-delay-put-task.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 31 May 2011 16:59:16 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 13 +++++++++++++
+ kernel/fork.c | 15 ++++++++++++++-
+ 2 files changed, 27 insertions(+), 1 deletion(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1623,6 +1623,9 @@ struct task_struct {
+ #ifdef CONFIG_UPROBES
+ struct uprobe_task *utask;
+ #endif
++#ifdef CONFIG_PREEMPT_RT_BASE
++ struct rcu_head put_rcu;
++#endif
+ };
+
+ /* Future-safe accessor for struct task_struct's cpus_allowed. */
+@@ -1813,6 +1816,15 @@ extern struct pid *cad_pid;
+ extern void free_task(struct task_struct *tsk);
+ #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++extern void __put_task_struct_cb(struct rcu_head *rhp);
++
++static inline void put_task_struct(struct task_struct *t)
++{
++ if (atomic_dec_and_test(&t->usage))
++ call_rcu(&t->put_rcu, __put_task_struct_cb);
++}
++#else
+ extern void __put_task_struct(struct task_struct *t);
+
+ static inline void put_task_struct(struct task_struct *t)
+@@ -1820,6 +1832,7 @@ static inline void put_task_struct(struc
+ if (atomic_dec_and_test(&t->usage))
+ __put_task_struct(t);
+ }
++#endif
+
+ extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -229,7 +229,9 @@ static inline void put_signal_struct(str
+ if (atomic_dec_and_test(&sig->sigcnt))
+ free_signal_struct(sig);
+ }
+-
++#ifdef CONFIG_PREEMPT_RT_BASE
++static
++#endif
+ void __put_task_struct(struct task_struct *tsk)
+ {
+ WARN_ON(!tsk->exit_state);
+@@ -244,7 +246,18 @@ void __put_task_struct(struct task_struc
+ if (!profile_handoff_task(tsk))
+ free_task(tsk);
+ }
++#ifndef CONFIG_PREEMPT_RT_BASE
+ EXPORT_SYMBOL_GPL(__put_task_struct);
++#else
++void __put_task_struct_cb(struct rcu_head *rhp)
++{
++ struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
++
++ __put_task_struct(tsk);
++
++}
++EXPORT_SYMBOL_GPL(__put_task_struct_cb);
++#endif
+
+ void __init __weak arch_task_cache_init(void) { }
+
diff --git a/patches/sched-disable-rt-group-sched-on-rt.patch b/patches/sched-disable-rt-group-sched-on-rt.patch
new file mode 100644
index 0000000..8fd377e
--- /dev/null
+++ b/patches/sched-disable-rt-group-sched-on-rt.patch
@@ -0,0 +1,28 @@
+Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 17:03:52 +0200
+
+Carsten reported problems when running:
+
+ taskset 01 chrt -f 1 sleep 1
+
+from within rc.local on an F15 machine. The task stays running and
+never gets on the run queue because some of the run queues have
+rt_throttled=1 which does not go away. It works fine from an ssh login
+shell. Disabling CONFIG_RT_GROUP_SCHED solves that as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ init/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -946,6 +946,7 @@ config RT_GROUP_SCHED
+ bool "Group scheduling for SCHED_RR/FIFO"
+ depends on EXPERIMENTAL
+ depends on CGROUP_SCHED
++ depends on !PREEMPT_RT_FULL
+ default n
+ help
+ This feature lets you explicitly allocate real CPU bandwidth
diff --git a/patches/sched-disable-ttwu-queue.patch b/patches/sched-disable-ttwu-queue.patch
new file mode 100644
index 0000000..3f429e3
--- /dev/null
+++ b/patches/sched-disable-ttwu-queue.patch
@@ -0,0 +1,27 @@
+Subject: sched-disable-ttwu-queue.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 13 Sep 2011 16:42:35 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/features.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -57,11 +57,15 @@ SCHED_FEAT(OWNER_SPIN, true)
+ */
+ SCHED_FEAT(NONTASK_POWER, true)
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Queue remote wakeups on the target CPU and process them
+ * using the scheduler IPI. Reduces rq->lock contention/bounces.
+ */
+ SCHED_FEAT(TTWU_QUEUE, true)
++#else
++SCHED_FEAT(TTWU_QUEUE, false)
++#endif
+
+ SCHED_FEAT(FORCE_SD_OVERLAP, false)
+ SCHED_FEAT(RT_RUNTIME_SHARE, true)
diff --git a/patches/sched-enqueue-to-head.patch b/patches/sched-enqueue-to-head.patch
new file mode 100644
index 0000000..8f19417
--- /dev/null
+++ b/patches/sched-enqueue-to-head.patch
@@ -0,0 +1,68 @@
+Subject: sched: Queue RT tasks to head when prio drops
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 04 Dec 2012 08:56:41 +0100
+
+The following scenario does not work correctly:
+
+Runqueue of CPUx contains two runnable and pinned tasks:
+ T1: SCHED_FIFO, prio 80
+ T2: SCHED_FIFO, prio 80
+
+T1 is on the cpu and executes the following syscalls (classic priority
+ceiling scenario):
+
+ sys_sched_setscheduler(pid(T1), SCHED_FIFO, .prio = 90);
+ ...
+ sys_sched_setscheduler(pid(T1), SCHED_FIFO, .prio = 80);
+ ...
+
+Now T1 gets preempted by T3 (SCHED_FIFO, prio 95). After T3 goes back
+to sleep the scheduler picks T2. Surprise!
+
+The same happens w/o actual preemption when T1 is forced into the
+scheduler due to a sporadic NEED_RESCHED event. The scheduler invokes
+pick_next_task() which returns T2. So T1 gets preempted and scheduled
+out.
+
+This happens because sched_setscheduler() dequeues T1 from the prio 90
+list and then enqueues it on the tail of the prio 80 list behind T2.
+This violates the POSIX spec and surprises user space which relies on
+the guarantee that SCHED_FIFO tasks are not scheduled out unless they
+give the CPU up voluntarily or are preempted by a higher priority
+task. In the latter case the preempted task must get back on the CPU
+after the preempting task schedules out again.
+
+We fixed a similar issue already in commit 60db48c (sched: Queue a
+deboosted task to the head of the RT prio queue). The same treatment
+is necessary for sched_setscheduler(). So enqueue to head of the prio
+bucket list if the priority of the task is lowered.
+
+It might be possible that existing user space relies on the current
+behaviour, but it can be considered highly unlikely due to the corner
+case nature of the application scenario.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/sched/core.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4164,8 +4164,13 @@ recheck:
+
+ if (running)
+ p->sched_class->set_curr_task(rq);
+- if (on_rq)
+- enqueue_task(rq, p, 0);
++ if (on_rq) {
++ /*
++ * We enqueue to tail when the priority of a task is
++ * increased (user space view).
++ */
++ enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
++ }
+
+ check_class_changed(rq, p, prev_class, oldprio);
+ task_rq_unlock(rq, p, &flags);
diff --git a/patches/sched-limit-nr-migrate.patch b/patches/sched-limit-nr-migrate.patch
new file mode 100644
index 0000000..334cd3d
--- /dev/null
+++ b/patches/sched-limit-nr-migrate.patch
@@ -0,0 +1,23 @@
+Subject: sched-limit-nr-migrate.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 06 Jun 2011 12:12:51 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -272,7 +272,11 @@ late_initcall(sched_init_debug);
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
++#ifndef CONFIG_PREEMPT_RT_FULL
+ const_debug unsigned int sysctl_sched_nr_migrate = 32;
++#else
++const_debug unsigned int sysctl_sched_nr_migrate = 8;
++#endif
+
+ /*
+ * period over which we average the RT time consumption, measured
diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
new file mode 100644
index 0000000..d746c66
--- /dev/null
+++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
@@ -0,0 +1,45 @@
+Subject: sched-might-sleep-do-not-account-rcu-depth.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 07 Jun 2011 09:19:06 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rcupdate.h | 7 +++++++
+ kernel/sched/core.c | 3 ++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -182,6 +182,11 @@ void synchronize_rcu(void);
+ * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
+ */
+ #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
++#ifndef CONFIG_PREEMPT_RT_FULL
++#define sched_rcu_preempt_depth() rcu_preempt_depth()
++#else
++static inline int sched_rcu_preempt_depth(void) { return 0; }
++#endif
+
+ #else /* #ifdef CONFIG_PREEMPT_RCU */
+
+@@ -205,6 +210,8 @@ static inline int rcu_preempt_depth(void
+ return 0;
+ }
+
++#define sched_rcu_preempt_depth() rcu_preempt_depth()
++
+ #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+ /* Internal to kernel */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7058,7 +7058,8 @@ void __init sched_init(void)
+ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ static inline int preempt_count_equals(int preempt_offset)
+ {
+- int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
++ int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
++ sched_rcu_preempt_depth();
+
+ return (nested == preempt_offset);
+ }
diff --git a/patches/sched-migrate-disable.patch b/patches/sched-migrate-disable.patch
new file mode 100644
index 0000000..1960a04
--- /dev/null
+++ b/patches/sched-migrate-disable.patch
@@ -0,0 +1,192 @@
+Subject: sched-migrate-disable.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 16 Jun 2011 13:26:08 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/preempt.h | 8 ++++
+ include/linux/sched.h | 13 +++++--
+ kernel/sched/core.c | 88 +++++++++++++++++++++++++++++++++++++++++++++---
+ lib/smp_processor_id.c | 6 +--
+ 4 files changed, 104 insertions(+), 11 deletions(-)
+
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -108,6 +108,14 @@ do { \
+
+ #endif /* CONFIG_PREEMPT_COUNT */
+
++#ifdef CONFIG_SMP
++extern void migrate_disable(void);
++extern void migrate_enable(void);
++#else
++# define migrate_disable() do { } while (0)
++# define migrate_enable() do { } while (0)
++#endif
++
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ # define preempt_disable_rt() preempt_disable()
+ # define preempt_enable_rt() preempt_enable()
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1279,6 +1279,7 @@ struct task_struct {
+ #endif
+
+ unsigned int policy;
++ int migrate_disable;
+ int nr_cpus_allowed;
+ cpumask_t cpus_allowed;
+
+@@ -1630,9 +1631,6 @@ struct task_struct {
+ #endif
+ };
+
+-/* Future-safe accessor for struct task_struct's cpus_allowed. */
+-#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+-
+ #ifdef CONFIG_NUMA_BALANCING
+ extern void task_numa_fault(int node, int pages, bool migrated);
+ extern void set_numabalancing_state(bool enabled);
+@@ -2812,6 +2810,15 @@ static inline void set_task_cpu(struct t
+
+ #endif /* CONFIG_SMP */
+
++/* Future-safe accessor for struct task_struct's cpus_allowed. */
++static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
++{
++ if (p->migrate_disable)
++ return cpumask_of(task_cpu(p));
++
++ return &p->cpus_allowed;
++}
++
+ extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4734,11 +4734,12 @@ void __cpuinit init_idle(struct task_str
+ #ifdef CONFIG_SMP
+ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ {
+- if (p->sched_class && p->sched_class->set_cpus_allowed)
+- p->sched_class->set_cpus_allowed(p, new_mask);
+-
++ if (!p->migrate_disable) {
++ if (p->sched_class && p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, new_mask);
++ p->nr_cpus_allowed = cpumask_weight(new_mask);
++ }
+ cpumask_copy(&p->cpus_allowed, new_mask);
+- p->nr_cpus_allowed = cpumask_weight(new_mask);
+ }
+
+ /*
+@@ -4789,7 +4790,7 @@ int set_cpus_allowed_ptr(struct task_str
+ do_set_cpus_allowed(p, new_mask);
+
+ /* Can the task run on the task's current CPU? If so, we're done */
+- if (cpumask_test_cpu(task_cpu(p), new_mask))
++ if (cpumask_test_cpu(task_cpu(p), new_mask) || p->migrate_disable)
+ goto out;
+
+ dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+@@ -4808,6 +4809,83 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+
++void migrate_disable(void)
++{
++ struct task_struct *p = current;
++ const struct cpumask *mask;
++ unsigned long flags;
++ struct rq *rq;
++
++ preempt_disable();
++ if (p->migrate_disable) {
++ p->migrate_disable++;
++ preempt_enable();
++ return;
++ }
++
++ pin_current_cpu();
++ if (unlikely(!scheduler_running)) {
++ p->migrate_disable = 1;
++ preempt_enable();
++ return;
++ }
++ rq = task_rq_lock(p, &flags);
++ p->migrate_disable = 1;
++ mask = tsk_cpus_allowed(p);
++
++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
++
++ if (!cpumask_equal(&p->cpus_allowed, mask)) {
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
++ }
++ task_rq_unlock(rq, p, &flags);
++ preempt_enable();
++}
++EXPORT_SYMBOL(migrate_disable);
++
++void migrate_enable(void)
++{
++ struct task_struct *p = current;
++ const struct cpumask *mask;
++ unsigned long flags;
++ struct rq *rq;
++
++ WARN_ON_ONCE(p->migrate_disable <= 0);
++
++ preempt_disable();
++ if (p->migrate_disable > 1) {
++ p->migrate_disable--;
++ preempt_enable();
++ return;
++ }
++
++ if (unlikely(!scheduler_running)) {
++ p->migrate_disable = 0;
++ unpin_current_cpu();
++ preempt_enable();
++ return;
++ }
++
++ rq = task_rq_lock(p, &flags);
++ p->migrate_disable = 0;
++ mask = tsk_cpus_allowed(p);
++
++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
++
++ if (!cpumask_equal(&p->cpus_allowed, mask)) {
++ if (p->sched_class->set_cpus_allowed)
++ p->sched_class->set_cpus_allowed(p, mask);
++ p->nr_cpus_allowed = cpumask_weight(mask);
++ }
++
++ task_rq_unlock(rq, p, &flags);
++ unpin_current_cpu();
++ preempt_enable();
++}
++EXPORT_SYMBOL(migrate_enable);
++
+ /*
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+ * this because either it can't run here any more (set_cpus_allowed()
+--- a/lib/smp_processor_id.c
++++ b/lib/smp_processor_id.c
+@@ -39,9 +39,9 @@ notrace unsigned int debug_smp_processor
+ if (!printk_ratelimit())
+ goto out_enable;
+
+- printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] "
+- "code: %s/%d\n",
+- preempt_count() - 1, current->comm, current->pid);
++ printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x %08x] "
++ "code: %s/%d\n", preempt_count() - 1,
++ current->migrate_disable, current->comm, current->pid);
+ print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+ dump_stack();
+
diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch
new file mode 100644
index 0000000..d7a4f03
--- /dev/null
+++ b/patches/sched-mmdrop-delayed.patch
@@ -0,0 +1,134 @@
+Subject: sched-mmdrop-delayed.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 06 Jun 2011 12:20:33 +0200
+
+Needs thread context (pgd_lock) -> ifdeffed. Workqueues won't work with
+RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/mm_types.h | 4 ++++
+ include/linux/sched.h | 12 ++++++++++++
+ kernel/fork.c | 13 +++++++++++++
+ kernel/sched/core.c | 19 +++++++++++++++++--
+ 4 files changed, 46 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -11,6 +11,7 @@
+ #include <linux/completion.h>
+ #include <linux/cpumask.h>
+ #include <linux/page-debug-flags.h>
++#include <linux/rcupdate.h>
+ #include <linux/uprobes.h>
+ #include <asm/page.h>
+ #include <asm/mmu.h>
+@@ -440,6 +441,9 @@ struct mm_struct {
+ int first_nid;
+ #endif
+ struct uprobes_state uprobes_state;
++#ifdef CONFIG_PREEMPT_RT_BASE
++ struct rcu_head delayed_drop;
++#endif
+ };
+
+ /* first nid will either be a valid NID or one of these values */
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2354,12 +2354,24 @@ extern struct mm_struct * mm_alloc(void)
+
+ /* mmdrop drops the mm and the page tables */
+ extern void __mmdrop(struct mm_struct *);
++
+ static inline void mmdrop(struct mm_struct * mm)
+ {
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ __mmdrop(mm);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++extern void __mmdrop_delayed(struct rcu_head *rhp);
++static inline void mmdrop_delayed(struct mm_struct *mm)
++{
++ if (atomic_dec_and_test(&mm->mm_count))
++ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
++}
++#else
++# define mmdrop_delayed(mm) mmdrop(mm)
++#endif
++
+ /* mmput gets rid of the mappings and all user-space */
+ extern void mmput(struct mm_struct *);
+ /* Grab a reference to a task's mm, if it is not already going away */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -616,6 +616,19 @@ void __mmdrop(struct mm_struct *mm)
+ }
+ EXPORT_SYMBOL_GPL(__mmdrop);
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++/*
++ * RCU callback for delayed mm drop. Not strictly rcu, but we don't
++ * want another facility to make this work.
++ */
++void __mmdrop_delayed(struct rcu_head *rhp)
++{
++ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
++
++ __mmdrop(mm);
++}
++#endif
++
+ /*
+ * Decrement the use count and release all resources for an mm.
+ */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1845,8 +1845,12 @@ static void finish_task_switch(struct rq
+ finish_arch_post_lock_switch();
+
+ fire_sched_in_preempt_notifiers(current);
++ /*
++ * We use mmdrop_delayed() here so we don't have to do the
++ * full __mmdrop() when we are the last user.
++ */
+ if (mm)
+- mmdrop(mm);
++ mmdrop_delayed(mm);
+ if (unlikely(prev_state == TASK_DEAD)) {
+ /*
+ * Remove function-return probe instances associated with this
+@@ -4833,6 +4837,8 @@ static int migration_cpu_stop(void *data
+
+ #ifdef CONFIG_HOTPLUG_CPU
+
++static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
++
+ /*
+ * Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
+@@ -4845,7 +4851,12 @@ void idle_task_exit(void)
+
+ if (mm != &init_mm)
+ switch_mm(mm, &init_mm, current);
+- mmdrop(mm);
++
++ /*
++ * Defer the cleanup to an alive cpu. On RT we can neither
++ * call mmdrop() nor mmdrop_delayed() from here.
++ */
++ per_cpu(idle_last_mm, smp_processor_id()) = mm;
+ }
+
+ /*
+@@ -5162,6 +5173,10 @@ migration_call(struct notifier_block *nf
+
+ case CPU_DEAD:
+ calc_load_migrate(rq);
++ if (per_cpu(idle_last_mm, cpu)) {
++ mmdrop(per_cpu(idle_last_mm, cpu));
++ per_cpu(idle_last_mm, cpu) = NULL;
++ }
+ break;
+ #endif
+ }
diff --git a/patches/sched-rt-fix-migrate_enable-thinko.patch b/patches/sched-rt-fix-migrate_enable-thinko.patch
new file mode 100644
index 0000000..7634a60
--- /dev/null
+++ b/patches/sched-rt-fix-migrate_enable-thinko.patch
@@ -0,0 +1,63 @@
+Subject: sched, rt: Fix migrate_enable() thinko
+From: Mike Galbraith <efault@gmx.de>
+Date: Tue, 23 Aug 2011 16:12:43 +0200
+
+Assigning mask = tsk_cpus_allowed(p) after p->migrate_disable = 0 ensures
+that we won't see a mask change, so there is no push/pull and we stack tasks on one CPU.
+
+Also add a couple fields to sched_debug for the next guy.
+
+[ Build fix from Stratos Psomadakis <psomas@gentoo.org> ]
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Cc: Paul E. McKenney <paulmck@us.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/1314108763.6689.4.camel@marge.simson.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/sched/core.c | 4 +++-
+ kernel/sched/debug.c | 7 +++++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4886,12 +4886,14 @@ void migrate_enable(void)
+ */
+ rq = this_rq();
+ raw_spin_lock_irqsave(&rq->lock, flags);
+- p->migrate_disable = 0;
+ mask = tsk_cpus_allowed(p);
++ p->migrate_disable = 0;
+
+ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+
+ if (!cpumask_equal(&p->cpus_allowed, mask)) {
++ /* Get the mask now that migration is enabled */
++ mask = tsk_cpus_allowed(p);
+ if (p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, mask);
+ p->nr_cpus_allowed = cpumask_weight(mask);
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -253,6 +253,9 @@ void print_rt_rq(struct seq_file *m, int
+ P(rt_throttled);
+ PN(rt_time);
+ PN(rt_runtime);
++#ifdef CONFIG_SMP
++ P(rt_nr_migratory);
++#endif
+
+ #undef PN
+ #undef P
+@@ -507,6 +510,10 @@ void proc_sched_show_task(struct task_st
+ P(se.load.weight);
+ P(policy);
+ P(prio);
++#ifdef CONFIG_PREEMPT_RT_FULL
++ P(migrate_disable);
++#endif
++ P(nr_cpus_allowed);
+ #undef PN
+ #undef __PN
+ #undef P
diff --git a/patches/sched-rt-mutex-wakeup.patch b/patches/sched-rt-mutex-wakeup.patch
new file mode 100644
index 0000000..c5ced9e
--- /dev/null
+++ b/patches/sched-rt-mutex-wakeup.patch
@@ -0,0 +1,84 @@
+Subject: sched-rt-mutex-wakeup.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 25 Jun 2011 09:21:04 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 3 +++
+ kernel/sched/core.c | 31 ++++++++++++++++++++++++++++++-
+ 2 files changed, 33 insertions(+), 1 deletion(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1062,6 +1062,7 @@ struct sched_domain;
+ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+ #define WF_FORK 0x02 /* child wakeup after fork */
+ #define WF_MIGRATED 0x04 /* internal use, task got migrated */
++#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
+
+ #define ENQUEUE_WAKEUP 1
+ #define ENQUEUE_HEAD 2
+@@ -1238,6 +1239,7 @@ enum perf_event_task_context {
+
+ struct task_struct {
+ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
++ volatile long saved_state; /* saved state for "spinlock sleepers" */
+ void *stack;
+ atomic_t usage;
+ unsigned int flags; /* per process flags, defined below */
+@@ -2250,6 +2252,7 @@ extern void xtime_update(unsigned long t
+
+ extern int wake_up_state(struct task_struct *tsk, unsigned int state);
+ extern int wake_up_process(struct task_struct *tsk);
++extern int wake_up_lock_sleeper(struct task_struct * tsk);
+ extern void wake_up_new_task(struct task_struct *tsk);
+ #ifdef CONFIG_SMP
+ extern void kick_process(struct task_struct *tsk);
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1438,8 +1438,25 @@ try_to_wake_up(struct task_struct *p, un
+
+ smp_wmb();
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+- if (!(p->state & state))
++ if (!(p->state & state)) {
++ /*
++ * The task might be running due to a spinlock sleeper
++ * wakeup. Check the saved state and set it to running
++ * if the wakeup condition is true.
++ */
++ if (!(wake_flags & WF_LOCK_SLEEPER)) {
++ if (p->saved_state & state)
++ p->saved_state = TASK_RUNNING;
++ }
+ goto out;
++ }
++
++ /*
++ * If this is a regular wakeup, then we can unconditionally
++ * clear the saved state of a "lock sleeper".
++ */
++ if (!(wake_flags & WF_LOCK_SLEEPER))
++ p->saved_state = TASK_RUNNING;
+
+ success = 1; /* we're going to change ->state */
+ cpu = task_cpu(p);
+@@ -1533,6 +1550,18 @@ int wake_up_process(struct task_struct *
+ }
+ EXPORT_SYMBOL(wake_up_process);
+
++/**
++ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
++ * @p: The process to be woken up.
++ *
++ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
++ * the nature of the wakeup.
++ */
++int wake_up_lock_sleeper(struct task_struct *p)
++{
++ return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
++}
++
+ int wake_up_state(struct task_struct *p, unsigned int state)
+ {
+ return try_to_wake_up(p, state, 0);
diff --git a/patches/sched-teach-migrate_disable-about-atomic-contexts.patch b/patches/sched-teach-migrate_disable-about-atomic-contexts.patch
new file mode 100644
index 0000000..2871f82
--- /dev/null
+++ b/patches/sched-teach-migrate_disable-about-atomic-contexts.patch
@@ -0,0 +1,88 @@
+Subject: sched: Teach migrate_disable about atomic contexts
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 02 Sep 2011 14:41:37 +0200
+
+Subject: sched: teach migrate_disable about atomic contexts
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri Sep 02 14:29:27 CEST 2011
+
+ <NMI> [<ffffffff812dafd8>] spin_bug+0x94/0xa8
+ [<ffffffff812db07f>] do_raw_spin_lock+0x43/0xea
+ [<ffffffff814fa9be>] _raw_spin_lock_irqsave+0x6b/0x85
+ [<ffffffff8106ff9e>] ? migrate_disable+0x75/0x12d
+ [<ffffffff81078aaf>] ? pin_current_cpu+0x36/0xb0
+ [<ffffffff8106ff9e>] migrate_disable+0x75/0x12d
+ [<ffffffff81115b9d>] pagefault_disable+0xe/0x1f
+ [<ffffffff81047027>] copy_from_user_nmi+0x74/0xe6
+ [<ffffffff810489d7>] perf_callchain_user+0xf3/0x135
+
+Now clearly we can't go around taking locks from NMI context, cure
+this by short-circuiting migrate_disable() when we're in an atomic
+context already.
+
+Add some extra debugging to avoid things like:
+
+ preempt_disable()
+ migrate_disable();
+
+ preempt_enable();
+ migrate_enable();
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/1314967297.1301.14.camel@twins
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/n/tip-wbot4vsmwhi8vmbf83hsclk6@git.kernel.org
+---
+ include/linux/sched.h | 3 +++
+ kernel/sched/core.c | 21 +++++++++++++++++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1281,6 +1281,9 @@ struct task_struct {
+ unsigned int policy;
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ int migrate_disable;
++# ifdef CONFIG_SCHED_DEBUG
++ int migrate_disable_atomic;
++# endif
+ #endif
+ int nr_cpus_allowed;
+ cpumask_t cpus_allowed;
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4817,6 +4817,17 @@ void migrate_disable(void)
+ unsigned long flags;
+ struct rq *rq;
+
++ if (in_atomic()) {
++#ifdef CONFIG_SCHED_DEBUG
++ p->migrate_disable_atomic++;
++#endif
++ return;
++ }
++
++#ifdef CONFIG_SCHED_DEBUG
++ WARN_ON_ONCE(p->migrate_disable_atomic);
++#endif
++
+ preempt_disable();
+ if (p->migrate_disable) {
+ p->migrate_disable++;
+@@ -4865,6 +4876,16 @@ void migrate_enable(void)
+ unsigned long flags;
+ struct rq *rq;
+
++ if (in_atomic()) {
++#ifdef CONFIG_SCHED_DEBUG
++ p->migrate_disable_atomic--;
++#endif
++ return;
++ }
++
++#ifdef CONFIG_SCHED_DEBUG
++ WARN_ON_ONCE(p->migrate_disable_atomic);
++#endif
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+
+ preempt_disable();
diff --git a/patches/sched-ttwu-ensure-success-return-is-correct.patch b/patches/sched-ttwu-ensure-success-return-is-correct.patch
new file mode 100644
index 0000000..a4a5369
--- /dev/null
+++ b/patches/sched-ttwu-ensure-success-return-is-correct.patch
@@ -0,0 +1,34 @@
+Subject: sched: ttwu: Return success when only changing the saved_state value
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 13 Dec 2011 21:42:19 +0100
+
+When a task blocks on a rt lock, it saves the current state in
+p->saved_state, so a lock related wake up will not destroy the
+original state.
+
+When a real wakeup happens while the task is already running due to a
+lock wakeup, we update p->saved_state to TASK_RUNNING, but we do
+not return success, which might cause another wakeup in the waitqueue
+code and the task remains in the waitqueue list. Return success in
+that case as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/sched/core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1445,8 +1445,10 @@ try_to_wake_up(struct task_struct *p, un
+ * if the wakeup condition is true.
+ */
+ if (!(wake_flags & WF_LOCK_SLEEPER)) {
+- if (p->saved_state & state)
++ if (p->saved_state & state) {
+ p->saved_state = TASK_RUNNING;
++ success = 1;
++ }
+ }
+ goto out;
+ }
diff --git a/patches/scsi-fcoe-rt-aware.patch b/patches/scsi-fcoe-rt-aware.patch
new file mode 100644
index 0000000..8a30fc3
--- /dev/null
+++ b/patches/scsi-fcoe-rt-aware.patch
@@ -0,0 +1,111 @@
+Subject: scsi-fcoe-rt-aware.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 12 Nov 2011 14:00:48 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/scsi/fcoe/fcoe.c | 18 +++++++++---------
+ drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++--
+ drivers/scsi/libfc/fc_exch.c | 4 ++--
+ 3 files changed, 13 insertions(+), 13 deletions(-)
+
+--- a/drivers/scsi/fcoe/fcoe.c
++++ b/drivers/scsi/fcoe/fcoe.c
+@@ -1272,7 +1272,7 @@ static void fcoe_percpu_thread_destroy(u
+ struct sk_buff *skb;
+ #ifdef CONFIG_SMP
+ struct fcoe_percpu_s *p0;
+- unsigned targ_cpu = get_cpu();
++ unsigned targ_cpu = get_cpu_light();
+ #endif /* CONFIG_SMP */
+
+ FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
+@@ -1328,7 +1328,7 @@ static void fcoe_percpu_thread_destroy(u
+ kfree_skb(skb);
+ spin_unlock_bh(&p->fcoe_rx_list.lock);
+ }
+- put_cpu();
++ put_cpu_light();
+ #else
+ /*
+ * This a non-SMP scenario where the singular Rx thread is
+@@ -1546,11 +1546,11 @@ err2:
+ static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
+ {
+ struct fcoe_percpu_s *fps;
+- int rc;
++ int rc, cpu = get_cpu_light();
+
+- fps = &get_cpu_var(fcoe_percpu);
++ fps = &per_cpu(fcoe_percpu, cpu);
+ rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
+- put_cpu_var(fcoe_percpu);
++ put_cpu_light();
+
+ return rc;
+ }
+@@ -1745,11 +1745,11 @@ static inline int fcoe_filter_frames(str
+ return 0;
+ }
+
+- stats = per_cpu_ptr(lport->stats, get_cpu());
++ stats = per_cpu_ptr(lport->stats, get_cpu_light());
+ stats->InvalidCRCCount++;
+ if (stats->InvalidCRCCount < 5)
+ printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
+- put_cpu();
++ put_cpu_light();
+ return -EINVAL;
+ }
+
+@@ -1825,13 +1825,13 @@ static void fcoe_recv_frame(struct sk_bu
+ goto drop;
+
+ if (!fcoe_filter_frames(lport, fp)) {
+- put_cpu();
++ put_cpu_light();
+ fc_exch_recv(lport, fp);
+ return;
+ }
+ drop:
+ stats->ErrorFrames++;
+- put_cpu();
++ put_cpu_light();
+ kfree_skb(skb);
+ }
+
+--- a/drivers/scsi/fcoe/fcoe_ctlr.c
++++ b/drivers/scsi/fcoe/fcoe_ctlr.c
+@@ -792,7 +792,7 @@ static unsigned long fcoe_ctlr_age_fcfs(
+
+ INIT_LIST_HEAD(&del_list);
+
+- stats = per_cpu_ptr(fip->lp->stats, get_cpu());
++ stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
+
+ list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
+ deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
+@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(
+ sel_time = fcf->time;
+ }
+ }
+- put_cpu();
++ put_cpu_light();
+
+ list_for_each_entry_safe(fcf, next, &del_list, list) {
+ /* Removes fcf from current list */
+--- a/drivers/scsi/libfc/fc_exch.c
++++ b/drivers/scsi/libfc/fc_exch.c
+@@ -730,10 +730,10 @@ static struct fc_exch *fc_exch_em_alloc(
+ }
+ memset(ep, 0, sizeof(*ep));
+
+- cpu = get_cpu();
++ cpu = get_cpu_light();
+ pool = per_cpu_ptr(mp->pool, cpu);
+ spin_lock_bh(&pool->lock);
+- put_cpu();
++ put_cpu_light();
+
+ /* peek cache of free slot */
+ if (pool->left != FC_XID_UNKNOWN) {
diff --git a/patches/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch b/patches/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch
new file mode 100644
index 0000000..0a4f191
--- /dev/null
+++ b/patches/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch
@@ -0,0 +1,47 @@
+Subject: scsi: qla2xxx: Use local_irq_save_nort() in qla2x00_poll
+From: John Kacur <jkacur@redhat.com>
+Date: Fri, 27 Apr 2012 12:48:46 +0200
+
+RT triggers the following:
+
+[ 11.307652] [<ffffffff81077b27>] __might_sleep+0xe7/0x110
+[ 11.307663] [<ffffffff8150e524>] rt_spin_lock+0x24/0x60
+[ 11.307670] [<ffffffff8150da78>] ? rt_spin_lock_slowunlock+0x78/0x90
+[ 11.307703] [<ffffffffa0272d83>] qla24xx_intr_handler+0x63/0x2d0 [qla2xxx]
+[ 11.307736] [<ffffffffa0262307>] qla2x00_poll+0x67/0x90 [qla2xxx]
+
+qla2x00_poll() does local_irq_save() before calling qla24xx_intr_handler(),
+which takes a spinlock. Since spinlocks are sleepable on RT, they must not
+be acquired with interrupts disabled. Therefore we use local_irq_save_nort()
+instead, which saves the flags without disabling interrupts.
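+
+For reference, the *_nort() helpers follow roughly this shape; this is
+only a sketch, the authoritative definitions are added by
+local-irq-rt-depending-variants.patch in this queue:
+
+  #ifdef CONFIG_PREEMPT_RT_FULL
+  # define local_irq_save_nort(flags)     local_save_flags(flags)
+  # define local_irq_restore_nort(flags)  (void)(flags)
+  #else
+  # define local_irq_save_nort(flags)     local_irq_save(flags)
+  # define local_irq_restore_nort(flags)  local_irq_restore(flags)
+  #endif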
+
+This fix needs to be applied to v3.0-rt, v3.2-rt and v3.4-rt
+
+Suggested-by: Thomas Gleixner
+Signed-off-by: John Kacur <jkacur@redhat.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: David Sommerseth <davids@redhat.com>
+Link: http://lkml.kernel.org/r/1335523726-10024-1-git-send-email-jkacur@redhat.com
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ drivers/scsi/qla2xxx/qla_inline.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_inline.h
++++ b/drivers/scsi/qla2xxx/qla_inline.h
+@@ -36,12 +36,12 @@ qla2x00_poll(struct rsp_que *rsp)
+ {
+ unsigned long flags;
+ struct qla_hw_data *ha = rsp->hw;
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ if (IS_QLA82XX(ha))
+ qla82xx_poll(0, rsp);
+ else
+ ha->isp_ops->intr_handler(0, rsp);
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+
+ static inline uint8_t *
diff --git a/patches/seqlock-prevent-rt-starvation.patch b/patches/seqlock-prevent-rt-starvation.patch
new file mode 100644
index 0000000..8e61bab
--- /dev/null
+++ b/patches/seqlock-prevent-rt-starvation.patch
@@ -0,0 +1,185 @@
+Subject: seqlock: Prevent rt starvation
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 22 Feb 2012 12:03:30 +0100
+
+If a low prio writer gets preempted while holding the seqlock write
+locked, a high prio reader spins forever on RT.
+
+To prevent this let the reader grab the spinlock, so it blocks and
+eventually boosts the writer. This way the writer can proceed and
+endless spinning is prevented.
+
+For seqcount writers we disable preemption over the update code
+path. Thanks to Al Viro for disentangling some VFS code to make that
+possible.
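+
+A compact userspace model of this scheme (illustrative only; the
+model_* names are invented and a pthread mutex stands in for the
+kernel's PI-aware spinlock):
+
+  #include <pthread.h>
+  #include <stdatomic.h>
+
+  struct model_seqlock {
+      atomic_uint sequence;
+      pthread_mutex_t lock;   /* PTHREAD_PRIO_INHERIT on a real RT setup */
+  };
+
+  static void model_write_seqlock(struct model_seqlock *sl)
+  {
+      pthread_mutex_lock(&sl->lock);
+      atomic_fetch_add(&sl->sequence, 1);     /* sequence becomes odd */
+  }
+
+  static void model_write_sequnlock(struct model_seqlock *sl)
+  {
+      atomic_fetch_add(&sl->sequence, 1);     /* sequence becomes even */
+      pthread_mutex_unlock(&sl->lock);
+  }
+
+  static unsigned model_read_seqbegin(struct model_seqlock *sl)
+  {
+      unsigned ret;
+
+      for (;;) {
+          ret = atomic_load(&sl->sequence);
+          if (!(ret & 1))
+              return ret;
+          /* Block on the lock instead of spinning; boosts the writer. */
+          pthread_mutex_lock(&sl->lock);
+          pthread_mutex_unlock(&sl->lock);
+      }
+  }
+
+  static int model_read_seqretry(struct model_seqlock *sl, unsigned start)
+  {
+      return atomic_load(&sl->sequence) != start;
+  }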
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+
+---
+ include/linux/seqlock.h | 55 +++++++++++++++++++++++++++++++++++++++---------
+ include/net/dst.h | 2 -
+ include/net/neighbour.h | 4 +--
+ 3 files changed, 48 insertions(+), 13 deletions(-)
+
+--- a/include/linux/seqlock.h
++++ b/include/linux/seqlock.h
+@@ -146,18 +146,30 @@ static inline int read_seqcount_retry(co
+ * Sequence counter only version assumes that callers are using their
+ * own mutexing.
+ */
+-static inline void write_seqcount_begin(seqcount_t *s)
++static inline void __write_seqcount_begin(seqcount_t *s)
+ {
+ s->sequence++;
+ smp_wmb();
+ }
+
+-static inline void write_seqcount_end(seqcount_t *s)
++static inline void write_seqcount_begin(seqcount_t *s)
++{
++ preempt_disable_rt();
++ __write_seqcount_begin(s);
++}
++
++static inline void __write_seqcount_end(seqcount_t *s)
+ {
+ smp_wmb();
+ s->sequence++;
+ }
+
++static inline void write_seqcount_end(seqcount_t *s)
++{
++ __write_seqcount_end(s);
++ preempt_enable_rt();
++}
++
+ /**
+ * write_seqcount_barrier - invalidate in-progress read-side seq operations
+ * @s: pointer to seqcount_t
+@@ -198,10 +210,33 @@ typedef struct {
+ /*
+ * Read side functions for starting and finalizing a read side section.
+ */
++#ifndef CONFIG_PREEMPT_RT_FULL
+ static inline unsigned read_seqbegin(const seqlock_t *sl)
+ {
+ return read_seqcount_begin(&sl->seqcount);
+ }
++#else
++/*
++ * Starvation safe read side for RT
++ */
++static inline unsigned read_seqbegin(seqlock_t *sl)
++{
++ unsigned ret;
++
++repeat:
++ ret = ACCESS_ONCE(sl->seqcount.sequence);
++ if (unlikely(ret & 1)) {
++ /*
++ * Take the lock and let the writer proceed (i.e. eventually
++ * boost it), otherwise we could loop here forever.
++ */
++ spin_lock(&sl->lock);
++ spin_unlock(&sl->lock);
++ goto repeat;
++ }
++ return ret;
++}
++#endif
+
+ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
+ {
+@@ -216,36 +251,36 @@ static inline unsigned read_seqretry(con
+ static inline void write_seqlock(seqlock_t *sl)
+ {
+ spin_lock(&sl->lock);
+- write_seqcount_begin(&sl->seqcount);
++ __write_seqcount_begin(&sl->seqcount);
+ }
+
+ static inline void write_sequnlock(seqlock_t *sl)
+ {
+- write_seqcount_end(&sl->seqcount);
++ __write_seqcount_end(&sl->seqcount);
+ spin_unlock(&sl->lock);
+ }
+
+ static inline void write_seqlock_bh(seqlock_t *sl)
+ {
+ spin_lock_bh(&sl->lock);
+- write_seqcount_begin(&sl->seqcount);
++ __write_seqcount_begin(&sl->seqcount);
+ }
+
+ static inline void write_sequnlock_bh(seqlock_t *sl)
+ {
+- write_seqcount_end(&sl->seqcount);
++ __write_seqcount_end(&sl->seqcount);
+ spin_unlock_bh(&sl->lock);
+ }
+
+ static inline void write_seqlock_irq(seqlock_t *sl)
+ {
+ spin_lock_irq(&sl->lock);
+- write_seqcount_begin(&sl->seqcount);
++ __write_seqcount_begin(&sl->seqcount);
+ }
+
+ static inline void write_sequnlock_irq(seqlock_t *sl)
+ {
+- write_seqcount_end(&sl->seqcount);
++ __write_seqcount_end(&sl->seqcount);
+ spin_unlock_irq(&sl->lock);
+ }
+
+@@ -254,7 +289,7 @@ static inline unsigned long __write_seql
+ unsigned long flags;
+
+ spin_lock_irqsave(&sl->lock, flags);
+- write_seqcount_begin(&sl->seqcount);
++ __write_seqcount_begin(&sl->seqcount);
+ return flags;
+ }
+
+@@ -264,7 +299,7 @@ static inline unsigned long __write_seql
+ static inline void
+ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
+ {
+- write_seqcount_end(&sl->seqcount);
++ __write_seqcount_end(&sl->seqcount);
+ spin_unlock_irqrestore(&sl->lock, flags);
+ }
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -392,7 +392,7 @@ static inline void dst_confirm(struct ds
+ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
+ struct sk_buff *skb)
+ {
+- const struct hh_cache *hh;
++ struct hh_cache *hh;
+
+ if (dst->pending_confirm) {
+ unsigned long now = jiffies;
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -334,7 +334,7 @@ static inline int neigh_hh_bridge(struct
+ }
+ #endif
+
+-static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
++static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
+ {
+ unsigned int seq;
+ int hh_len;
+@@ -389,7 +389,7 @@ struct neighbour_cb {
+
+ #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
+
+-static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
++static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
+ const struct net_device *dev)
+ {
+ unsigned int seq;
diff --git a/patches/seqlock-remove-unused-functions.patch b/patches/seqlock-remove-unused-functions.patch
new file mode 100644
index 0000000..59e11c9
--- /dev/null
+++ b/patches/seqlock-remove-unused-functions.patch
@@ -0,0 +1,44 @@
+Subject: seqlock: Remove unused functions
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Jul 2011 18:38:22 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/seqlock.h | 21 ---------------------
+ 1 file changed, 21 deletions(-)
+
+--- a/include/linux/seqlock.h
++++ b/include/linux/seqlock.h
+@@ -69,17 +69,6 @@ static inline void write_sequnlock(seqlo
+ spin_unlock(&sl->lock);
+ }
+
+-static inline int write_tryseqlock(seqlock_t *sl)
+-{
+- int ret = spin_trylock(&sl->lock);
+-
+- if (ret) {
+- ++sl->sequence;
+- smp_wmb();
+- }
+- return ret;
+-}
+-
+ /* Start of read calculation -- fetch last complete writer token */
+ static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
+ {
+@@ -269,14 +258,4 @@ static inline void write_seqcount_barrie
+ #define write_sequnlock_bh(lock) \
+ do { write_sequnlock(lock); local_bh_enable(); } while(0)
+
+-#define read_seqbegin_irqsave(lock, flags) \
+- ({ local_irq_save(flags); read_seqbegin(lock); })
+-
+-#define read_seqretry_irqrestore(lock, iv, flags) \
+- ({ \
+- int ret = read_seqretry(lock, iv); \
+- local_irq_restore(flags); \
+- ret; \
+- })
+-
+ #endif /* __LINUX_SEQLOCK_H */
diff --git a/patches/seqlock-use-seqcount.patch b/patches/seqlock-use-seqcount.patch
new file mode 100644
index 0000000..37b1872
--- /dev/null
+++ b/patches/seqlock-use-seqcount.patch
@@ -0,0 +1,218 @@
+Subject: seqlock: Use seqcount
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Jul 2011 18:40:26 +0200
+
+No point in having different implementations for the same thing.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/seqlock.h | 176 +++++++++++++++++++++++++-----------------------
+ 1 file changed, 93 insertions(+), 83 deletions(-)
+
+--- a/include/linux/seqlock.h
++++ b/include/linux/seqlock.h
+@@ -30,81 +30,12 @@
+ #include <linux/preempt.h>
+ #include <asm/processor.h>
+
+-typedef struct {
+- unsigned sequence;
+- spinlock_t lock;
+-} seqlock_t;
+-
+-/*
+- * These macros triggered gcc-3.x compile-time problems. We think these are
+- * OK now. Be cautious.
+- */
+-#define __SEQLOCK_UNLOCKED(lockname) \
+- { 0, __SPIN_LOCK_UNLOCKED(lockname) }
+-
+-#define seqlock_init(x) \
+- do { \
+- (x)->sequence = 0; \
+- spin_lock_init(&(x)->lock); \
+- } while (0)
+-
+-#define DEFINE_SEQLOCK(x) \
+- seqlock_t x = __SEQLOCK_UNLOCKED(x)
+-
+-/* Lock out other writers and update the count.
+- * Acts like a normal spin_lock/unlock.
+- * Don't need preempt_disable() because that is in the spin_lock already.
+- */
+-static inline void write_seqlock(seqlock_t *sl)
+-{
+- spin_lock(&sl->lock);
+- ++sl->sequence;
+- smp_wmb();
+-}
+-
+-static inline void write_sequnlock(seqlock_t *sl)
+-{
+- smp_wmb();
+- sl->sequence++;
+- spin_unlock(&sl->lock);
+-}
+-
+-/* Start of read calculation -- fetch last complete writer token */
+-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
+-{
+- unsigned ret;
+-
+-repeat:
+- ret = ACCESS_ONCE(sl->sequence);
+- if (unlikely(ret & 1)) {
+- cpu_relax();
+- goto repeat;
+- }
+- smp_rmb();
+-
+- return ret;
+-}
+-
+-/*
+- * Test if reader processed invalid data.
+- *
+- * If sequence value changed then writer changed data while in section.
+- */
+-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
+-{
+- smp_rmb();
+-
+- return unlikely(sl->sequence != start);
+-}
+-
+-
+ /*
+ * Version using sequence counter only.
+ * This can be used when code has its own mutex protecting the
+ * updating starting before the write_seqcountbeqin() and ending
+ * after the write_seqcount_end().
+ */
+-
+ typedef struct seqcount {
+ unsigned sequence;
+ } seqcount_t;
+@@ -207,7 +138,6 @@ static inline int __read_seqcount_retry(
+ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+ {
+ smp_rmb();
+-
+ return __read_seqcount_retry(s, start);
+ }
+
+@@ -241,21 +171,101 @@ static inline void write_seqcount_barrie
+ s->sequence+=2;
+ }
+
++typedef struct {
++ struct seqcount seqcount;
++ spinlock_t lock;
++} seqlock_t;
++
+ /*
+- * Possible sw/hw IRQ protected versions of the interfaces.
++ * These macros triggered gcc-3.x compile-time problems. We think these are
++ * OK now. Be cautious.
+ */
++#define __SEQLOCK_UNLOCKED(lockname) \
++ { \
++ .seqcount = SEQCNT_ZERO, \
++ .lock = __SPIN_LOCK_UNLOCKED(lockname) \
++ }
++
++#define seqlock_init(x) \
++ do { \
++ seqcount_init(&(x)->seqcount); \
++ spin_lock_init(&(x)->lock); \
++ } while (0)
++
++#define DEFINE_SEQLOCK(x) \
++ seqlock_t x = __SEQLOCK_UNLOCKED(x)
++
++/*
++ * Read side functions for starting and finalizing a read side section.
++ */
++static inline unsigned read_seqbegin(const seqlock_t *sl)
++{
++ return read_seqcount_begin(&sl->seqcount);
++}
++
++static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
++{
++ return read_seqcount_retry(&sl->seqcount, start);
++}
++
++/*
++ * Lock out other writers and update the count.
++ * Acts like a normal spin_lock/unlock.
++ * Don't need preempt_disable() because that is in the spin_lock already.
++ */
++static inline void write_seqlock(seqlock_t *sl)
++{
++ spin_lock(&sl->lock);
++ write_seqcount_begin(&sl->seqcount);
++}
++
++static inline void write_sequnlock(seqlock_t *sl)
++{
++ write_seqcount_end(&sl->seqcount);
++ spin_unlock(&sl->lock);
++}
++
++static inline void write_seqlock_bh(seqlock_t *sl)
++{
++ spin_lock_bh(&sl->lock);
++ write_seqcount_begin(&sl->seqcount);
++}
++
++static inline void write_sequnlock_bh(seqlock_t *sl)
++{
++ write_seqcount_end(&sl->seqcount);
++ spin_unlock_bh(&sl->lock);
++}
++
++static inline void write_seqlock_irq(seqlock_t *sl)
++{
++ spin_lock_irq(&sl->lock);
++ write_seqcount_begin(&sl->seqcount);
++}
++
++static inline void write_sequnlock_irq(seqlock_t *sl)
++{
++ write_seqcount_end(&sl->seqcount);
++ spin_unlock_irq(&sl->lock);
++}
++
++static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&sl->lock, flags);
++ write_seqcount_begin(&sl->seqcount);
++ return flags;
++}
++
+ #define write_seqlock_irqsave(lock, flags) \
+- do { local_irq_save(flags); write_seqlock(lock); } while (0)
+-#define write_seqlock_irq(lock) \
+- do { local_irq_disable(); write_seqlock(lock); } while (0)
+-#define write_seqlock_bh(lock) \
+- do { local_bh_disable(); write_seqlock(lock); } while (0)
+-
+-#define write_sequnlock_irqrestore(lock, flags) \
+- do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
+-#define write_sequnlock_irq(lock) \
+- do { write_sequnlock(lock); local_irq_enable(); } while(0)
+-#define write_sequnlock_bh(lock) \
+- do { write_sequnlock(lock); local_bh_enable(); } while(0)
++ do { flags = __write_seqlock_irqsave(lock); } while (0)
++
++static inline void
++write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
++{
++ write_seqcount_end(&sl->seqcount);
++ spin_unlock_irqrestore(&sl->lock, flags);
++}
+
+ #endif /* __LINUX_SEQLOCK_H */
diff --git a/patches/series b/patches/series
new file mode 100644
index 0000000..068b68b
--- /dev/null
+++ b/patches/series
@@ -0,0 +1,603 @@
+###########################################################
+# DELTA against a known Linus release
+###########################################################
+
+############################################################
+# UPSTREAM changes queued
+############################################################
+fix-1-2-slub-do-not-dereference-null-pointer-in-node_match.patch
+fix-2-2-slub-tid-must-be-retrieved-from-the-percpu-area-of-the-current-processor.patch
+fix-rq-3elock-vs-logbuf_lock-unlock-race.patch
+genirq-add-default-mask-cmdline-option.patch
+of-fixup-resursive-locking-code-paths.patch
+of-convert-devtree-lock-from-rw_lock-to-raw-spinlock.patch
+
+locking-various-init-fixes.patch
+intel_idle-convert-i7300_idle_lock-to-raw-spinlock.patch
+ntp-make-ntp-lock-raw-sigh.patch
+seqlock-remove-unused-functions.patch
+seqlock-use-seqcount.patch
+generic-cmpxchg-use-raw-local-irq.patch
+
+0001-of-fix-recursive-locking-in-of_get_next_available_ch.patch
+
+############################################################
+# UPSTREAM FIXES, patches pending
+############################################################
+
+############################################################
+# Stuff broken upstream, patches submitted
+############################################################
+
+############################################################
+# Stuff which needs addressing upstream, but requires more
+# information
+############################################################
+x86-hpet-disable-msi-on-lenovo-w510.patch
+
+############################################################
+# Stuff broken upstream, need to be sent
+############################################################
+
+############################################################
+# Submitted on LKML
+############################################################
+early-printk-consolidate.patch
+
+# SRCU
+0001-kernel-srcu-merge-common-code-into-a-macro.patch
+0002-kernel-SRCU-provide-a-static-initializer.patch
+
+############################################################
+# Submitted to mips ML
+############################################################
+
+############################################################
+# Submitted to ARM ML
+############################################################
+arm-mark-pmu-interupt-no-thread.patch
+arm-allow-irq-threading.patch
+
+############################################################
+# Submitted to PPC ML
+############################################################
+ppc-mark-low-level-handlers-no-thread.patch
+
+############################################################
+# Submitted on LKML
+############################################################
+
+timekeeping-do-not-calc-crap-over-and-over.patch
+timekeeping-make-jiffies-lock-internal.patch
+timekeeping-move-lock-out-of-timekeeper.patch
+timekeeping-split-timekeeper-lock.patch
+timekeeping-store-cycle-last-in-timekeeper.patch
+timekeeping-delay-clock-cycle-last-update.patch
+timekeeping-implement-shadow-timekeeper.patch
+timekeeping-shorten-seq-count-region.patch
+
+############################################################
+# Submitted to net-dev
+############################################################
+
+############################################################
+# Pending in tip
+############################################################
+
+############################################################
+# Stuff which should go upstream ASAP
+############################################################
+
+# SCHED BLOCK/WQ
+block-shorten-interrupt-disabled-regions.patch
+
+# Timekeeping split jiffies lock. Needs a good argument :)
+timekeeping-split-jiffies-lock.patch
+
+# CHECKME: Should local_irq_enable() generally do a preemption check ?
+mips-enable-interrupts-in-signal.patch
+
+# Tracing
+tracing-account-for-preempt-off-in-preempt_schedule.patch
+
+# PTRACE/SIGNAL crap
+signal-revert-ptrace-preempt-magic.patch
+
+# ARM IRQF_NO_THREAD / IRQ THREADING SUPPORT
+arm-convert-boot-lock-to-raw.patch
+arm-omap-make-wakeupgen_lock-raw.patch
+
+# PREEMPT_ENABLE_NO_RESCHED
+
+# SIGNALS / POSIXTIMERS
+posix-timers-no-broadcast.patch
+signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
+oleg-signal-rt-fix.patch
+
+# SCHED
+
+# GENERIC CMPXCHG
+
+# SHORTEN PREEMPT DISABLED
+drivers-random-reduce-preempt-disabled-region.patch
+
+# CLOCKSOURCE
+arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
+clocksource-tclib-allow-higher-clockrates.patch
+
+# DRIVERS NET
+drivers-net-tulip-add-missing-pci-disable.patch
+drivers-net-8139-disable-irq-nosync.patch
+
+# PREEMPT
+
+# PAGEFAULT DISABLE
+mm-prepare-pf-disable-discoupling.patch
+arch-use-pagefault-disabled.patch
+peter_zijlstra-frob-pagefault_disable.patch
+peterz-raw_pagefault_disable.patch
+# highmem-explicitly-disable-preemption.patch -- peterz
+filemap-fix-up.patch
+mm-remove-preempt-count-from-pf.patch
+
+# PM
+suspend-prevernt-might-sleep-splats.patch
+
+# MM/LISTS
+list-add-list-last-entry.patch
+mm-page-alloc-use-list-last-entry.patch
+mm-slab-move-debug-out.patch
+
+# INCLUDE MESS
+pid-h-include-atomic-h.patch
+sysctl-include-atomic-h.patch
+
+# NETWORKING
+net-flip-lock-dep-thingy.patch
+
+# SOFTIRQ
+softirq-thread-do-softirq.patch
+softirq-split-out-code.patch
+
+# X86
+x86-io-apic-migra-no-unmask.patch
+fix-rt-int3-x86_32-3.2-rt.patch
+
+# RCU
+
+# LOCKING INIT FIXES
+
+# PCI
+pci-access-use-__wake_up_all_locked.patch
+
+#####################################################
+# Stuff which should go mainline, but wants some care
+#####################################################
+
+# SEQLOCK
+
+# ANON RW SEMAPHORES
+
+# TRACING
+latency-hist.patch
+
+# HW LATENCY DETECTOR - this really wants a rewrite
+# HW latency detector
+hwlatdetect.patch
+
+##################################################
+# REAL RT STUFF starts here
+##################################################
+
+# Add RT to version
+localversion.patch
+
+# PRINTK
+printk-kill.patch
+printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch
+
+# BASE RT CONFIG
+rt-preempt-base-config.patch
+
+# WARN/BUG_ON_RT
+bug-rt-dependend-variants.patch
+
+# LOCAL_IRQ_RT/NON_RT
+local-irq-rt-depending-variants.patch
+
+# PREEMPT NORT
+preempt-nort-rt-variants.patch
+
+# ANNOTATE local_irq_disable sites
+ata-disable-interrupts-if-non-rt.patch
+ide-use-nort-local-irq-variants.patch
+infiniband-mellanox-ib-use-nort-irq.patch
+inpt-gameport-use-local-irq-nort.patch
+acpi-use-local-irq-nort.patch
+user-use-local-irq-nort.patch
+resource-counters-use-localirq-nort.patch
+usb-hcd-use-local-irq-nort.patch
+mm-scatterlist-dont-disable-irqs-on-RT.patch
+
+# Sigh
+signal-fix-up-rcu-wreckage.patch
+
+# ANNOTATE BUG/WARNON
+net-wireless-warn-nort.patch
+
+# BIT SPINLOCKS - SIGH
+mm-cgroup-page-bit-spinlock.patch
+fs-replace-bh_uptodate_lock-for-rt.patch
+fs-jbd-replace-bh_state-lock.patch
+
+# GENIRQ
+genirq-nodebug-shirq.patch
+genirq-disable-irqpoll-on-rt.patch
+genirq-force-threading.patch
+
+# DRIVERS NET
+drivers-net-fix-livelock-issues.patch
+drivers-net-vortex-fix-locking-issues.patch
+drivers-net-gianfar-make-rt-aware.patch
+
+# DRIVERS USB
+# Revisit. Looks weird
+#usb-rt-support.patch
+usb-fix-mouse-problem-copying-large-data.patch
+
+# LOCAL_IRQ_LOCKS
+local-var.patch
+rt-local-irq-lock.patch
+cpu-rt-variants.patch
+
+# MM SLAB
+mm-slab-wrap-functions.patch
+mm-slab-more-lock-breaks.patch
+
+# MM PAGE_ALLOC
+mm-page_alloc-rt-friendly-per-cpu-pages.patch
+mm-page_alloc-reduce-lock-sections-further.patch
+mm-page-alloc-fix.patch
+
+# MM SWAP
+mm-convert-swap-to-percpu-locked.patch
+
+# MM vmstat
+mm-make-vmstat-rt-aware.patch
+
+# MM memory
+#mm-memory-rt.patch - ZAP... is unused
+mm-shrink-the-page-frame-to-rt-size.patch
+re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch
+
+# MM SLAB only
+mm-allow-slab-rt.patch
+
+# Revisit for avr/frv/ia64/mn10300/sh/sparc ...
+#mm-quicklists-percpu-locked.patch
+
+# RADIX TREE
+radix-tree-rt-aware.patch
+
+# PANIC
+panic-disable-random-on-rt.patch
+
+# IPC
+ipc-make-rt-aware.patch
+ipc-mqueue-add-a-critical-section-to-avoid-a-deadlock.patch
+
+# RELAY
+relay-fix-timer-madness.patch
+
+# NETWORKING
+
+# WORKQUEUE SIGH
+
+# TIMERS
+timers-prepare-for-full-preemption.patch
+timers-preempt-rt-support.patch
+timers-mov-printk_tick-to-soft-interrupt.patch
+timer-delay-waking-softirqs-from-the-jiffy-tick.patch
+timers-avoid-the-base-null-otptimization-on-rt.patch
+
+# More PRINTK
+rfc-printk-don-27t-call-printk_tick-in-printk_needs_cpu.patch
+
+# HRTIMERS
+hrtimers-prepare-full-preemption.patch
+hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
+timer-fd-avoid-live-lock.patch
+
+# POSIX-CPU-TIMERS
+posix-timers-thread-posix-cpu-timers-on-rt.patch
+posix-timers-shorten-cpu-timers-thread.patch
+posix-timers-avoid-wakeups-when-no-timers-are-active.patch
+
+# SCHEDULER
+sched-delay-put-task.patch
+sched-limit-nr-migrate.patch
+sched-mmdrop-delayed.patch
+sched-rt-mutex-wakeup.patch
+sched-might-sleep-do-not-account-rcu-depth.patch
+# CHECKME sched-load-balance-break-on-rq-contention.patch
+sched-cond-resched.patch
+cond-resched-softirq-rt.patch
+cond-resched-lock-rt-tweak.patch
+sched-disable-ttwu-queue.patch
+sched-disable-rt-group-sched-on-rt.patch
+sched-ttwu-ensure-success-return-is-correct.patch
+
+# STOP MACHINE
+stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
+stomp-machine-mark-stomper-thread.patch
+stomp-machine-raw-lock.patch
+
+# MIGRATE DISABLE AND PER CPU
+hotplug-light-get-online-cpus.patch
+hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
+re-migrate_disable-race-with-cpu-hotplug-3f.patch
+sched-migrate-disable.patch
+hotplug-use-migrate-disable.patch
+hotplug-call-cpu_unplug_begin-a-little-early.patch
+
+ftrace-migrate-disable-tracing.patch
+rt-tracing-show-padding-as-unsigned-short.patch
+
+migrate-disable-rt-variant.patch
+peter_zijlstra-frob-migrate_disable.patch
+peter_zijlstra-frob-migrate_disable-2.patch
+sched-rt-fix-migrate_enable-thinko.patch
+sched-teach-migrate_disable-about-atomic-contexts.patch
+rt-sched-postpone-actual-migration-disalbe-to-schedule.patch
+rt-sched-do-not-compare-cpu-masks-in-scheduler.patch
+rt-sched-have-migrate_disable-ignore-bounded-threads.patch
+sched-clear-pf-thread-bound-on-fallback-rq.patch
+
+# FTRACE
+# XXX checkme ftrace-crap.patch
+# CHECKME rt-ring-buffer-convert-reader_lock-from-raw_spin_lock-into-spin_lock.patch
+# CHECKME rfc-ring-buffer-rt-check-for-irqs-disabled-before-grabbing-reader-lock.patch
+
+# NETWORKING
+net-netif_rx_ni-migrate-disable.patch
+
+# NOHZ
+softirq-sanitize-softirq-pending.patch
+
+# LOCKDEP
+lockdep-no-softirq-accounting-on-rt.patch
+
+# SOFTIRQ local lock
+mutex-no-spin-on-rt.patch
+softirq-local-lock.patch
+softirq-export-in-serving-softirq.patch
+harirq-h-define-softirq_count-as-oul-to-kill-build-warning.patch
+# XXX checkme softirq-fix-unplug-deadlock.patch
+softirq-disable-softirq-stacks-for-rt.patch
+softirq-make-fifo.patch
+tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch
+irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
+
+# LOCAL VARS and GETCPU STUFF
+local-vars-migrate-disable.patch
+
+# RAID5
+md-raid5-percpu-handling-rt-aware.patch
+
+# FUTEX/RTMUTEX
+rtmutex-futex-prepare-rt.patch
+futex-requeue-pi-fix.patch
+0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
+
+# RTMUTEX
+rtmutex-lock-killable.patch
+rt-mutex-add-sleeping-spinlocks-support.patch
+spinlock-types-separate-raw.patch
+rtmutex-avoid-include-hell.patch
+rt-add-rt-spinlock-to-headers.patch
+rt-add-rt-to-mutex-headers.patch
+rwsem-add-rt-variant.patch
+rt-add-rt-locks.patch
+
+# RTMUTEX Fallout
+tasklist-lock-fix-section-conflict.patch
+
+# NOHZ/RTMUTEX
+timer-handle-idle-trylock-in-get-next-timer-irq.patch
+
+# RCU
+peter_zijlstra-frob-rcu.patch
+rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch
+rcu-tiny-merge-bh.patch
+patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch
+
+# LGLOCKS - lovely
+lglocks-rt.patch
+
+# DRIVERS SERIAL
+drivers-serial-cleanup-locking-for-rt.patch
+drivers-serial-call-flush_to_ldisc-when-the-irq-is-t.patch
+drivers-tty-fix-omap-lock-crap.patch
+drivers-tty-pl011-irq-disable-madness.patch
+rt-serial-warn-fix.patch
+
+# FS
+fs-namespace-preemption-fix.patch
+mm-protect-activate-switch-mm.patch
+fs-block-rt-support.patch
+fs-ntfs-disable-interrupt-non-rt.patch
+
+# X86
+x86-mce-timer-hrtimer.patch
+x86-stackprot-no-random-on-rt.patch
+x86-use-gen-rwsem-spinlocks-rt.patch
+x86-disable-debug-stack.patch
+
+# CPU get light
+epoll-use-get-cpu-light.patch
+mm-vmalloc-use-get-cpu-light.patch
+
+# WORKQUEUE more fixes
+workqueue-use-locallock.patch
+# CHECKME workqueue-sanity.patch
+# CHECKME workqueue-fix-PF_THREAD_BOUND.patch
+# CHECKME workqueue-hotplug-fix.patch
+# CHECKME workqueue-more-hotplug-fallout.patch
+
+# DEBUGOBJECTS
+debugobjects-rt.patch
+
+# JUMPLABEL
+jump-label-rt.patch
+
+# NET
+skbufhead-raw-lock.patch
+
+# PERF
+perf-move-irq-work-to-softirq-in-rt.patch
+
+# CONSOLE. NEEDS more thought !!!
+printk-rt-aware.patch
+
+# POWERPC
+power-use-generic-rwsem-on-rt.patch
+power-disable-highmem-on-rt.patch
+
+# ARM
+arm-disable-highmem-on-rt.patch
+arm-at91-tclib-default-to-tclib-timer-for-rt.patch
+
+# MIPS
+mips-disable-highmem-on-rt.patch
+
+# NETWORK livelock fix
+net-tx-action-avoid-livelock-on-rt.patch
+
+# NETWORK DEBUGGING AID
+ping-sysrq.patch
+
+# KGDB
+kgb-serial-hackaround.patch
+
+# SYSFS - RT indicator
+sysfs-realtime-entry.patch
+
+# KMAP/HIGHMEM
+mm-rt-kmap-atomic-scheduling.patch
+0002-x86-highmem-add-a-already-used-pte-check.patch
+0003-arm-highmem-flush-tlb-on-unmap.patch
+arm-enable-highmem-for-rt.patch
+
+# IPC
+ipc-sem-rework-semaphore-wakeups.patch
+
+# SYSRQ
+
+# KVM require constant freq TSC (smp function call -> cpufreq)
+x86-kvm-require-const-tsc-for-rt.patch
+
+# SCSI/FCOE
+scsi-fcoe-rt-aware.patch
+
+# X86 crypto
+x86-crypto-reduce-preempt-disabled-regions.patch
+
+# Device mapper
+dm-make-rt-aware.patch
+
+# ACPI
+# Dropped those two as they cause a "scheduling while atomic" failure
+# and we have no clue why we made those locks raw in the first place.
+# acpi-make-gbl-hardware-lock-raw.patch
+# acpi-make-ec-lock-raw-as-well.patch
+
+# This one is just a follow up to the raw spin locks
+# Simple raw spinlock based waitqueue
+# wait-simple-version.patch
+# acpi-gpe-use-wait-simple.patch
+
+# CPUMASK OFFSTACK
+cpumask-disable-offstack-on-rt.patch
+
+# Various fixes - fold them back
+seqlock-prevent-rt-starvation.patch
+#fs-protect-opencoded-isize-seqcount.patch
+#net-u64-stat-protect-seqcount.patch
+rfc-sched-rt-fix-wait_task_interactive-to-test-rt_spin_lock-state.patch
+
+cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
+
+softirq-preempt-fix-3-re.patch
+scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch
+upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch
+
+# FS LIVELOCK PREVENTION
+rt-introduce-cpu-chill.patch
+fs-dcache-use-cpu-chill-in-trylock-loops.patch
+net-use-cpu-chill.patch
+
+# LOCKDEP
+lockdep-selftest-convert-spinlock-to-raw-spinlock.patch
+lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
+
+fs-jbd-pull-plug-when-waiting-for-space.patch
+perf-make-swevent-hrtimer-irqsafe.patch
+cpu-rt-rework-cpu-down.patch
+
+# Stable-rt stuff: Fold back when Steve grabbed it
+random-make-it-work-on-rt.patch
+softirq-init-softirq-local-lock-after-per-cpu-section-is-set-up.patch
+mm-page-alloc-use-local-lock-on-target-cpu.patch
+rt-rw-lockdep-annotations.patch
+sched-better-debug-output-for-might-sleep.patch
+stomp-machine-deal-clever-with-stopper-lock.patch
+
+# 3.6 specific updates
+net-another-local-irq-disable-alloc-atomic-headache.patch
+net-use-cpu-light-in-ip-send-unicast-reply.patch
+peterz-srcu-crypto-chain.patch
+x86-perf-uncore-deal-with-kfree.patch
+softirq-make-serving-softirqs-a-task-flag.patch
+softirq-split-handling-function.patch
+softirq-split-locks.patch
+
+rcu-tiny-solve-rt-mistery.patch
+mm-enable-slub.patch
+hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
+
+rcu-disable-rcu-fast-no-hz-on-rt.patch
+net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
+softirq-adapt-nohz-pending-debug-code-to-new-scheme.patch
+net-netif-rx-ni-use-local-bh-disable.patch
+
+preempt-lazy-support.patch
+x86-preempt-lazy.patch
+arm-preempt-lazy-support.patch
+
+# 3.8 changes
+net-make-devnet_rename_seq-a-mutex.patch
+powerpc-fsl-msi-use-a-different-locklcass-for-the-ca.patch
+i2c-omap-drop-the-lock-hard-irq-context.patch
+spi-omap-mcspi-check-condition-also-after-timeout.patch
+HACK-printk-drop-the-logbuf_lock-more-often.patch
+
+# Enable full RT
+powerpc-preempt-lazy-support.patch
+wait-simple-implementation.patch
+rcutiny-use-simple-waitqueue.patch
+treercu-use-simple-waitqueue.patch
+sched-adjust-reset-on-fork-always.patch
+sched-enqueue-to-head.patch
+sched-consider-pi-boosting-in-setscheduler.patch
+block-use-cpu-chill.patch
+
+mm-bounce-local-irq-save-nort.patch
+mmci-remove-bogus-irq-save.patch
+slub-enable-irqs-for-no-wait.patch
+idle-state.patch
+might-sleep-check-for-idle.patch
+wait-simple-rework-for-completions.patch
+completion-use-simple-wait-queues.patch
+
+kconfig-disable-a-few-options-rt.patch
+kconfig-preempt-rt-full.patch
diff --git a/patches/signal-fix-up-rcu-wreckage.patch b/patches/signal-fix-up-rcu-wreckage.patch
new file mode 100644
index 0000000..bee7aba
--- /dev/null
+++ b/patches/signal-fix-up-rcu-wreckage.patch
@@ -0,0 +1,35 @@
+Subject: signal-fix-up-rcu-wreckage.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 22 Jul 2011 08:07:08 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/signal.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -1391,12 +1391,12 @@ struct sighand_struct *__lock_task_sigha
+ struct sighand_struct *sighand;
+
+ for (;;) {
+- local_irq_save(*flags);
++ local_irq_save_nort(*flags);
+ rcu_read_lock();
+ sighand = rcu_dereference(tsk->sighand);
+ if (unlikely(sighand == NULL)) {
+ rcu_read_unlock();
+- local_irq_restore(*flags);
++ local_irq_restore_nort(*flags);
+ break;
+ }
+
+@@ -1407,7 +1407,7 @@ struct sighand_struct *__lock_task_sigha
+ }
+ spin_unlock(&sighand->siglock);
+ rcu_read_unlock();
+- local_irq_restore(*flags);
++ local_irq_restore_nort(*flags);
+ }
+
+ return sighand;
diff --git a/patches/signal-revert-ptrace-preempt-magic.patch b/patches/signal-revert-ptrace-preempt-magic.patch
new file mode 100644
index 0000000..6a07b93
--- /dev/null
+++ b/patches/signal-revert-ptrace-preempt-magic.patch
@@ -0,0 +1,27 @@
+Subject: signal-revert-ptrace-preempt-magic.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 21 Sep 2011 19:57:12 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/signal.c | 8 --------
+ 1 file changed, 8 deletions(-)
+
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -1902,15 +1902,7 @@ static void ptrace_stop(int exit_code, i
+ if (gstop_done && ptrace_reparented(current))
+ do_notify_parent_cldstop(current, false, why);
+
+- /*
+- * Don't want to allow preemption here, because
+- * sys_ptrace() needs this task to be inactive.
+- *
+- * XXX: implement read_unlock_no_resched().
+- */
+- preempt_disable();
+ read_unlock(&tasklist_lock);
+- preempt_enable_no_resched();
+ freezable_schedule();
+ } else {
+ /*
diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
new file mode 100644
index 0000000..809a02f
--- /dev/null
+++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
@@ -0,0 +1,205 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:56 -0500
+Subject: signals: Allow rt tasks to cache one sigqueue struct
+
+To avoid allocation, allow rt tasks to cache one sigqueue struct in
+the task struct.
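+
+The caching scheme boils down to a lock-free one-slot exchange. A
+self-contained userspace sketch (task_model and the helpers here are
+illustrative stand-ins built on C11 atomics rather than the kernel's
+cmpxchg()):
+
+  #include <stdatomic.h>
+  #include <stddef.h>
+
+  struct sigqueue;                 /* opaque for the purpose of the model */
+
+  struct task_model {
+      struct sigqueue *_Atomic sigqueue_cache;
+  };
+
+  /* Take the cached entry, if any, leaving NULL behind. */
+  static struct sigqueue *get_task_cache(struct task_model *t)
+  {
+      return atomic_exchange(&t->sigqueue_cache, NULL);
+  }
+
+  /* Park q only if the slot is empty; returns 0 on success, 1 otherwise. */
+  static int put_task_cache(struct task_model *t, struct sigqueue *q)
+  {
+      struct sigqueue *expected = NULL;
+
+      return atomic_compare_exchange_strong(&t->sigqueue_cache,
+                                            &expected, q) ? 0 : 1;
+  }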
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/sched.h | 1
+ include/linux/signal.h | 1
+ kernel/exit.c | 2 -
+ kernel/fork.c | 1
+ kernel/signal.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++---
+ 5 files changed, 83 insertions(+), 5 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1407,6 +1407,7 @@ struct task_struct {
+ /* signal handlers */
+ struct signal_struct *signal;
+ struct sighand_struct *sighand;
++ struct sigqueue *sigqueue_cache;
+
+ sigset_t blocked, real_blocked;
+ sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
+--- a/include/linux/signal.h
++++ b/include/linux/signal.h
+@@ -226,6 +226,7 @@ static inline void init_sigpending(struc
+ }
+
+ extern void flush_sigqueue(struct sigpending *queue);
++extern void flush_task_sigqueue(struct task_struct *tsk);
+
+ /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
+ static inline int valid_signal(unsigned long sig)
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -143,7 +143,7 @@ static void __exit_signal(struct task_st
+ * Do this under ->siglock, we can race with another thread
+ * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
+ */
+- flush_sigqueue(&tsk->pending);
++ flush_task_sigqueue(tsk);
+ tsk->sighand = NULL;
+ spin_unlock(&sighand->siglock);
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1230,6 +1230,7 @@ static struct task_struct *copy_process(
+ spin_lock_init(&p->alloc_lock);
+
+ init_sigpending(&p->pending);
++ p->sigqueue_cache = NULL;
+
+ p->utime = p->stime = p->gtime = 0;
+ p->utimescaled = p->stimescaled = 0;
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -348,13 +348,45 @@ static bool task_participate_group_stop(
+ return false;
+ }
+
++#ifdef __HAVE_ARCH_CMPXCHG
++static inline struct sigqueue *get_task_cache(struct task_struct *t)
++{
++ struct sigqueue *q = t->sigqueue_cache;
++
++ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
++ return NULL;
++ return q;
++}
++
++static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
++{
++ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
++ return 0;
++ return 1;
++}
++
++#else
++
++static inline struct sigqueue *get_task_cache(struct task_struct *t)
++{
++ return NULL;
++}
++
++static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
++{
++ return 1;
++}
++
++#endif
++
+ /*
+ * allocate a new signal queue record
+ * - this may be called without locks if and only if t == current, otherwise an
+ * appropriate lock must be held to stop the target task from exiting
+ */
+ static struct sigqueue *
+-__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
++__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
++ int override_rlimit, int fromslab)
+ {
+ struct sigqueue *q = NULL;
+ struct user_struct *user;
+@@ -371,7 +403,10 @@ __sigqueue_alloc(int sig, struct task_st
+ if (override_rlimit ||
+ atomic_read(&user->sigpending) <=
+ task_rlimit(t, RLIMIT_SIGPENDING)) {
+- q = kmem_cache_alloc(sigqueue_cachep, flags);
++ if (!fromslab)
++ q = get_task_cache(t);
++ if (!q)
++ q = kmem_cache_alloc(sigqueue_cachep, flags);
+ } else {
+ print_dropped_signal(sig);
+ }
+@@ -388,6 +423,13 @@ __sigqueue_alloc(int sig, struct task_st
+ return q;
+ }
+
++static struct sigqueue *
++__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
++ int override_rlimit)
++{
++ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
++}
++
+ static void __sigqueue_free(struct sigqueue *q)
+ {
+ if (q->flags & SIGQUEUE_PREALLOC)
+@@ -397,6 +439,21 @@ static void __sigqueue_free(struct sigqu
+ kmem_cache_free(sigqueue_cachep, q);
+ }
+
++static void sigqueue_free_current(struct sigqueue *q)
++{
++ struct user_struct *up;
++
++ if (q->flags & SIGQUEUE_PREALLOC)
++ return;
++
++ up = q->user;
++ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
++ atomic_dec(&up->sigpending);
++ free_uid(up);
++ } else
++ __sigqueue_free(q);
++}
++
+ void flush_sigqueue(struct sigpending *queue)
+ {
+ struct sigqueue *q;
+@@ -410,6 +467,21 @@ void flush_sigqueue(struct sigpending *q
+ }
+
+ /*
++ * Called from __exit_signal. Flush tsk->pending and
++ * tsk->sigqueue_cache
++ */
++void flush_task_sigqueue(struct task_struct *tsk)
++{
++ struct sigqueue *q;
++
++ flush_sigqueue(&tsk->pending);
++
++ q = get_task_cache(tsk);
++ if (q)
++ kmem_cache_free(sigqueue_cachep, q);
++}
++
++/*
+ * Flush all pending signals for a task.
+ */
+ void __flush_signals(struct task_struct *t)
+@@ -561,7 +633,7 @@ static void collect_signal(int sig, stru
+ still_pending:
+ list_del_init(&first->list);
+ copy_siginfo(info, &first->info);
+- __sigqueue_free(first);
++ sigqueue_free_current(first);
+ } else {
+ /*
+ * Ok, it wasn't in the queue. This must be
+@@ -607,6 +679,8 @@ int dequeue_signal(struct task_struct *t
+ {
+ int signr;
+
++ WARN_ON_ONCE(tsk != current);
++
+ /* We only dequeue private signals from ourselves, we don't let
+ * signalfd steal them
+ */
+@@ -1545,7 +1619,8 @@ EXPORT_SYMBOL(kill_pid);
+ */
+ struct sigqueue *sigqueue_alloc(void)
+ {
+- struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
++ /* Preallocated sigqueue objects always from the slabcache ! */
++ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
+
+ if (q)
+ q->flags |= SIGQUEUE_PREALLOC;
diff --git a/patches/skbufhead-raw-lock.patch b/patches/skbufhead-raw-lock.patch
new file mode 100644
index 0000000..3f27b9e
--- /dev/null
+++ b/patches/skbufhead-raw-lock.patch
@@ -0,0 +1,127 @@
+Subject: skbufhead-raw-lock.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 12 Jul 2011 15:38:34 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/netdevice.h | 1 +
+ include/linux/skbuff.h | 7 +++++++
+ net/core/dev.c | 26 ++++++++++++++++++++------
+ 3 files changed, 28 insertions(+), 6 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1783,6 +1783,7 @@ struct softnet_data {
+ unsigned int dropped;
+ struct sk_buff_head input_pkt_queue;
+ struct napi_struct backlog;
++ struct sk_buff_head tofree_queue;
+ };
+
+ static inline void input_queue_head_incr(struct softnet_data *sd)
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -132,6 +132,7 @@ struct sk_buff_head {
+
+ __u32 qlen;
+ spinlock_t lock;
++ raw_spinlock_t raw_lock;
+ };
+
+ struct sk_buff;
+@@ -1008,6 +1009,12 @@ static inline void skb_queue_head_init(s
+ __skb_queue_head_init(list);
+ }
+
++static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
++{
++ raw_spin_lock_init(&list->raw_lock);
++ __skb_queue_head_init(list);
++}
++
+ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
+ struct lock_class_key *class)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_ind
+ static inline void rps_lock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+- spin_lock(&sd->input_pkt_queue.lock);
++ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+
+ static inline void rps_unlock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+- spin_unlock(&sd->input_pkt_queue.lock);
++ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+
+@@ -3528,7 +3528,7 @@ static void flush_backlog(void *arg)
+ skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
+ if (skb->dev == dev) {
+ __skb_unlink(skb, &sd->input_pkt_queue);
+- kfree_skb(skb);
++ __skb_queue_tail(&sd->tofree_queue, skb);
+ input_queue_head_incr(sd);
+ }
+ }
+@@ -3537,10 +3537,13 @@ static void flush_backlog(void *arg)
+ skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+ if (skb->dev == dev) {
+ __skb_unlink(skb, &sd->process_queue);
+- kfree_skb(skb);
++ __skb_queue_tail(&sd->tofree_queue, skb);
+ input_queue_head_incr(sd);
+ }
+ }
++
++ if (!skb_queue_empty(&sd->tofree_queue))
++ raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ }
+
+ static int napi_gro_complete(struct sk_buff *skb)
+@@ -4045,10 +4048,17 @@ static void net_rx_action(struct softirq
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ unsigned long time_limit = jiffies + 2;
+ int budget = netdev_budget;
++ struct sk_buff *skb;
+ void *have;
+
+ local_irq_disable();
+
++ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
++ local_irq_enable();
++ kfree_skb(skb);
++ local_irq_disable();
++ }
++
+ while (!list_empty(&sd->poll_list)) {
+ struct napi_struct *n;
+ int work, weight;
+@@ -6530,6 +6540,9 @@ static int dev_cpu_callback(struct notif
+ netif_rx(skb);
+ input_queue_head_incr(oldsd);
+ }
++ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
++ kfree_skb(skb);
++ }
+
+ return NOTIFY_OK;
+ }
+@@ -6802,8 +6815,9 @@ static int __init net_dev_init(void)
+ struct softnet_data *sd = &per_cpu(softnet_data, i);
+
+ memset(sd, 0, sizeof(*sd));
+- skb_queue_head_init(&sd->input_pkt_queue);
+- skb_queue_head_init(&sd->process_queue);
++ skb_queue_head_init_raw(&sd->input_pkt_queue);
++ skb_queue_head_init_raw(&sd->process_queue);
++ skb_queue_head_init_raw(&sd->tofree_queue);
+ sd->completion_queue = NULL;
+ INIT_LIST_HEAD(&sd->poll_list);
+ sd->output_queue = NULL;
diff --git a/patches/slub-enable-irqs-for-no-wait.patch b/patches/slub-enable-irqs-for-no-wait.patch
new file mode 100644
index 0000000..cd0904f
--- /dev/null
+++ b/patches/slub-enable-irqs-for-no-wait.patch
@@ -0,0 +1,46 @@
+Subject: slub: Enable irqs for __GFP_WAIT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 09 Jan 2013 12:08:15 +0100
+
+SYSTEM_RUNNING might be too late for enabling interrupts. Allocations
+with __GFP_WAIT can happen before that, so use the __GFP_WAIT flag as
+an indicator as well.
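+
+The resulting condition, written out as a plain helper (sketch only;
+slab_alloc_enable_irqs() and the rt_full parameter are invented here,
+the real code computes a local "enableirqs" bool inside allocate_slab()):
+
+  #include <stdbool.h>
+
+  #define __GFP_WAIT 0x10u                       /* stand-in value */
+  enum system_states { SYSTEM_BOOTING, SYSTEM_RUNNING };
+
+  static bool slab_alloc_enable_irqs(unsigned int gfp_flags,
+                                     enum system_states state, bool rt_full)
+  {
+      bool enableirqs = (gfp_flags & __GFP_WAIT) != 0;
+
+      /* On RT, also enable once the system is fully up. */
+      if (rt_full)
+          enableirqs |= (state == SYSTEM_RUNNING);
+
+      return enableirqs;
+  }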
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ mm/slub.c | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1280,14 +1280,15 @@ static struct page *allocate_slab(struct
+ struct page *page;
+ struct kmem_cache_order_objects oo = s->oo;
+ gfp_t alloc_gfp;
++ bool enableirqs;
+
+ flags &= gfp_allowed_mask;
+
++ enableirqs = (flags & __GFP_WAIT) != 0;
+ #ifdef CONFIG_PREEMPT_RT_FULL
+- if (system_state == SYSTEM_RUNNING)
+-#else
+- if (flags & __GFP_WAIT)
++ enableirqs |= system_state == SYSTEM_RUNNING;
+ #endif
++ if (enableirqs)
+ local_irq_enable();
+
+ flags |= s->allocflags;
+@@ -1327,11 +1328,7 @@ static struct page *allocate_slab(struct
+ kmemcheck_mark_unallocated_pages(page, pages);
+ }
+
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- if (system_state == SYSTEM_RUNNING)
+-#else
+- if (flags & __GFP_WAIT)
+-#endif
++ if (enableirqs)
+ local_irq_disable();
+ if (!page)
+ return NULL;
diff --git a/patches/softirq-adapt-nohz-pending-debug-code-to-new-scheme.patch b/patches/softirq-adapt-nohz-pending-debug-code-to-new-scheme.patch
new file mode 100644
index 0000000..f1599ff
--- /dev/null
+++ b/patches/softirq-adapt-nohz-pending-debug-code-to-new-scheme.patch
@@ -0,0 +1,174 @@
+Subject: softirq: Adapt NOHZ softirq pending check to new RT scheme
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 28 Oct 2012 13:46:16 +0000
+
+We can't rely on ksoftirqd anymore. We need to check the tasks which
+run a particular softirq, and if such a task is PI-blocked, ignore the
+other pending bits of that task as well.
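+
+In other words, the idle check walks a per-cpu table of softirq runner
+tasks and masks out bits owned by a blocked or runnable runner. A
+stand-alone model of that filter (the *_model names are invented; the
+real code keys off tsk->pi_blocked_on and tsk->state under pi_lock):
+
+  #define NR_SOFTIRQS 10
+
+  struct task_model {
+      int pi_blocked;                 /* blocked on a PI lock */
+      int running;                    /* TASK_RUNNING */
+      unsigned int softirqs_raised;   /* bits this task will still handle */
+  };
+
+  struct softirq_runner_model {
+      struct task_model *runner[NR_SOFTIRQS];
+  };
+
+  static unsigned int filter_idle_warning(struct softirq_runner_model *sr,
+                                          unsigned int pending)
+  {
+      unsigned int warnpending = pending;
+      int i;
+
+      for (i = 0; i < NR_SOFTIRQS; i++) {
+          struct task_model *tsk = sr->runner[i];
+
+          /* The runner will handle its raised bits once it runs again. */
+          if (tsk && (tsk->pi_blocked || tsk->running)) {
+              warnpending &= ~tsk->softirqs_raised;
+              warnpending &= ~(1u << i);
+          }
+      }
+      return warnpending;
+  }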
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/softirq.c | 83 ++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 58 insertions(+), 25 deletions(-)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -66,46 +66,71 @@ char *softirq_to_name[NR_SOFTIRQS] = {
+
+ #ifdef CONFIG_NO_HZ
+ # ifdef CONFIG_PREEMPT_RT_FULL
++
++struct softirq_runner {
++ struct task_struct *runner[NR_SOFTIRQS];
++};
++
++static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
++
++static inline void softirq_set_runner(unsigned int sirq)
++{
++ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
++
++ sr->runner[sirq] = current;
++}
++
++static inline void softirq_clr_runner(unsigned int sirq)
++{
++ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
++
++ sr->runner[sirq] = NULL;
++}
++
+ /*
+- * On preempt-rt a softirq might be blocked on a lock. There might be
+- * no other runnable task on this CPU because the lock owner runs on
+- * some other CPU. So we have to go into idle with the pending bit
+- * set. Therefor we need to check this otherwise we warn about false
+- * positives which confuses users and defeats the whole purpose of
+- * this test.
++ * On preempt-rt a softirq running context might be blocked on a
++ * lock. There might be no other runnable task on this CPU because the
++ * lock owner runs on some other CPU. So we have to go into idle with
++ * the pending bit set. Therefor we need to check this otherwise we
++ * warn about false positives which confuses users and defeats the
++ * whole purpose of this test.
+ *
+ * This code is called with interrupts disabled.
+ */
+ void softirq_check_pending_idle(void)
+ {
+ static int rate_limit;
+- u32 warnpending = 0, pending;
++ struct softirq_runner *sr = &__get_cpu_var(softirq_runners);
++ u32 warnpending;
++ int i;
+
+ if (rate_limit >= 10)
+ return;
+
+- pending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
+- if (pending) {
+- struct task_struct *tsk;
++ warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
++ for (i = 0; i < NR_SOFTIRQS; i++) {
++ struct task_struct *tsk = sr->runner[i];
+
+- tsk = __get_cpu_var(ksoftirqd);
+ /*
+ * The wakeup code in rtmutex.c wakes up the task
+ * _before_ it sets pi_blocked_on to NULL under
+ * tsk->pi_lock. So we need to check for both: state
+ * and pi_blocked_on.
+ */
+- raw_spin_lock(&tsk->pi_lock);
+-
+- if (!tsk->pi_blocked_on && !(tsk->state == TASK_RUNNING))
+- warnpending = 1;
+-
+- raw_spin_unlock(&tsk->pi_lock);
++ if (tsk) {
++ raw_spin_lock(&tsk->pi_lock);
++ if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
++ /* Clear all bits pending in that task */
++ warnpending &= ~(tsk->softirqs_raised);
++ warnpending &= ~(1 << i);
++ }
++ raw_spin_unlock(&tsk->pi_lock);
++ }
+ }
+
+ if (warnpending) {
+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+- pending);
++ warnpending);
+ rate_limit++;
+ }
+ }
+@@ -125,6 +150,10 @@ void softirq_check_pending_idle(void)
+ }
+ }
+ # endif
++
++#else /* !NO_HZ */
++static inline void softirq_set_runner(unsigned int sirq) { }
++static inline void softirq_clr_runner(unsigned int sirq) { }
+ #endif
+
+ /*
+@@ -478,6 +507,7 @@ static void do_current_softirqs(int need
+ */
+ lock_softirq(i);
+ local_irq_disable();
++ softirq_set_runner(i);
+ /*
+ * Check with the local_softirq_pending() bits,
+ * whether we need to process this still or if someone
+@@ -488,6 +518,7 @@ static void do_current_softirqs(int need
+ set_softirq_pending(pending & ~mask);
+ do_single_softirq(i, need_rcu_bh_qs);
+ }
++ softirq_clr_runner(i);
+ unlock_softirq(i);
+ WARN_ON(current->softirq_nestcnt != 1);
+ }
+@@ -558,7 +589,7 @@ void thread_do_softirq(void)
+ }
+ }
+
+-void __raise_softirq_irqoff(unsigned int nr)
++static void do_raise_softirq_irqoff(unsigned int nr)
+ {
+ trace_softirq_raise(nr);
+ or_softirq_pending(1UL << nr);
+@@ -575,12 +606,19 @@ void __raise_softirq_irqoff(unsigned int
+ __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+ }
+
++void __raise_softirq_irqoff(unsigned int nr)
++{
++ do_raise_softirq_irqoff(nr);
++ if (!in_irq() && !current->softirq_nestcnt)
++ wakeup_softirqd();
++}
++
+ /*
+ * This function must run with irqs disabled!
+ */
+ void raise_softirq_irqoff(unsigned int nr)
+ {
+- __raise_softirq_irqoff(nr);
++ do_raise_softirq_irqoff(nr);
+
+ /*
+ * If we're in an hard interrupt we let irq return code deal
+@@ -602,11 +640,6 @@ void raise_softirq_irqoff(unsigned int n
+ wakeup_softirqd();
+ }
+
+-void do_raise_softirq_irqoff(unsigned int nr)
+-{
+- raise_softirq_irqoff(nr);
+-}
+-
+ static inline int ksoftirqd_softirq_pending(void)
+ {
+ return current->softirqs_raised;
diff --git a/patches/softirq-disable-softirq-stacks-for-rt.patch b/patches/softirq-disable-softirq-stacks-for-rt.patch
new file mode 100644
index 0000000..2d42abe
--- /dev/null
+++ b/patches/softirq-disable-softirq-stacks-for-rt.patch
@@ -0,0 +1,173 @@
+Subject: softirq-disable-softirq-stacks-for-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 18 Jul 2011 13:59:17 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/powerpc/kernel/irq.c | 2 ++
+ arch/powerpc/kernel/misc_32.S | 2 ++
+ arch/powerpc/kernel/misc_64.S | 2 ++
+ arch/sh/kernel/irq.c | 2 ++
+ arch/sparc/kernel/irq_64.c | 2 ++
+ arch/x86/kernel/entry_64.S | 2 ++
+ arch/x86/kernel/irq_32.c | 2 ++
+ arch/x86/kernel/irq_64.c | 3 ++-
+ include/linux/interrupt.h | 3 +--
+ 9 files changed, 17 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/kernel/irq.c
++++ b/arch/powerpc/kernel/irq.c
+@@ -584,6 +584,7 @@ void irq_ctx_init(void)
+ }
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ static inline void do_softirq_onstack(void)
+ {
+ struct thread_info *curtp, *irqtp;
+@@ -620,6 +621,7 @@ void do_softirq(void)
+
+ local_irq_restore(flags);
+ }
++#endif
+
+ irq_hw_number_t virq_to_hw(unsigned int virq)
+ {
+--- a/arch/powerpc/kernel/misc_32.S
++++ b/arch/powerpc/kernel/misc_32.S
+@@ -36,6 +36,7 @@
+
+ .text
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ _GLOBAL(call_do_softirq)
+ mflr r0
+ stw r0,4(r1)
+@@ -46,6 +47,7 @@ _GLOBAL(call_do_softirq)
+ lwz r0,4(r1)
+ mtlr r0
+ blr
++#endif
+
+ _GLOBAL(call_handle_irq)
+ mflr r0
+--- a/arch/powerpc/kernel/misc_64.S
++++ b/arch/powerpc/kernel/misc_64.S
+@@ -29,6 +29,7 @@
+
+ .text
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ _GLOBAL(call_do_softirq)
+ mflr r0
+ std r0,16(r1)
+@@ -39,6 +40,7 @@ _GLOBAL(call_do_softirq)
+ ld r0,16(r1)
+ mtlr r0
+ blr
++#endif
+
+ _GLOBAL(call_handle_irq)
+ ld r8,0(r6)
+--- a/arch/sh/kernel/irq.c
++++ b/arch/sh/kernel/irq.c
+@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
+ hardirq_ctx[cpu] = NULL;
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ asmlinkage void do_softirq(void)
+ {
+ unsigned long flags;
+@@ -191,6 +192,7 @@ asmlinkage void do_softirq(void)
+
+ local_irq_restore(flags);
+ }
++#endif
+ #else
+ static inline void handle_one_irq(unsigned int irq)
+ {
+--- a/arch/sparc/kernel/irq_64.c
++++ b/arch/sparc/kernel/irq_64.c
+@@ -698,6 +698,7 @@ void __irq_entry handler_irq(int pil, st
+ set_irq_regs(old_regs);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ void do_softirq(void)
+ {
+ unsigned long flags;
+@@ -723,6 +724,7 @@ void do_softirq(void)
+
+ local_irq_restore(flags);
+ }
++#endif
+
+ #ifdef CONFIG_HOTPLUG_CPU
+ void fixup_irqs(void)
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -1337,6 +1337,7 @@ bad_gs:
+ jmp 2b
+ .previous
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /* Call softirq on interrupt stack. Interrupts are off. */
+ ENTRY(call_softirq)
+ CFI_STARTPROC
+@@ -1356,6 +1357,7 @@ ENTRY(call_softirq)
+ ret
+ CFI_ENDPROC
+ END(call_softirq)
++#endif
+
+ #ifdef CONFIG_XEN
+ zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
+--- a/arch/x86/kernel/irq_32.c
++++ b/arch/x86/kernel/irq_32.c
+@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
+ cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ asmlinkage void do_softirq(void)
+ {
+ unsigned long flags;
+@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)
+
+ local_irq_restore(flags);
+ }
++#endif
+
+ bool handle_irq(unsigned irq, struct pt_regs *regs)
+ {
+--- a/arch/x86/kernel/irq_64.c
++++ b/arch/x86/kernel/irq_64.c
+@@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_
+ return true;
+ }
+
+-
++#ifndef CONFIG_PREEMPT_RT_FULL
+ extern void call_softirq(void);
+
+ asmlinkage void do_softirq(void)
+@@ -108,3 +108,4 @@ asmlinkage void do_softirq(void)
+ }
+ local_irq_restore(flags);
+ }
++#endif
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -445,10 +445,9 @@ struct softirq_action
+ void (*action)(struct softirq_action *);
+ };
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ asmlinkage void do_softirq(void);
+ asmlinkage void __do_softirq(void);
+-
+-#ifndef CONFIG_PREEMPT_RT_FULL
+ static inline void thread_do_softirq(void) { do_softirq(); }
+ #else
+ extern void thread_do_softirq(void);
diff --git a/patches/softirq-export-in-serving-softirq.patch b/patches/softirq-export-in-serving-softirq.patch
new file mode 100644
index 0000000..a8b79f3
--- /dev/null
+++ b/patches/softirq-export-in-serving-softirq.patch
@@ -0,0 +1,28 @@
+Subject: softirq: Export in_serving_softirq()
+From: John Kacur <jkacur@redhat.com>
+Date: Mon, 14 Nov 2011 02:44:43 +0100
+
+ERROR: "in_serving_softirq" [net/sched/cls_cgroup.ko] undefined!
+
+The above can be fixed by exporting in_serving_softirq
+
+Signed-off-by: John Kacur <jkacur@redhat.com>
+Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
+Cc: stable-rt@vger.kernel.org
+Link: http://lkml.kernel.org/r/1321235083-21756-2-git-send-email-jkacur@redhat.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/softirq.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -431,6 +431,7 @@ int in_serving_softirq(void)
+ preempt_enable();
+ return res;
+ }
++EXPORT_SYMBOL(in_serving_softirq);
+
+ /*
+ * Called with bh and local interrupts disabled. For full RT cpu must
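
For context, the export added above only matters to modular users such as
cls_cgroup. A minimal, hypothetical module sketch (the name and message are
illustrative, not part of the patch) that would fail to link on RT without
the EXPORT_SYMBOL():

#include <linux/module.h>
#include <linux/hardirq.h>

static int __init ctx_demo_init(void)
{
	/* On RT, in_serving_softirq() is a real function rather than a
	 * macro, so modular callers need the exported symbol. */
	pr_info("in softirq context: %d\n", in_serving_softirq());
	return 0;
}

static void __exit ctx_demo_exit(void)
{
}

module_init(ctx_demo_init);
module_exit(ctx_demo_exit);
MODULE_LICENSE("GPL");
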
diff --git a/patches/softirq-init-softirq-local-lock-after-per-cpu-section-is-set-up.patch b/patches/softirq-init-softirq-local-lock-after-per-cpu-section-is-set-up.patch
new file mode 100644
index 0000000..3481dd7
--- /dev/null
+++ b/patches/softirq-init-softirq-local-lock-after-per-cpu-section-is-set-up.patch
@@ -0,0 +1,133 @@
+Subject: softirq: Init softirq local lock after per cpu section is set up
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 04 Oct 2012 11:02:04 -0400
+
+I discovered this bug when booting 3.4-rt on my powerpc box. It crashed
+with the following report:
+
+------------[ cut here ]------------
+kernel BUG at /work/rt/stable-rt.git/kernel/rtmutex_common.h:75!
+Oops: Exception in kernel mode, sig: 5 [#1]
+PREEMPT SMP NR_CPUS=64 NUMA PA Semi PWRficient
+Modules linked in:
+NIP: c0000000004aa03c LR: c0000000004aa01c CTR: c00000000009b2ac
+REGS: c00000003e8d7950 TRAP: 0700 Not tainted (3.4.11-test-rt19)
+MSR: 9000000000029032 <SF,HV,EE,ME,IR,DR,RI> CR: 24000082 XER: 20000000
+SOFTE: 0
+TASK = c00000003e8fdcd0[11] 'ksoftirqd/1' THREAD: c00000003e8d4000 CPU: 1
+GPR00: 0000000000000001 c00000003e8d7bd0 c000000000d6cbb0 0000000000000000
+GPR04: c00000003e8fdcd0 0000000000000000 0000000024004082 c000000000011454
+GPR08: 0000000000000000 0000000080000001 c00000003e8fdcd1 0000000000000000
+GPR12: 0000000024000084 c00000000fff0280 ffffffffffffffff 000000003ffffad8
+GPR16: ffffffffffffffff 000000000072c798 0000000000000060 0000000000000000
+GPR20: 0000000000642741 000000000072c858 000000003ffffaf0 0000000000000417
+GPR24: 000000000072dcd0 c00000003e7ff990 0000000000000000 0000000000000001
+GPR28: 0000000000000000 c000000000792340 c000000000ccec78 c000000001182338
+NIP [c0000000004aa03c] .wakeup_next_waiter+0x44/0xb8
+LR [c0000000004aa01c] .wakeup_next_waiter+0x24/0xb8
+Call Trace:
+[c00000003e8d7bd0] [c0000000004aa01c] .wakeup_next_waiter+0x24/0xb8 (unreliable)
+[c00000003e8d7c60] [c0000000004a0320] .rt_spin_lock_slowunlock+0x8c/0xe4
+[c00000003e8d7ce0] [c0000000004a07cc] .rt_spin_unlock+0x54/0x64
+[c00000003e8d7d60] [c0000000000636bc] .__thread_do_softirq+0x130/0x174
+[c00000003e8d7df0] [c00000000006379c] .run_ksoftirqd+0x9c/0x1a4
+[c00000003e8d7ea0] [c000000000080b68] .kthread+0xa8/0xb4
+[c00000003e8d7f90] [c00000000001c2f8] .kernel_thread+0x54/0x70
+Instruction dump:
+60000000 e86d01c8 38630730 4bff7061 60000000 ebbf0008 7c7c1b78 e81d0040
+7fe00278 7c000074 7800d182 68000001 <0b000000> e88d01c8 387d0010 38840738
+
+The rtmutex_common.h:75 is:
+
+rt_mutex_top_waiter(struct rt_mutex *lock)
+{
+ struct rt_mutex_waiter *w;
+
+ w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter,
+ list_entry);
+ BUG_ON(w->lock != lock);
+
+ return w;
+}
+
+Where the waiter->lock is corrupted. I saw various other random bugs
+that all had to do with the softirq lock and plist. As the plist needs
+to be initialized before it is used, I investigated how this lock is
+initialized. It's initialized with:
+
+void __init softirq_early_init(void)
+{
+ local_irq_lock_init(local_softirq_lock);
+}
+
+Where:
+
+#define local_irq_lock_init(lvar) \
+ do { \
+ int __cpu; \
+ for_each_possible_cpu(__cpu) \
+ spin_lock_init(&per_cpu(lvar, __cpu).lock); \
+ } while (0)
+
+As the softirq lock is a local_irq_lock, which is a per_cpu lock, the
+initialization is done for all per_cpu versions of the lock. But let's
+look at where softirq_early_init() is called from.
+
+In init/main.c: start_kernel()
+
+/*
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
+ softirq_early_init();
+ tick_init();
+ boot_cpu_init();
+ page_address_init();
+ printk(KERN_NOTICE "%s", linux_banner);
+ setup_arch(&command_line);
+ mm_init_owner(&init_mm, &init_task);
+ mm_init_cpumask(&init_mm);
+ setup_command_line(command_line);
+ setup_nr_cpu_ids();
+ setup_per_cpu_areas();
+ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+
+One of the first things called is the initialization of the softirq
+lock. But looking further down, we see that the per_cpu areas have not
+been set up yet. Thus initializing a local_irq_lock() before the
+per_cpu section is set up may not work, as it initializes the per cpu
+locks before the per cpu areas exist.
+
+By moving the softirq_early_init() right after setup_per_cpu_areas(),
+the kernel boots fine.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: Clark Williams <clark@redhat.com>
+Cc: John Kacur <jkacur@redhat.com>
+Cc: Carsten Emde <cbe@osadl.org>
+Cc: vomlehn@texas.net
+Link: http://lkml.kernel.org/r/1349362924.6755.18.camel@gandalf.local.home
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ init/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/init/main.c
++++ b/init/main.c
+@@ -493,7 +493,6 @@ asmlinkage void __init start_kernel(void
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
+- softirq_early_init();
+ tick_init();
+ boot_cpu_init();
+ page_address_init();
+@@ -504,6 +503,7 @@ asmlinkage void __init start_kernel(void
+ setup_command_line(command_line);
+ setup_nr_cpu_ids();
+ setup_per_cpu_areas();
++ softirq_early_init();
+ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+
+ build_all_zonelists(NULL, NULL);
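
A rough sketch of why the ordering matters. DEFINE_LOCAL_IRQ_LOCK() and
local_irq_lock_init() are the RT locallock primitives added elsewhere in
this queue; the lock name below is purely illustrative:

static DEFINE_LOCAL_IRQ_LOCK(example_lock);

static void __init example_early_init(void)
{
	/*
	 * local_irq_lock_init() walks for_each_possible_cpu() and
	 * initializes per_cpu(example_lock, cpu).lock. Run before
	 * setup_per_cpu_areas(), those per_cpu references do not yet
	 * point at the final per-CPU copies, so the locks actually
	 * used later stay uninitialized - hence the plist corruption
	 * in the oops above.
	 */
	local_irq_lock_init(example_lock);
}
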
diff --git a/patches/softirq-local-lock.patch b/patches/softirq-local-lock.patch
new file mode 100644
index 0000000..12d490e
--- /dev/null
+++ b/patches/softirq-local-lock.patch
@@ -0,0 +1,315 @@
+Subject: softirq-local-lock.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 15:57:18 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/hardirq.h | 16 +++-
+ include/linux/interrupt.h | 12 +++
+ include/linux/sched.h | 1
+ init/main.c | 1
+ kernel/softirq.c | 166 +++++++++++++++++++++++++++++++++++++++++++++-
+ 5 files changed, 191 insertions(+), 5 deletions(-)
+
+--- a/include/linux/hardirq.h
++++ b/include/linux/hardirq.h
+@@ -61,7 +61,11 @@
+ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+ #define NMI_OFFSET (1UL << NMI_SHIFT)
+
+-#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
++#else
++# define SOFTIRQ_DISABLE_OFFSET (0)
++#endif
+
+ #ifndef PREEMPT_ACTIVE
+ #define PREEMPT_ACTIVE_BITS 1
+@@ -74,10 +78,17 @@
+ #endif
+
+ #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
+-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+ #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+ | NMI_MASK))
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
++# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
++#else
++# define softirq_count() (0U)
++extern int in_serving_softirq(void);
++#endif
++
+ /*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+@@ -87,7 +98,6 @@
+ #define in_irq() (hardirq_count())
+ #define in_softirq() (softirq_count())
+ #define in_interrupt() (irq_count())
+-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+
+ /*
+ * Are we in NMI context?
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -447,7 +447,13 @@ struct softirq_action
+
+ asmlinkage void do_softirq(void);
+ asmlinkage void __do_softirq(void);
++
++#ifndef CONFIG_PREEMPT_RT_FULL
+ static inline void thread_do_softirq(void) { do_softirq(); }
++#else
++extern void thread_do_softirq(void);
++#endif
++
+ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
+ extern void softirq_init(void);
+ extern void __raise_softirq_irqoff(unsigned int nr);
+@@ -634,6 +640,12 @@ void tasklet_hrtimer_cancel(struct taskl
+ tasklet_kill(&ttimer->tasklet);
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++extern void softirq_early_init(void);
++#else
++static inline void softirq_early_init(void) { }
++#endif
++
+ /*
+ * Autoprobing for irqs:
+ *
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1633,6 +1633,7 @@ struct task_struct {
+ #endif
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ struct rcu_head put_rcu;
++ int softirq_nestcnt;
+ #endif
+ };
+
+--- a/init/main.c
++++ b/init/main.c
+@@ -493,6 +493,7 @@ asmlinkage void __init start_kernel(void
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
++ softirq_early_init();
+ tick_init();
+ boot_cpu_init();
+ page_address_init();
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -25,6 +25,7 @@
+ #include <linux/smp.h>
+ #include <linux/smpboot.h>
+ #include <linux/tick.h>
++#include <linux/locallock.h>
+
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/irq.h>
+@@ -168,6 +169,7 @@ static void handle_pending_softirqs(u32
+ local_irq_disable();
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /*
+ * preempt_count and SOFTIRQ_OFFSET usage:
+ * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
+@@ -360,6 +362,162 @@ asmlinkage void do_softirq(void)
+
+ #endif
+
++static inline void local_bh_disable_nort(void) { local_bh_disable(); }
++static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
++
++#else /* !PREEMPT_RT_FULL */
++
++/*
++ * On RT we serialize softirq execution with a cpu local lock
++ */
++static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock);
++static DEFINE_PER_CPU(struct task_struct *, local_softirq_runner);
++
++static void __do_softirq(void);
++
++void __init softirq_early_init(void)
++{
++ local_irq_lock_init(local_softirq_lock);
++}
++
++void local_bh_disable(void)
++{
++ migrate_disable();
++ current->softirq_nestcnt++;
++}
++EXPORT_SYMBOL(local_bh_disable);
++
++void local_bh_enable(void)
++{
++ if (WARN_ON(current->softirq_nestcnt == 0))
++ return;
++
++ if ((current->softirq_nestcnt == 1) &&
++ local_softirq_pending() &&
++ local_trylock(local_softirq_lock)) {
++
++ local_irq_disable();
++ if (local_softirq_pending())
++ __do_softirq();
++ local_irq_enable();
++ local_unlock(local_softirq_lock);
++ WARN_ON(current->softirq_nestcnt != 1);
++ }
++ current->softirq_nestcnt--;
++ migrate_enable();
++}
++EXPORT_SYMBOL(local_bh_enable);
++
++void local_bh_enable_ip(unsigned long ip)
++{
++ local_bh_enable();
++}
++EXPORT_SYMBOL(local_bh_enable_ip);
++
++/* For tracing */
++int notrace __in_softirq(void)
++{
++ if (__get_cpu_var(local_softirq_lock).owner == current)
++ return __get_cpu_var(local_softirq_lock).nestcnt;
++ return 0;
++}
++
++int in_serving_softirq(void)
++{
++ int res;
++
++ preempt_disable();
++ res = __get_cpu_var(local_softirq_runner) == current;
++ preempt_enable();
++ return res;
++}
++
++/*
++ * Called with bh and local interrupts disabled. For full RT cpu must
++ * be pinned.
++ */
++static void __do_softirq(void)
++{
++ u32 pending = local_softirq_pending();
++ int cpu = smp_processor_id();
++
++ current->softirq_nestcnt++;
++
++ /* Reset the pending bitmask before enabling irqs */
++ set_softirq_pending(0);
++
++ __get_cpu_var(local_softirq_runner) = current;
++
++ lockdep_softirq_enter();
++
++ handle_pending_softirqs(pending, cpu);
++
++ pending = local_softirq_pending();
++ if (pending)
++ wakeup_softirqd();
++
++ lockdep_softirq_exit();
++ __get_cpu_var(local_softirq_runner) = NULL;
++
++ current->softirq_nestcnt--;
++}
++
++static int __thread_do_softirq(int cpu)
++{
++ /*
++ * Prevent the current cpu from going offline.
++ * pin_current_cpu() can reenable preemption and block on the
++ * hotplug mutex. When it returns, the current cpu is
++ * pinned. It might be the wrong one, but the offline check
++ * below catches that.
++ */
++ pin_current_cpu();
++ /*
++ * If called from ksoftirqd (cpu >= 0) we need to check
++ * whether we are on the wrong cpu due to cpu offlining. If
++ * called via thread_do_softirq() no action required.
++ */
++ if (cpu >= 0 && cpu_is_offline(cpu)) {
++ unpin_current_cpu();
++ return -1;
++ }
++ preempt_enable();
++ local_lock(local_softirq_lock);
++ local_irq_disable();
++ /*
++ * We cannot switch stacks on RT as we want to be able to
++ * schedule!
++ */
++ if (local_softirq_pending())
++ __do_softirq();
++ local_unlock(local_softirq_lock);
++ unpin_current_cpu();
++ preempt_disable();
++ local_irq_enable();
++ return 0;
++}
++
++/*
++ * Called from netif_rx_ni(). Preemption enabled.
++ */
++void thread_do_softirq(void)
++{
++ if (!in_serving_softirq()) {
++ preempt_disable();
++ __thread_do_softirq(-1);
++ preempt_enable();
++ }
++}
++
++static int ksoftirqd_do_softirq(int cpu)
++{
++ return __thread_do_softirq(cpu);
++}
++
++static inline void local_bh_disable_nort(void) { }
++static inline void _local_bh_enable_nort(void) { }
++
++#endif /* PREEMPT_RT_FULL */
+ /*
+ * Enter an interrupt context.
+ */
+@@ -373,9 +531,9 @@ void irq_enter(void)
+ * Prevent raise_softirq from needlessly waking up ksoftirqd
+ * here, as softirq will be serviced on return from interrupt.
+ */
+- local_bh_disable();
++ local_bh_disable_nort();
+ tick_check_idle(cpu);
+- _local_bh_enable();
++ _local_bh_enable_nort();
+ }
+
+ __irq_enter();
+@@ -383,6 +541,7 @@ void irq_enter(void)
+
+ static inline void invoke_softirq(void)
+ {
++#ifndef CONFIG_PREEMPT_RT_FULL
+ if (!force_irqthreads) {
+ #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+ __do_softirq();
+@@ -395,6 +554,9 @@ static inline void invoke_softirq(void)
+ wakeup_softirqd();
+ __local_bh_enable(SOFTIRQ_OFFSET);
+ }
++#else
++ wakeup_softirqd();
++#endif
+ }
+
+ /*
diff --git a/patches/softirq-make-fifo.patch b/patches/softirq-make-fifo.patch
new file mode 100644
index 0000000..82f3235
--- /dev/null
+++ b/patches/softirq-make-fifo.patch
@@ -0,0 +1,50 @@
+Subject: softirq-make-fifo.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 21:06:43 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/softirq.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -364,6 +364,8 @@ asmlinkage void do_softirq(void)
+
+ static inline void local_bh_disable_nort(void) { local_bh_disable(); }
+ static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
++static void ksoftirqd_set_sched_params(unsigned int cpu) { }
++static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
+
+ #else /* !PREEMPT_RT_FULL */
+
+@@ -518,6 +520,20 @@ static int ksoftirqd_do_softirq(int cpu)
+ static inline void local_bh_disable_nort(void) { }
+ static inline void _local_bh_enable_nort(void) { }
+
++static inline void ksoftirqd_set_sched_params(unsigned int cpu)
++{
++ struct sched_param param = { .sched_priority = 1 };
++
++ sched_setscheduler(current, SCHED_FIFO, &param);
++}
++
++static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
++{
++ struct sched_param param = { .sched_priority = 0 };
++
++ sched_setscheduler(current, SCHED_NORMAL, &param);
++}
++
+ #endif /* PREEMPT_RT_FULL */
+ /*
+ * Enter an interrupt context.
+@@ -1065,6 +1081,8 @@ static struct notifier_block __cpuinitda
+
+ static struct smp_hotplug_thread softirq_threads = {
+ .store = &ksoftirqd,
++ .setup = ksoftirqd_set_sched_params,
++ .cleanup = ksoftirqd_clr_sched_params,
+ .thread_should_run = ksoftirqd_should_run,
+ .thread_fn = run_ksoftirqd,
+ .thread_comm = "ksoftirqd/%u",
diff --git a/patches/softirq-make-serving-softirqs-a-task-flag.patch b/patches/softirq-make-serving-softirqs-a-task-flag.patch
new file mode 100644
index 0000000..fbd7bed
--- /dev/null
+++ b/patches/softirq-make-serving-softirqs-a-task-flag.patch
@@ -0,0 +1,74 @@
+Subject: softirq: Make serving softirqs a task flag
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 04 Oct 2012 14:30:25 +0100
+
+Avoid the percpu softirq_runner pointer magic by using a task flag.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 1 +
+ kernel/softirq.c | 20 +++-----------------
+ 2 files changed, 4 insertions(+), 17 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1856,6 +1856,7 @@ extern void thread_group_cputime_adjuste
+ /*
+ * Per process flags
+ */
++#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
+ #define PF_EXITING 0x00000004 /* getting shut down */
+ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
+ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -375,7 +375,6 @@ static void ksoftirqd_clr_sched_params(u
+ * On RT we serialize softirq execution with a cpu local lock
+ */
+ static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock);
+-static DEFINE_PER_CPU(struct task_struct *, local_softirq_runner);
+
+ static void __do_softirq_common(int need_rcu_bh_qs);
+
+@@ -430,22 +429,9 @@ void _local_bh_enable(void)
+ }
+ EXPORT_SYMBOL(_local_bh_enable);
+
+-/* For tracing */
+-int notrace __in_softirq(void)
+-{
+- if (__get_cpu_var(local_softirq_lock).owner == current)
+- return __get_cpu_var(local_softirq_lock).nestcnt;
+- return 0;
+-}
+-
+ int in_serving_softirq(void)
+ {
+- int res;
+-
+- preempt_disable();
+- res = __get_cpu_var(local_softirq_runner) == current;
+- preempt_enable();
+- return res;
++ return current->flags & PF_IN_SOFTIRQ;
+ }
+ EXPORT_SYMBOL(in_serving_softirq);
+
+@@ -463,7 +449,7 @@ static void __do_softirq_common(int need
+ /* Reset the pending bitmask before enabling irqs */
+ set_softirq_pending(0);
+
+- __get_cpu_var(local_softirq_runner) = current;
++ current->flags |= PF_IN_SOFTIRQ;
+
+ lockdep_softirq_enter();
+
+@@ -474,7 +460,7 @@ static void __do_softirq_common(int need
+ wakeup_softirqd();
+
+ lockdep_softirq_exit();
+- __get_cpu_var(local_softirq_runner) = NULL;
++ current->flags &= ~PF_IN_SOFTIRQ;
+
+ current->softirq_nestcnt--;
+ }
diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq-preempt-fix-3-re.patch
new file mode 100644
index 0000000..85b8188
--- /dev/null
+++ b/patches/softirq-preempt-fix-3-re.patch
@@ -0,0 +1,145 @@
+Subject: softirq: Check preemption after reenabling interrupts
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 13 Nov 2011 17:17:09 +0100 (CET)
+
+raise_softirq_irqoff() disables interrupts and wakes the softirq
+daemon, but after reenabling interrupts there is no preemption check,
+so the execution of the softirq thread might be delayed arbitrarily.
+
+In principle we could add that check to local_irq_enable/restore, but
+that's overkill as the raise_softirq_irqoff() sections are the only
+ones which show this behaviour.
+
+Reported-by: Carsten Emde <cbe@osadl.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ block/blk-iopoll.c | 3 +++
+ block/blk-softirq.c | 3 +++
+ include/linux/preempt.h | 3 +++
+ net/core/dev.c | 6 ++++++
+ 4 files changed, 15 insertions(+)
+
+--- a/block/blk-iopoll.c
++++ b/block/blk-iopoll.c
+@@ -38,6 +38,7 @@ void blk_iopoll_sched(struct blk_iopoll
+ list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+ EXPORT_SYMBOL(blk_iopoll_sched);
+
+@@ -135,6 +136,7 @@ static void blk_iopoll_softirq(struct so
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+
+ local_irq_enable();
++ preempt_check_resched_rt();
+ }
+
+ /**
+@@ -204,6 +206,7 @@ static int __cpuinit blk_iopoll_cpu_noti
+ &__get_cpu_var(blk_cpu_iopoll));
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ local_irq_enable();
++ preempt_check_resched_rt();
+ }
+
+ return NOTIFY_OK;
+--- a/block/blk-softirq.c
++++ b/block/blk-softirq.c
+@@ -51,6 +51,7 @@ static void trigger_softirq(void *data)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+
+ /*
+@@ -93,6 +94,7 @@ static int __cpuinit blk_cpu_notify(stru
+ &__get_cpu_var(blk_cpu_done));
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ local_irq_enable();
++ preempt_check_resched_rt();
+ }
+
+ return NOTIFY_OK;
+@@ -150,6 +152,7 @@ do_local:
+ goto do_local;
+
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+
+ /**
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -56,8 +56,10 @@ do { \
+
+ #ifndef CONFIG_PREEMPT_RT_BASE
+ # define preempt_enable_no_resched() sched_preempt_enable_no_resched()
++# define preempt_check_resched_rt() do { } while (0)
+ #else
+ # define preempt_enable_no_resched() preempt_enable()
++# define preempt_check_resched_rt() preempt_check_resched()
+ #endif
+
+ #define preempt_enable() \
+@@ -105,6 +107,7 @@ do { \
+ #define preempt_disable_notrace() do { } while (0)
+ #define preempt_enable_no_resched_notrace() do { } while (0)
+ #define preempt_enable_notrace() do { } while (0)
++#define preempt_check_resched_rt() do { } while (0)
+
+ #endif /* CONFIG_PREEMPT_COUNT */
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1946,6 +1946,7 @@ static inline void __netif_reschedule(st
+ sd->output_queue_tailp = &q->next_sched;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+
+ void __netif_schedule(struct Qdisc *q)
+@@ -1967,6 +1968,7 @@ void dev_kfree_skb_irq(struct sk_buff *s
+ sd->completion_queue = skb;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+ }
+ EXPORT_SYMBOL(dev_kfree_skb_irq);
+@@ -3052,6 +3054,7 @@ enqueue:
+ rps_unlock(sd);
+
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+
+ atomic_long_inc(&skb->dev->rx_dropped);
+ kfree_skb(skb);
+@@ -3932,6 +3935,7 @@ static void net_rps_action_and_irq_enabl
+ } else
+ #endif
+ local_irq_enable();
++ preempt_check_resched_rt();
+ }
+
+ static int process_backlog(struct napi_struct *napi, int quota)
+@@ -4004,6 +4008,7 @@ void __napi_schedule(struct napi_struct
+ local_irq_save(flags);
+ ____napi_schedule(&__get_cpu_var(softnet_data), n);
+ local_irq_restore(flags);
++ preempt_check_resched_rt();
+ }
+ EXPORT_SYMBOL(__napi_schedule);
+
+@@ -6560,6 +6565,7 @@ static int dev_cpu_callback(struct notif
+
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_enable();
++ preempt_check_resched_rt();
+
+ /* Process offline CPU's input_pkt_queue */
+ while ((skb = __skb_dequeue(&oldsd->process_queue))) {
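
All of the call sites touched above follow the same shape. A hedged
sketch of the pattern (the function and softirq number are illustrative;
preempt_check_resched_rt() is the helper introduced by this patch):

static void example_kick_tx(void)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(NET_TX_SOFTIRQ);	/* may wake ksoftirqd on RT */
	local_irq_restore(flags);
	/*
	 * The wakeup above may have made a higher priority softirq
	 * thread runnable; without this check its execution could be
	 * delayed until the next scheduling point.
	 */
	preempt_check_resched_rt();
}
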
diff --git a/patches/softirq-sanitize-softirq-pending.patch b/patches/softirq-sanitize-softirq-pending.patch
new file mode 100644
index 0000000..e4b841f
--- /dev/null
+++ b/patches/softirq-sanitize-softirq-pending.patch
@@ -0,0 +1,113 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 13:16:38 -0500
+Subject: softirq: Sanitize softirq pending for NOHZ/RT
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/interrupt.h | 2 +
+ kernel/softirq.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
+ kernel/time/tick-sched.c | 9 ------
+ 3 files changed, 66 insertions(+), 8 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -455,6 +455,8 @@ extern void __raise_softirq_irqoff(unsig
+ extern void raise_softirq_irqoff(unsigned int nr);
+ extern void raise_softirq(unsigned int nr);
+
++extern void softirq_check_pending_idle(void);
++
+ /* This is the worklist that queues up per-cpu softirq work.
+ *
+ * send_remote_sendirq() adds work to these lists, and
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -62,6 +62,69 @@ char *softirq_to_name[NR_SOFTIRQS] = {
+ "TASKLET", "SCHED", "HRTIMER", "RCU"
+ };
+
++#ifdef CONFIG_NO_HZ
++# ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * On preempt-rt a softirq might be blocked on a lock. There might be
++ * no other runnable task on this CPU because the lock owner runs on
++ * some other CPU. So we have to go into idle with the pending bit
++ * set. Therefore we need to check this, otherwise we warn about false
++ * positives which confuses users and defeats the whole purpose of
++ * this test.
++ *
++ * This code is called with interrupts disabled.
++ */
++void softirq_check_pending_idle(void)
++{
++ static int rate_limit;
++ u32 warnpending = 0, pending;
++
++ if (rate_limit >= 10)
++ return;
++
++ pending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
++ if (pending) {
++ struct task_struct *tsk;
++
++ tsk = __get_cpu_var(ksoftirqd);
++ /*
++ * The wakeup code in rtmutex.c wakes up the task
++ * _before_ it sets pi_blocked_on to NULL under
++ * tsk->pi_lock. So we need to check for both: state
++ * and pi_blocked_on.
++ */
++ raw_spin_lock(&tsk->pi_lock);
++
++ if (!tsk->pi_blocked_on && !(tsk->state == TASK_RUNNING))
++ warnpending = 1;
++
++ raw_spin_unlock(&tsk->pi_lock);
++ }
++
++ if (warnpending) {
++ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
++ pending);
++ rate_limit++;
++ }
++}
++# else
++/*
++ * On !PREEMPT_RT we just printk rate limited:
++ */
++void softirq_check_pending_idle(void)
++{
++ static int rate_limit;
++
++ if (rate_limit < 10 &&
++ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
++ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
++ local_softirq_pending());
++ rate_limit++;
++ }
++}
++# endif
++#endif
++
+ /*
+ * we cannot loop indefinitely here to avoid userspace starvation,
+ * but we also don't want to introduce a worst case 1/HZ latency
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -481,14 +481,7 @@ static bool can_stop_idle_tick(int cpu,
+ return false;
+
+ if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+- static int ratelimit;
+-
+- if (ratelimit < 10 &&
+- (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+- (unsigned int) local_softirq_pending());
+- ratelimit++;
+- }
++ softirq_check_pending_idle();
+ return false;
+ }
+
diff --git a/patches/softirq-split-handling-function.patch b/patches/softirq-split-handling-function.patch
new file mode 100644
index 0000000..707aa44
--- /dev/null
+++ b/patches/softirq-split-handling-function.patch
@@ -0,0 +1,68 @@
+Subject: softirq: Split handling function
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 04 Oct 2012 15:33:53 +0100
+
+Split out the inner handling function, so RT can reuse it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/softirq.c | 43 +++++++++++++++++++++++--------------------
+ 1 file changed, 23 insertions(+), 20 deletions(-)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -142,31 +142,34 @@ static void wakeup_softirqd(void)
+ wake_up_process(tsk);
+ }
+
+-static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
++static void handle_softirq(unsigned int vec_nr, int cpu, int need_rcu_bh_qs)
+ {
+- struct softirq_action *h = softirq_vec;
++ struct softirq_action *h = softirq_vec + vec_nr;
+ unsigned int prev_count = preempt_count();
+
+- local_irq_enable();
+- for ( ; pending; h++, pending >>= 1) {
+- unsigned int vec_nr = h - softirq_vec;
++ kstat_incr_softirqs_this_cpu(vec_nr);
++ trace_softirq_entry(vec_nr);
++ h->action(h);
++ trace_softirq_exit(vec_nr);
++
++ if (unlikely(prev_count != preempt_count())) {
++ pr_err("softirq %u %s %p preempt count leak: %08x -> %08x\n",
++ vec_nr, softirq_to_name[vec_nr], h->action,
++ prev_count, (unsigned int) preempt_count());
++ preempt_count() = prev_count;
++ }
++ if (need_rcu_bh_qs)
++ rcu_bh_qs(cpu);
++}
+
+- if (!(pending & 1))
+- continue;
++static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
++{
++ unsigned int vec_nr;
+
+- kstat_incr_softirqs_this_cpu(vec_nr);
+- trace_softirq_entry(vec_nr);
+- h->action(h);
+- trace_softirq_exit(vec_nr);
+- if (unlikely(prev_count != preempt_count())) {
+- printk(KERN_ERR
+- "huh, entered softirq %u %s %p with preempt_count %08x exited with %08x?\n",
+- vec_nr, softirq_to_name[vec_nr], h->action,
+- prev_count, (unsigned int) preempt_count());
+- preempt_count() = prev_count;
+- }
+- if (need_rcu_bh_qs)
+- rcu_bh_qs(cpu);
++ local_irq_enable();
++ for (vec_nr = 0; pending; vec_nr++, pending >>= 1) {
++ if (pending & 1)
++ handle_softirq(vec_nr, cpu, need_rcu_bh_qs);
+ }
+ local_irq_disable();
+ }
diff --git a/patches/softirq-split-locks.patch b/patches/softirq-split-locks.patch
new file mode 100644
index 0000000..905f673
--- /dev/null
+++ b/patches/softirq-split-locks.patch
@@ -0,0 +1,452 @@
+Subject: softirq: Split softirq locks
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 04 Oct 2012 14:20:47 +0100
+
+The 3.x RT series removed the split softirq implementation in favour
+of pushing softirq processing into the context of the thread which
+raised it. This, however, prevents us from handling the various
+softirqs at different priorities. Now, instead of reintroducing the
+split softirq threads, we split the locks which serialize the softirq
+processing.
+
+If a softirq is raised in the context of a thread and the thread is in
+a bh disabled region, then the softirq is noted in a per-thread
+field. If the softirq is raised from hard interrupt context, then the
+bit is set in the flag field of ksoftirqd and ksoftirqd is invoked.
+When a thread leaves a bh disabled region, it tries to execute the
+softirqs which have been raised in its own context. It acquires the
+per softirq / per cpu lock for the softirq and then checks whether
+the softirq is still pending in the per cpu
+local_softirq_pending() field. If yes, it runs the softirq. If not,
+some other task has already executed it. This allows for zero config
+softirq elevation in the context of user space tasks or interrupt
+threads.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 1
+ kernel/softirq.c | 305 ++++++++++++++++++++++++++++++--------------------
+ 2 files changed, 185 insertions(+), 121 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1635,6 +1635,7 @@ struct task_struct {
+ #ifdef CONFIG_PREEMPT_RT_BASE
+ struct rcu_head put_rcu;
+ int softirq_nestcnt;
++ unsigned int softirqs_raised;
+ #endif
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ # if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -162,6 +162,7 @@ static void handle_softirq(unsigned int
+ rcu_bh_qs(cpu);
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
+ {
+ unsigned int vec_nr;
+@@ -174,7 +175,19 @@ static void handle_pending_softirqs(u32
+ local_irq_disable();
+ }
+
+-#ifndef CONFIG_PREEMPT_RT_FULL
++static void run_ksoftirqd(unsigned int cpu)
++{
++ local_irq_disable();
++ if (ksoftirqd_softirq_pending()) {
++ __do_softirq();
++ rcu_note_context_switch(cpu);
++ local_irq_enable();
++ cond_resched();
++ return;
++ }
++ local_irq_enable();
++}
++
+ /*
+ * preempt_count and SOFTIRQ_OFFSET usage:
+ * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
+@@ -367,28 +380,117 @@ asmlinkage void do_softirq(void)
+
+ #endif
+
++/*
++ * This function must run with irqs disabled!
++ */
++void raise_softirq_irqoff(unsigned int nr)
++{
++ __raise_softirq_irqoff(nr);
++
++ /*
++ * If we're in an interrupt or softirq, we're done
++ * (this also catches softirq-disabled code). We will
++ * actually run the softirq once we return from
++ * the irq or softirq.
++ *
++ * Otherwise we wake up ksoftirqd to make sure we
++ * schedule the softirq soon.
++ */
++ if (!in_interrupt())
++ wakeup_softirqd();
++}
++
++void __raise_softirq_irqoff(unsigned int nr)
++{
++ trace_softirq_raise(nr);
++ or_softirq_pending(1UL << nr);
++}
++
+ static inline void local_bh_disable_nort(void) { local_bh_disable(); }
+ static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
+ static void ksoftirqd_set_sched_params(unsigned int cpu) { }
+ static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
+
++static inline int ksoftirqd_softirq_pending(void)
++{
++ return local_softirq_pending();
++}
++
+ #else /* !PREEMPT_RT_FULL */
+
+ /*
+- * On RT we serialize softirq execution with a cpu local lock
++ * On RT we serialize softirq execution with a cpu local lock per softirq
+ */
+-static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock);
++static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
+
+-static void __do_softirq_common(int need_rcu_bh_qs);
++void __init softirq_early_init(void)
++{
++ int i;
+
+-void __do_softirq(void)
++ for (i = 0; i < NR_SOFTIRQS; i++)
++ local_irq_lock_init(local_softirq_locks[i]);
++}
++
++static void lock_softirq(int which)
+ {
+- __do_softirq_common(0);
++ __local_lock(&__get_cpu_var(local_softirq_locks[which]));
+ }
+
+-void __init softirq_early_init(void)
++static void unlock_softirq(int which)
+ {
+- local_irq_lock_init(local_softirq_lock);
++ __local_unlock(&__get_cpu_var(local_softirq_locks[which]));
++}
++
++static void do_single_softirq(int which, int need_rcu_bh_qs)
++{
++ unsigned long old_flags = current->flags;
++
++ current->flags &= ~PF_MEMALLOC;
++ vtime_account(current);
++ current->flags |= PF_IN_SOFTIRQ;
++ lockdep_softirq_enter();
++ local_irq_enable();
++ handle_softirq(which, smp_processor_id(), need_rcu_bh_qs);
++ local_irq_disable();
++ lockdep_softirq_exit();
++ current->flags &= ~PF_IN_SOFTIRQ;
++ vtime_account(current);
++ tsk_restore_flags(current, old_flags, PF_MEMALLOC);
++}
++
++/*
++ * Called with interrupts disabled. Process softirqs which were raised
++ * in current context (or on behalf of ksoftirqd).
++ */
++static void do_current_softirqs(int need_rcu_bh_qs)
++{
++ while (current->softirqs_raised) {
++ int i = __ffs(current->softirqs_raised);
++ unsigned int pending, mask = (1U << i);
++
++ current->softirqs_raised &= ~mask;
++ local_irq_enable();
++
++ /*
++ * If the lock is contended, we boost the owner to
++ * process the softirq or leave the critical section
++ * now.
++ */
++ lock_softirq(i);
++ local_irq_disable();
++ /*
++ * Check with the local_softirq_pending() bits,
++ * whether we need to process this still or if someone
++ * else took care of it.
++ */
++ pending = local_softirq_pending();
++ if (pending & mask) {
++ set_softirq_pending(pending & ~mask);
++ do_single_softirq(i, need_rcu_bh_qs);
++ }
++ unlock_softirq(i);
++ WARN_ON(current->softirq_nestcnt != 1);
++ }
+ }
+
+ void local_bh_disable(void)
+@@ -403,17 +505,11 @@ void local_bh_enable(void)
+ if (WARN_ON(current->softirq_nestcnt == 0))
+ return;
+
+- if ((current->softirq_nestcnt == 1) &&
+- local_softirq_pending() &&
+- local_trylock(local_softirq_lock)) {
++ local_irq_disable();
++ if (current->softirq_nestcnt == 1 && current->softirqs_raised)
++ do_current_softirqs(1);
++ local_irq_enable();
+
+- local_irq_disable();
+- if (local_softirq_pending())
+- __do_softirq();
+- local_irq_enable();
+- local_unlock(local_softirq_lock);
+- WARN_ON(current->softirq_nestcnt != 1);
+- }
+ current->softirq_nestcnt--;
+ migrate_enable();
+ }
+@@ -438,86 +534,82 @@ int in_serving_softirq(void)
+ }
+ EXPORT_SYMBOL(in_serving_softirq);
+
+-/*
+- * Called with bh and local interrupts disabled. For full RT cpu must
+- * be pinned.
+- */
+-static void __do_softirq_common(int need_rcu_bh_qs)
++/* Called with preemption disabled */
++static void run_ksoftirqd(unsigned int cpu)
+ {
+- u32 pending = local_softirq_pending();
+- int cpu = smp_processor_id();
+-
++ local_irq_disable();
+ current->softirq_nestcnt++;
+-
+- /* Reset the pending bitmask before enabling irqs */
+- set_softirq_pending(0);
+-
+- current->flags |= PF_IN_SOFTIRQ;
+-
+- lockdep_softirq_enter();
+-
+- handle_pending_softirqs(pending, cpu, need_rcu_bh_qs);
+-
+- pending = local_softirq_pending();
+- if (pending)
+- wakeup_softirqd();
+-
+- lockdep_softirq_exit();
+- current->flags &= ~PF_IN_SOFTIRQ;
+-
++ do_current_softirqs(1);
+ current->softirq_nestcnt--;
++ rcu_note_context_switch(cpu);
++ local_irq_enable();
+ }
+
+-static int __thread_do_softirq(int cpu)
++/*
++ * Called from netif_rx_ni(). Preemption enabled, but migration
++ * disabled. So the cpu can't go away under us.
++ */
++void thread_do_softirq(void)
++{
++ if (!in_serving_softirq() && current->softirqs_raised) {
++ current->softirq_nestcnt++;
++ do_current_softirqs(0);
++ current->softirq_nestcnt--;
++ }
++}
++
++void __raise_softirq_irqoff(unsigned int nr)
+ {
++ trace_softirq_raise(nr);
++ or_softirq_pending(1UL << nr);
++
+ /*
+- * Prevent the current cpu from going offline.
+- * pin_current_cpu() can reenable preemption and block on the
+- * hotplug mutex. When it returns, the current cpu is
+- * pinned. It might be the wrong one, but the offline check
+- * below catches that.
++ * If we are not in a hard interrupt and inside a bh disabled
++ * region, we simply raise the flag on current. local_bh_enable()
++ * will make sure that the softirq is executed. Otherwise we
++ * delegate it to ksoftirqd.
+ */
+- pin_current_cpu();
++ if (!in_irq() && current->softirq_nestcnt)
++ current->softirqs_raised |= (1U << nr);
++ else if (__this_cpu_read(ksoftirqd))
++ __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
++}
++
++/*
++ * This function must run with irqs disabled!
++ */
++void raise_softirq_irqoff(unsigned int nr)
++{
++ __raise_softirq_irqoff(nr);
++
+ /*
+- * If called from ksoftirqd (cpu >= 0) we need to check
+- * whether we are on the wrong cpu due to cpu offlining. If
+- * called via thread_do_softirq() no action required.
++ * If we're in a hard interrupt we let the irq return code deal
++ * with the wakeup of ksoftirqd.
+ */
+- if (cpu >= 0 && cpu_is_offline(cpu)) {
+- unpin_current_cpu();
+- return -1;
+- }
+- preempt_enable();
+- local_lock(local_softirq_lock);
+- local_irq_disable();
++ if (in_irq())
++ return;
++
+ /*
+- * We cannot switch stacks on RT as we want to be able to
+- * schedule!
++ * If we are in thread context but outside of a bh disabled
++ * region, we need to wake ksoftirqd as well.
++ *
++ * CHECKME: Some of the places which do that could be wrapped
++ * into local_bh_disable/enable pairs. Though it's unclear
++ * whether this is worth the effort. To find those places just
++ * raise a WARN() if the condition is met.
+ */
+- if (local_softirq_pending())
+- __do_softirq_common(cpu >= 0);
+- local_unlock(local_softirq_lock);
+- unpin_current_cpu();
+- preempt_disable();
+- local_irq_enable();
+- return 0;
++ if (!current->softirq_nestcnt)
++ wakeup_softirqd();
+ }
+
+-/*
+- * Called from netif_rx_ni(). Preemption enabled.
+- */
+-void thread_do_softirq(void)
++void do_raise_softirq_irqoff(unsigned int nr)
+ {
+- if (!in_serving_softirq()) {
+- preempt_disable();
+- __thread_do_softirq(-1);
+- preempt_enable();
+- }
++ raise_softirq_irqoff(nr);
+ }
+
+-static int ksoftirqd_do_softirq(int cpu)
++static inline int ksoftirqd_softirq_pending(void)
+ {
+- return __thread_do_softirq(cpu);
++ return current->softirqs_raised;
+ }
+
+ static inline void local_bh_disable_nort(void) { }
+@@ -528,6 +620,10 @@ static inline void ksoftirqd_set_sched_p
+ struct sched_param param = { .sched_priority = 1 };
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
++ /* Take over all pending softirqs when starting */
++ local_irq_disable();
++ current->softirqs_raised = local_softirq_pending();
++ local_irq_enable();
+ }
+
+ static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+@@ -574,8 +670,14 @@ static inline void invoke_softirq(void)
+ wakeup_softirqd();
+ __local_bh_enable(SOFTIRQ_OFFSET);
+ }
+-#else
++#else /* PREEMPT_RT_FULL */
++ unsigned long flags;
++
++ local_irq_save(flags);
++ if (__this_cpu_read(ksoftirqd) &&
++ __this_cpu_read(ksoftirqd)->softirqs_raised)
+ wakeup_softirqd();
++ local_irq_restore(flags);
+ #endif
+ }
+
+@@ -599,26 +701,6 @@ void irq_exit(void)
+ sched_preempt_enable_no_resched();
+ }
+
+-/*
+- * This function must run with irqs disabled!
+- */
+-inline void raise_softirq_irqoff(unsigned int nr)
+-{
+- __raise_softirq_irqoff(nr);
+-
+- /*
+- * If we're in an interrupt or softirq, we're done
+- * (this also catches softirq-disabled code). We will
+- * actually run the softirq once we return from
+- * the irq or softirq.
+- *
+- * Otherwise we wake up ksoftirqd to make sure we
+- * schedule the softirq soon.
+- */
+- if (!in_interrupt())
+- wakeup_softirqd();
+-}
+-
+ void raise_softirq(unsigned int nr)
+ {
+ unsigned long flags;
+@@ -628,12 +710,6 @@ void raise_softirq(unsigned int nr)
+ local_irq_restore(flags);
+ }
+
+-void __raise_softirq_irqoff(unsigned int nr)
+-{
+- trace_softirq_raise(nr);
+- or_softirq_pending(1UL << nr);
+-}
+-
+ void open_softirq(int nr, void (*action)(struct softirq_action *))
+ {
+ softirq_vec[nr].action = action;
+@@ -1079,20 +1155,7 @@ EXPORT_SYMBOL(tasklet_unlock_wait);
+
+ static int ksoftirqd_should_run(unsigned int cpu)
+ {
+- return local_softirq_pending();
+-}
+-
+-static void run_ksoftirqd(unsigned int cpu)
+-{
+- local_irq_disable();
+- if (local_softirq_pending()) {
+- __do_softirq();
+- rcu_note_context_switch(cpu);
+- local_irq_enable();
+- cond_resched();
+- return;
+- }
+- local_irq_enable();
++ return ksoftirqd_softirq_pending();
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
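
To picture the flow described in the changelog above, here is a hedged
sketch of a thread-context path under the split locks (the softirq
number and function name are illustrative):

static void example_thread_path(void)
{
	local_bh_disable();		/* current->softirq_nestcnt++ */

	/*
	 * Raised outside hard interrupt context and inside a bh
	 * disabled region, the bit lands in current->softirqs_raised
	 * instead of waking ksoftirqd.
	 */
	raise_softirq(NET_RX_SOFTIRQ);

	/*
	 * Leaving the bh disabled region runs the raised softirq in
	 * this thread's context, under the per-softirq per-CPU lock,
	 * unless some other task already cleared the pending bit.
	 */
	local_bh_enable();
}
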
diff --git a/patches/softirq-split-out-code.patch b/patches/softirq-split-out-code.patch
new file mode 100644
index 0000000..cefcab2
--- /dev/null
+++ b/patches/softirq-split-out-code.patch
@@ -0,0 +1,101 @@
+Subject: softirq-split-out-code.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 15:46:49 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/softirq.c | 62 ++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 30 insertions(+), 32 deletions(-)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -77,6 +77,34 @@ static void wakeup_softirqd(void)
+ wake_up_process(tsk);
+ }
+
++static void handle_pending_softirqs(u32 pending, int cpu)
++{
++ struct softirq_action *h = softirq_vec;
++ unsigned int prev_count = preempt_count();
++
++ local_irq_enable();
++ for ( ; pending; h++, pending >>= 1) {
++ unsigned int vec_nr = h - softirq_vec;
++
++ if (!(pending & 1))
++ continue;
++
++ kstat_incr_softirqs_this_cpu(vec_nr);
++ trace_softirq_entry(vec_nr);
++ h->action(h);
++ trace_softirq_exit(vec_nr);
++ if (unlikely(prev_count != preempt_count())) {
++ printk(KERN_ERR
++ "huh, entered softirq %u %s %p with preempt_count %08x exited with %08x?\n",
++ vec_nr, softirq_to_name[vec_nr], h->action,
++ prev_count, (unsigned int) preempt_count());
++ preempt_count() = prev_count;
++ }
++ rcu_bh_qs(cpu);
++ }
++ local_irq_disable();
++}
++
+ /*
+ * preempt_count and SOFTIRQ_OFFSET usage:
+ * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
+@@ -207,7 +235,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
+
+ asmlinkage void __do_softirq(void)
+ {
+- struct softirq_action *h;
+ __u32 pending;
+ int max_restart = MAX_SOFTIRQ_RESTART;
+ int cpu;
+@@ -224,7 +251,7 @@ asmlinkage void __do_softirq(void)
+ vtime_account_irq_enter(current);
+
+ __local_bh_disable((unsigned long)__builtin_return_address(0),
+- SOFTIRQ_OFFSET);
++ SOFTIRQ_OFFSET);
+ lockdep_softirq_enter();
+
+ cpu = smp_processor_id();
+@@ -232,36 +259,7 @@ restart:
+ /* Reset the pending bitmask before enabling irqs */
+ set_softirq_pending(0);
+
+- local_irq_enable();
+-
+- h = softirq_vec;
+-
+- do {
+- if (pending & 1) {
+- unsigned int vec_nr = h - softirq_vec;
+- int prev_count = preempt_count();
+-
+- kstat_incr_softirqs_this_cpu(vec_nr);
+-
+- trace_softirq_entry(vec_nr);
+- h->action(h);
+- trace_softirq_exit(vec_nr);
+- if (unlikely(prev_count != preempt_count())) {
+- printk(KERN_ERR "huh, entered softirq %u %s %p"
+- "with preempt_count %08x,"
+- " exited with %08x?\n", vec_nr,
+- softirq_to_name[vec_nr], h->action,
+- prev_count, preempt_count());
+- preempt_count() = prev_count;
+- }
+-
+- rcu_bh_qs(cpu);
+- }
+- h++;
+- pending >>= 1;
+- } while (pending);
+-
+- local_irq_disable();
++ handle_pending_softirqs(pending, cpu);
+
+ pending = local_softirq_pending();
+ if (pending && --max_restart)
diff --git a/patches/softirq-thread-do-softirq.patch b/patches/softirq-thread-do-softirq.patch
new file mode 100644
index 0000000..8db1db6
--- /dev/null
+++ b/patches/softirq-thread-do-softirq.patch
@@ -0,0 +1,31 @@
+Subject: softirq-thread-do-softirq.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 28 Jun 2011 15:44:15 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/interrupt.h | 1 +
+ net/core/dev.c | 2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -443,6 +443,7 @@ struct softirq_action
+
+ asmlinkage void do_softirq(void);
+ asmlinkage void __do_softirq(void);
++static inline void thread_do_softirq(void) { do_softirq(); }
+ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
+ extern void softirq_init(void);
+ extern void __raise_softirq_irqoff(unsigned int nr);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3118,7 +3118,7 @@ int netif_rx_ni(struct sk_buff *skb)
+ preempt_disable();
+ err = netif_rx(skb);
+ if (local_softirq_pending())
+- do_softirq();
++ thread_do_softirq();
+ preempt_enable();
+
+ return err;
diff --git a/patches/spi-omap-mcspi-check-condition-also-after-timeout.patch b/patches/spi-omap-mcspi-check-condition-also-after-timeout.patch
new file mode 100644
index 0000000..136be4a
--- /dev/null
+++ b/patches/spi-omap-mcspi-check-condition-also-after-timeout.patch
@@ -0,0 +1,33 @@
+From 65ef175b74710f70b6d89794c261e017f6f5d9ec Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 21 Mar 2013 12:46:49 +0100
+Subject: [PATCH 3/3] spi/omap-mcspi: check condition also after timeout
+
+It is possible that the handler gets interrupted after checking the
+status. By the time it resumes, the timeout may have expired even though
+the condition it was waiting for has meanwhile become true. Therefore it
+is necessary to re-check the condition on a timeout, to be sure that the
+condition is not true _and_ the time really passed by.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/spi/spi-omap2-mcspi.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/spi/spi-omap2-mcspi.c
++++ b/drivers/spi/spi-omap2-mcspi.c
+@@ -285,8 +285,12 @@ static int mcspi_wait_for_reg_bit(void _
+
+ timeout = jiffies + msecs_to_jiffies(1000);
+ while (!(__raw_readl(reg) & bit)) {
+- if (time_after(jiffies, timeout))
+- return -1;
++ if (time_after(jiffies, timeout)) {
++ if (!(__raw_readl(reg) & bit))
++ return -ETIMEDOUT;
++ else
++ return 0;
++ }
+ cpu_relax();
+ }
+ return 0;
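
The underlying idiom is generic: after a timed-out wait, sample the
condition once more before declaring failure, since the waiter may have
been preempted between its last check and the timeout test. A hedged,
stand-alone sketch of the pattern (the names and the 1000 ms budget are
illustrative):

static int wait_for_bit_set(void __iomem *reg, u32 bit)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(1000);

	while (!(__raw_readl(reg) & bit)) {
		if (time_after(jiffies, timeout)) {
			/* Re-check: we may have slept past the
			 * deadline while the bit became set. */
			return (__raw_readl(reg) & bit) ? 0 : -ETIMEDOUT;
		}
		cpu_relax();
	}
	return 0;
}
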
diff --git a/patches/spinlock-types-separate-raw.patch b/patches/spinlock-types-separate-raw.patch
new file mode 100644
index 0000000..6611773
--- /dev/null
+++ b/patches/spinlock-types-separate-raw.patch
@@ -0,0 +1,204 @@
+Subject: spinlock-types-separate-raw.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 19:34:01 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rwlock_types.h | 4 +
+ include/linux/spinlock_types.h | 74 ------------------------------------
+ include/linux/spinlock_types_nort.h | 33 ++++++++++++++++
+ include/linux/spinlock_types_raw.h | 56 +++++++++++++++++++++++++++
+ 4 files changed, 95 insertions(+), 72 deletions(-)
+
+--- a/include/linux/rwlock_types.h
++++ b/include/linux/rwlock_types.h
+@@ -1,6 +1,10 @@
+ #ifndef __LINUX_RWLOCK_TYPES_H
+ #define __LINUX_RWLOCK_TYPES_H
+
++#if !defined(__LINUX_SPINLOCK_TYPES_H)
++# error "Do not include directly, include spinlock_types.h"
++#endif
++
+ /*
+ * include/linux/rwlock_types.h - generic rwlock type definitions
+ * and initializers
+--- a/include/linux/spinlock_types.h
++++ b/include/linux/spinlock_types.h
+@@ -9,79 +9,9 @@
+ * Released under the General Public License (GPL).
+ */
+
+-#if defined(CONFIG_SMP)
+-# include <asm/spinlock_types.h>
+-#else
+-# include <linux/spinlock_types_up.h>
+-#endif
++#include <linux/spinlock_types_raw.h>
+
+-#include <linux/lockdep.h>
+-
+-typedef struct raw_spinlock {
+- arch_spinlock_t raw_lock;
+-#ifdef CONFIG_GENERIC_LOCKBREAK
+- unsigned int break_lock;
+-#endif
+-#ifdef CONFIG_DEBUG_SPINLOCK
+- unsigned int magic, owner_cpu;
+- void *owner;
+-#endif
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+- struct lockdep_map dep_map;
+-#endif
+-} raw_spinlock_t;
+-
+-#define SPINLOCK_MAGIC 0xdead4ead
+-
+-#define SPINLOCK_OWNER_INIT ((void *)-1L)
+-
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
+-#else
+-# define SPIN_DEP_MAP_INIT(lockname)
+-#endif
+-
+-#ifdef CONFIG_DEBUG_SPINLOCK
+-# define SPIN_DEBUG_INIT(lockname) \
+- .magic = SPINLOCK_MAGIC, \
+- .owner_cpu = -1, \
+- .owner = SPINLOCK_OWNER_INIT,
+-#else
+-# define SPIN_DEBUG_INIT(lockname)
+-#endif
+-
+-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
+- { \
+- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+- SPIN_DEBUG_INIT(lockname) \
+- SPIN_DEP_MAP_INIT(lockname) }
+-
+-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
+- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
+-
+-#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
+-
+-typedef struct spinlock {
+- union {
+- struct raw_spinlock rlock;
+-
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
+- struct {
+- u8 __padding[LOCK_PADSIZE];
+- struct lockdep_map dep_map;
+- };
+-#endif
+- };
+-} spinlock_t;
+-
+-#define __SPIN_LOCK_INITIALIZER(lockname) \
+- { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
+-
+-#define __SPIN_LOCK_UNLOCKED(lockname) \
+- (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
+-
+-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
++#include <linux/spinlock_types_nort.h>
+
+ #include <linux/rwlock_types.h>
+
+--- /dev/null
++++ b/include/linux/spinlock_types_nort.h
+@@ -0,0 +1,33 @@
++#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
++#define __LINUX_SPINLOCK_TYPES_NORT_H
++
++#ifndef __LINUX_SPINLOCK_TYPES_H
++#error "Do not include directly. Include spinlock_types.h instead"
++#endif
++
++/*
++ * The non RT version maps spinlocks to raw_spinlocks
++ */
++typedef struct spinlock {
++ union {
++ struct raw_spinlock rlock;
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
++ struct {
++ u8 __padding[LOCK_PADSIZE];
++ struct lockdep_map dep_map;
++ };
++#endif
++ };
++} spinlock_t;
++
++#define __SPIN_LOCK_INITIALIZER(lockname) \
++ { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
++
++#define __SPIN_LOCK_UNLOCKED(lockname) \
++ (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
++
++#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
++
++#endif
+--- /dev/null
++++ b/include/linux/spinlock_types_raw.h
+@@ -0,0 +1,56 @@
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
++#define __LINUX_SPINLOCK_TYPES_RAW_H
++
++#if defined(CONFIG_SMP)
++# include <asm/spinlock_types.h>
++#else
++# include <linux/spinlock_types_up.h>
++#endif
++
++#include <linux/lockdep.h>
++
++typedef struct raw_spinlock {
++ arch_spinlock_t raw_lock;
++#ifdef CONFIG_GENERIC_LOCKBREAK
++ unsigned int break_lock;
++#endif
++#ifdef CONFIG_DEBUG_SPINLOCK
++ unsigned int magic, owner_cpu;
++ void *owner;
++#endif
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
++} raw_spinlock_t;
++
++#define SPINLOCK_MAGIC 0xdead4ead
++
++#define SPINLOCK_OWNER_INIT ((void *)-1L)
++
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
++#else
++# define SPIN_DEP_MAP_INIT(lockname)
++#endif
++
++#ifdef CONFIG_DEBUG_SPINLOCK
++# define SPIN_DEBUG_INIT(lockname) \
++ .magic = SPINLOCK_MAGIC, \
++ .owner_cpu = -1, \
++ .owner = SPINLOCK_OWNER_INIT,
++#else
++# define SPIN_DEBUG_INIT(lockname)
++#endif
++
++#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
++ { \
++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
++ SPIN_DEBUG_INIT(lockname) \
++ SPIN_DEP_MAP_INIT(lockname) }
++
++#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
++ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
++
++#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
++
++#endif
diff --git a/patches/stomp-machine-deal-clever-with-stopper-lock.patch b/patches/stomp-machine-deal-clever-with-stopper-lock.patch
new file mode 100644
index 0000000..eaa9d4c
--- /dev/null
+++ b/patches/stomp-machine-deal-clever-with-stopper-lock.patch
@@ -0,0 +1,58 @@
+Subject: stomp_machine: Use mutex_trylock when called from inactive cpu
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 03 Oct 2012 17:21:53 +0100
+
+If the stop machinery is called from an inactive CPU we cannot use
+mutex_lock(), because some other stomp machine invocation might be in
+progress and the mutex can be contended. We cannot schedule from this
+context, so trylock and loop.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ kernel/stop_machine.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -158,7 +158,7 @@ static DEFINE_PER_CPU(struct cpu_stop_wo
+
+ static void queue_stop_cpus_work(const struct cpumask *cpumask,
+ cpu_stop_fn_t fn, void *arg,
+- struct cpu_stop_done *done)
++ struct cpu_stop_done *done, bool inactive)
+ {
+ struct cpu_stop_work *work;
+ unsigned int cpu;
+@@ -175,7 +175,12 @@ static void queue_stop_cpus_work(const s
+ * Make sure that all work is queued on all cpus before we
+ * any of the cpus can execute it.
+ */
+- mutex_lock(&stopper_lock);
++ if (!inactive) {
++ mutex_lock(&stopper_lock);
++ } else {
++ while (!mutex_trylock(&stopper_lock))
++ cpu_relax();
++ }
+ for_each_cpu(cpu, cpumask)
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+ &per_cpu(stop_cpus_work, cpu));
+@@ -188,7 +193,7 @@ static int __stop_cpus(const struct cpum
+ struct cpu_stop_done done;
+
+ cpu_stop_init_done(&done, cpumask_weight(cpumask));
+- queue_stop_cpus_work(cpumask, fn, arg, &done);
++ queue_stop_cpus_work(cpumask, fn, arg, &done, false);
+ wait_for_stop_done(&done);
+ return done.executed ? done.ret : -ENOENT;
+ }
+@@ -601,7 +606,7 @@ int stop_machine_from_inactive_cpu(int (
+ set_state(&smdata, STOPMACHINE_PREPARE);
+ cpu_stop_init_done(&done, num_active_cpus());
+ queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
+- &done);
++ &done, true);
+ ret = stop_machine_cpu_stop(&smdata);
+
+ /* Busy wait for completion. */
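
The trylock loop above is the usual fallback when a path is not allowed
to schedule. A hedged sketch of the general shape (the lock and the
may_sleep predicate are illustrative):

static void grab_stopper_lock(struct mutex *lock, bool may_sleep)
{
	if (may_sleep) {
		mutex_lock(lock);
	} else {
		/* Cannot block here: poll the mutex instead. */
		while (!mutex_trylock(lock))
			cpu_relax();
	}
	/* caller runs the critical section and calls mutex_unlock() */
}
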
diff --git a/patches/stomp-machine-mark-stomper-thread.patch b/patches/stomp-machine-mark-stomper-thread.patch
new file mode 100644
index 0000000..5379d29
--- /dev/null
+++ b/patches/stomp-machine-mark-stomper-thread.patch
@@ -0,0 +1,30 @@
+Subject: stomp-machine-mark-stomper-thread.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 19:53:19 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched.h | 1 +
+ kernel/stop_machine.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1859,6 +1859,7 @@ extern void thread_group_cputime_adjuste
+ #define PF_FROZEN 0x00010000 /* frozen for system suspend */
+ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
+ #define PF_KSWAPD 0x00040000 /* I am kswapd */
++#define PF_STOMPER 0x00080000 /* I am a stomp machine thread */
+ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
+ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -327,6 +327,7 @@ static int __cpuinit cpu_stop_cpu_callba
+ if (IS_ERR(p))
+ return notifier_from_errno(PTR_ERR(p));
+ get_task_struct(p);
++ p->flags |= PF_STOMPER;
+ kthread_bind(p, cpu);
+ sched_set_stop_task(cpu, p);
+ stopper->thread = p;
diff --git a/patches/stomp-machine-raw-lock.patch b/patches/stomp-machine-raw-lock.patch
new file mode 100644
index 0000000..e11a9a1
--- /dev/null
+++ b/patches/stomp-machine-raw-lock.patch
@@ -0,0 +1,174 @@
+Subject: stomp-machine-raw-lock.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 29 Jun 2011 11:01:51 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/stop_machine.c | 58 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 41 insertions(+), 17 deletions(-)
+
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -29,12 +29,12 @@ struct cpu_stop_done {
+ atomic_t nr_todo; /* nr left to execute */
+ bool executed; /* actually executed? */
+ int ret; /* collected return value */
+- struct completion completion; /* fired if nr_todo reaches 0 */
++ struct task_struct *waiter; /* woken when nr_todo reaches 0 */
+ };
+
+ /* the actual stopper, one per every possible cpu, enabled on online cpus */
+ struct cpu_stopper {
+- spinlock_t lock;
++ raw_spinlock_t lock;
+ bool enabled; /* is this stopper enabled? */
+ struct list_head works; /* list of pending works */
+ struct task_struct *thread; /* stopper thread */
+@@ -47,7 +47,7 @@ static void cpu_stop_init_done(struct cp
+ {
+ memset(done, 0, sizeof(*done));
+ atomic_set(&done->nr_todo, nr_todo);
+- init_completion(&done->completion);
++ done->waiter = current;
+ }
+
+ /* signal completion unless @done is NULL */
+@@ -56,8 +56,10 @@ static void cpu_stop_signal_done(struct
+ if (done) {
+ if (executed)
+ done->executed = true;
+- if (atomic_dec_and_test(&done->nr_todo))
+- complete(&done->completion);
++ if (atomic_dec_and_test(&done->nr_todo)) {
++ wake_up_process(done->waiter);
++ done->waiter = NULL;
++ }
+ }
+ }
+
+@@ -67,7 +69,7 @@ static void cpu_stop_queue_work(struct c
+ {
+ unsigned long flags;
+
+- spin_lock_irqsave(&stopper->lock, flags);
++ raw_spin_lock_irqsave(&stopper->lock, flags);
+
+ if (stopper->enabled) {
+ list_add_tail(&work->list, &stopper->works);
+@@ -75,7 +77,23 @@ static void cpu_stop_queue_work(struct c
+ } else
+ cpu_stop_signal_done(work->done, false);
+
+- spin_unlock_irqrestore(&stopper->lock, flags);
++ raw_spin_unlock_irqrestore(&stopper->lock, flags);
++}
++
++static void wait_for_stop_done(struct cpu_stop_done *done)
++{
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ while (atomic_read(&done->nr_todo)) {
++ schedule();
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ }
++ /*
++ * We need to wait until cpu_stop_signal_done() has cleared
++ * done->waiter.
++ */
++ while (done->waiter)
++ cpu_relax();
++ set_current_state(TASK_RUNNING);
+ }
+
+ /**
+@@ -109,7 +127,7 @@ int stop_one_cpu(unsigned int cpu, cpu_s
+
+ cpu_stop_init_done(&done, 1);
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
+- wait_for_completion(&done.completion);
++ wait_for_stop_done(&done);
+ return done.executed ? done.ret : -ENOENT;
+ }
+
+@@ -171,7 +189,7 @@ static int __stop_cpus(const struct cpum
+
+ cpu_stop_init_done(&done, cpumask_weight(cpumask));
+ queue_stop_cpus_work(cpumask, fn, arg, &done);
+- wait_for_completion(&done.completion);
++ wait_for_stop_done(&done);
+ return done.executed ? done.ret : -ENOENT;
+ }
+
+@@ -259,13 +277,13 @@ repeat:
+ }
+
+ work = NULL;
+- spin_lock_irq(&stopper->lock);
++ raw_spin_lock_irq(&stopper->lock);
+ if (!list_empty(&stopper->works)) {
+ work = list_first_entry(&stopper->works,
+ struct cpu_stop_work, list);
+ list_del_init(&work->list);
+ }
+- spin_unlock_irq(&stopper->lock);
++ raw_spin_unlock_irq(&stopper->lock);
+
+ if (work) {
+ cpu_stop_fn_t fn = work->fn;
+@@ -299,7 +317,13 @@ repeat:
+ kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
+ ksym_buf), arg);
+
++ /*
++ * Make sure that the wakeup and setting done->waiter
++ * to NULL is atomic.
++ */
++ local_irq_disable();
+ cpu_stop_signal_done(done, true);
++ local_irq_enable();
+ } else
+ schedule();
+
+@@ -337,9 +361,9 @@ static int __cpuinit cpu_stop_cpu_callba
+ /* strictly unnecessary, as first user will wake it */
+ wake_up_process(stopper->thread);
+ /* mark enabled */
+- spin_lock_irq(&stopper->lock);
++ raw_spin_lock_irq(&stopper->lock);
+ stopper->enabled = true;
+- spin_unlock_irq(&stopper->lock);
++ raw_spin_unlock_irq(&stopper->lock);
+ break;
+
+ #ifdef CONFIG_HOTPLUG_CPU
+@@ -352,11 +376,11 @@ static int __cpuinit cpu_stop_cpu_callba
+ /* kill the stopper */
+ kthread_stop(stopper->thread);
+ /* drain remaining works */
+- spin_lock_irq(&stopper->lock);
++ raw_spin_lock_irq(&stopper->lock);
+ list_for_each_entry(work, &stopper->works, list)
+ cpu_stop_signal_done(work->done, false);
+ stopper->enabled = false;
+- spin_unlock_irq(&stopper->lock);
++ raw_spin_unlock_irq(&stopper->lock);
+ /* release the stopper */
+ put_task_struct(stopper->thread);
+ stopper->thread = NULL;
+@@ -387,7 +411,7 @@ static int __init cpu_stop_init(void)
+ for_each_possible_cpu(cpu) {
+ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+
+- spin_lock_init(&stopper->lock);
++ raw_spin_lock_init(&stopper->lock);
+ INIT_LIST_HEAD(&stopper->works);
+ }
+
+@@ -581,7 +605,7 @@ int stop_machine_from_inactive_cpu(int (
+ ret = stop_machine_cpu_stop(&smdata);
+
+ /* Busy wait for completion. */
+- while (!completion_done(&done.completion))
++ while (atomic_read(&done.nr_todo))
+ cpu_relax();
+
+ mutex_unlock(&stop_cpus_mutex);
diff --git a/patches/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch b/patches/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
new file mode 100644
index 0000000..f196a81
--- /dev/null
+++ b/patches/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
@@ -0,0 +1,62 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:27 -0500
+Subject: stop_machine: convert stop_machine_run() to PREEMPT_RT
+
+Instead of playing with non-preemption, introduce explicit
+startup serialization. This is more robust and cleaner as
+well.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/stop_machine.c | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -135,6 +135,7 @@ void stop_one_cpu_nowait(unsigned int cp
+
+ /* static data for stop_cpus */
+ static DEFINE_MUTEX(stop_cpus_mutex);
++static DEFINE_MUTEX(stopper_lock);
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+ static void queue_stop_cpus_work(const struct cpumask *cpumask,
+@@ -153,15 +154,14 @@ static void queue_stop_cpus_work(const s
+ }
+
+ /*
+- * Disable preemption while queueing to avoid getting
+- * preempted by a stopper which might wait for other stoppers
+- * to enter @fn which can lead to deadlock.
++ * Make sure that all work is queued on all cpus before
++ * any of the cpus can execute it.
+ */
+- preempt_disable();
++ mutex_lock(&stopper_lock);
+ for_each_cpu(cpu, cpumask)
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+ &per_cpu(stop_cpus_work, cpu));
+- preempt_enable();
++ mutex_unlock(&stopper_lock);
+ }
+
+ static int __stop_cpus(const struct cpumask *cpumask,
+@@ -275,6 +275,16 @@ repeat:
+
+ __set_current_state(TASK_RUNNING);
+
++ /*
++ * Wait until the stopper finished scheduling on all
++ * cpus
++ */
++ mutex_lock(&stopper_lock);
++ /*
++ * Let other cpu threads continue as well
++ */
++ mutex_unlock(&stopper_lock);
++
+ /* cpu stop callbacks are not allowed to sleep */
+ preempt_disable();
+
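
The serialization above can be modelled in plain user space: the dispatcher
queues work for every worker while holding a mutex, and each worker takes and
immediately drops the same mutex before doing anything, so no worker starts
until all of them have work queued. A minimal pthread sketch (the names
stopper_lock, worker and the work_ready array are illustrative only, not the
kernel code):

  #include <pthread.h>
  #include <stdio.h>

  #define NR_WORKERS 4

  static pthread_mutex_t stopper_lock = PTHREAD_MUTEX_INITIALIZER;
  static int work_ready[NR_WORKERS];

  static void *worker(void *arg)
  {
          long id = (long)arg;

          /* Block until the dispatcher has queued work for everybody. */
          pthread_mutex_lock(&stopper_lock);
          pthread_mutex_unlock(&stopper_lock);

          printf("worker %ld runs, work_ready=%d\n", id, work_ready[id]);
          return NULL;
  }

  int main(void)
  {
          pthread_t tid[NR_WORKERS];
          long i;

          /* Queue work for all workers before any of them may execute it. */
          pthread_mutex_lock(&stopper_lock);
          for (i = 0; i < NR_WORKERS; i++) {
                  work_ready[i] = 1;
                  pthread_create(&tid[i], NULL, worker, (void *)i);
          }
          pthread_mutex_unlock(&stopper_lock);

          for (i = 0; i < NR_WORKERS; i++)
                  pthread_join(tid[i], NULL);
          return 0;
  }

Build with "gcc -pthread"; the mutex stands in for the preempt_disable()
trick the patch removes, which is what makes the scheme usable on PREEMPT_RT.
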
diff --git a/patches/suspend-prevernt-might-sleep-splats.patch b/patches/suspend-prevernt-might-sleep-splats.patch
new file mode 100644
index 0000000..feaefdb
--- /dev/null
+++ b/patches/suspend-prevernt-might-sleep-splats.patch
@@ -0,0 +1,106 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 15 Jul 2010 10:29:00 +0200
+Subject: suspend: Prevent might sleep splats
+
+Timekeeping suspend/resume calls read_persistent_clock(), which takes
+rtc_lock. That results in might sleep warnings because at that point
+we run with interrupts disabled.
+
+We cannot convert rtc_lock to a raw spinlock as that would trigger
+other might sleep warnings.
+
+As a temporary workaround we disable the might sleep warnings by
+setting system_state to SYSTEM_SUSPEND before calling syscore_suspend()
+and restoring it to SYSTEM_RUNNING after syscore_resume().
+
+Needs to be revisited.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/kernel.h | 1 +
+ kernel/power/hibernate.c | 7 +++++++
+ kernel/power/suspend.c | 4 ++++
+ 3 files changed, 12 insertions(+)
+
+--- a/include/linux/kernel.h
++++ b/include/linux/kernel.h
+@@ -412,6 +412,7 @@ extern enum system_states {
+ SYSTEM_HALT,
+ SYSTEM_POWER_OFF,
+ SYSTEM_RESTART,
++ SYSTEM_SUSPEND,
+ } system_state;
+
+ #define TAINT_PROPRIETARY_MODULE 0
+--- a/kernel/power/hibernate.c
++++ b/kernel/power/hibernate.c
+@@ -275,6 +275,8 @@ static int create_image(int platform_mod
+
+ local_irq_disable();
+
++ system_state = SYSTEM_SUSPEND;
++
+ error = syscore_suspend();
+ if (error) {
+ printk(KERN_ERR "PM: Some system devices failed to power down, "
+@@ -302,6 +304,7 @@ static int create_image(int platform_mod
+ syscore_resume();
+
+ Enable_irqs:
++ system_state = SYSTEM_RUNNING;
+ local_irq_enable();
+
+ Enable_cpus:
+@@ -427,6 +430,7 @@ static int resume_target_kernel(bool pla
+ goto Enable_cpus;
+
+ local_irq_disable();
++ system_state = SYSTEM_SUSPEND;
+
+ error = syscore_suspend();
+ if (error)
+@@ -460,6 +464,7 @@ static int resume_target_kernel(bool pla
+ syscore_resume();
+
+ Enable_irqs:
++ system_state = SYSTEM_RUNNING;
+ local_irq_enable();
+
+ Enable_cpus:
+@@ -542,6 +547,7 @@ int hibernation_platform_enter(void)
+ goto Platform_finish;
+
+ local_irq_disable();
++ system_state = SYSTEM_SUSPEND;
+ syscore_suspend();
+ if (pm_wakeup_pending()) {
+ error = -EAGAIN;
+@@ -554,6 +560,7 @@ int hibernation_platform_enter(void)
+
+ Power_up:
+ syscore_resume();
++ system_state = SYSTEM_RUNNING;
+ local_irq_enable();
+ enable_nonboot_cpus();
+
+--- a/kernel/power/suspend.c
++++ b/kernel/power/suspend.c
+@@ -165,6 +165,8 @@ static int suspend_enter(suspend_state_t
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
++ system_state = SYSTEM_SUSPEND;
++
+ error = syscore_suspend();
+ if (!error) {
+ *wakeup = pm_wakeup_pending();
+@@ -175,6 +177,8 @@ static int suspend_enter(suspend_state_t
+ syscore_resume();
+ }
+
++ system_state = SYSTEM_RUNNING;
++
+ arch_suspend_enable_irqs();
+ BUG_ON(irqs_disabled());
+
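
The workaround relies on the might-sleep style debug checks bailing out
early when system_state is not SYSTEM_RUNNING. A rough user-space model of
that gating (check_might_sleep() and the driver below are illustrative only;
in the kernel the early return sits in __might_sleep()):

  #include <stdio.h>

  enum system_states { SYSTEM_RUNNING, SYSTEM_SUSPEND };
  static enum system_states system_state = SYSTEM_RUNNING;

  /* Model of a might-sleep style check: only complain while RUNNING. */
  static void check_might_sleep(const char *where)
  {
          if (system_state != SYSTEM_RUNNING)
                  return;                 /* suspend/resume path: stay quiet */
          fprintf(stderr, "BUG: sleeping function called from %s\n", where);
  }

  int main(void)
  {
          system_state = SYSTEM_SUSPEND;  /* as done before syscore_suspend() */
          check_might_sleep("rtc_lock");  /* suppressed */
          system_state = SYSTEM_RUNNING;  /* as done after syscore_resume()   */
          check_might_sleep("rtc_lock");  /* warns again                      */
          return 0;
  }
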
diff --git a/patches/sysctl-include-atomic-h.patch b/patches/sysctl-include-atomic-h.patch
new file mode 100644
index 0000000..2e51098
--- /dev/null
+++ b/patches/sysctl-include-atomic-h.patch
@@ -0,0 +1,19 @@
+Subject: sysctl-include-fix.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 14 Nov 2011 10:52:34 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sysctl.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -25,6 +25,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/wait.h>
+ #include <linux/rbtree.h>
++#include <linux/atomic.h>
+ #include <uapi/linux/sysctl.h>
+
+ /* For the /proc/sys support */
diff --git a/patches/sysfs-realtime-entry.patch b/patches/sysfs-realtime-entry.patch
new file mode 100644
index 0000000..4889c6b
--- /dev/null
+++ b/patches/sysfs-realtime-entry.patch
@@ -0,0 +1,47 @@
+Subject: add /sys/kernel/realtime entry
+From: Clark Williams <williams@redhat.com>
+Date: Sat Jul 30 21:55:53 2011 -0500
+
+Add a /sys/kernel entry to indicate that the kernel is a
+realtime kernel.
+
+Clark says that he needs this for udev rules: udev needs to evaluate
+whether it's a PREEMPT_RT kernel a few thousand times, and parsing uname
+output is too slow.
+
+Are there better solutions? Should it exist and return 0 on !-rt?
+
+Signed-off-by: Clark Williams <williams@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ kernel/ksysfs.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/kernel/ksysfs.c
++++ b/kernel/ksysfs.c
+@@ -132,6 +132,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
+
+ #endif /* CONFIG_KEXEC */
+
++#if defined(CONFIG_PREEMPT_RT_FULL)
++static ssize_t realtime_show(struct kobject *kobj,
++ struct kobj_attribute *attr, char *buf)
++{
++ return sprintf(buf, "%d\n", 1);
++}
++KERNEL_ATTR_RO(realtime);
++#endif
++
+ /* whether file capabilities are enabled */
+ static ssize_t fscaps_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+@@ -197,6 +206,9 @@ static struct attribute * kernel_attrs[]
+ &vmcoreinfo_attr.attr,
+ #endif
+ &rcu_expedited_attr.attr,
++#ifdef CONFIG_PREEMPT_RT_FULL
++ &realtime_attr.attr,
++#endif
+ NULL
+ };
+
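
What a user-space consumer like udev would effectively do with the new entry,
sketched as a tiny C program (the path is the one added above; everything
else is illustrative):

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/kernel/realtime", "r");
          int rt = 0;

          /* The attribute only exists on PREEMPT_RT_FULL kernels and reads "1". */
          if (f) {
                  if (fscanf(f, "%d", &rt) != 1)
                          rt = 0;
                  fclose(f);
          }
          printf("PREEMPT_RT kernel: %s\n", rt ? "yes" : "no");
          return 0;
  }
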
diff --git a/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch b/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch
new file mode 100644
index 0000000..f0bbd80
--- /dev/null
+++ b/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch
@@ -0,0 +1,402 @@
+Subject: tasklet: Prevent tasklets from going into infinite spin in RT
+From: Ingo Molnar <mingo@elte.hu>
+Date: Tue Nov 29 20:18:22 2011 -0500
+
+When CONFIG_PREEMPT_RT_FULL is enabled, tasklets run as threads,
+and spinlocks turn into mutexes. But this can cause issues with
+tasks disabling tasklets. A tasklet runs under ksoftirqd, and
+if a tasklet is disabled with tasklet_disable(), the tasklet
+count is increased. When a tasklet runs, it checks this counter
+and if it is set, it adds itself back on the softirq queue and
+returns.
+
+The problem arises in RT because ksoftirqd will see that a softirq
+is ready to run (the tasklet softirq just re-armed itself), and will
+not sleep, but instead run the softirqs again. The tasklet softirq
+will still see that the count is non-zero, will not execute the
+tasklet, and will requeue itself on the softirq again, which will
+cause ksoftirqd to run it again and again and again.
+
+It gets worse because ksoftirqd runs as a real-time thread.
+If it preempted the task that disabled tasklets, and that task
+has migration disabled, or can't run for other reasons, the tasklet
+softirq will never run because the count will never be zero, and
+ksoftirqd will go into an infinite loop. As an RT task, this
+becomes a big problem.
+
+This is a hack solution: tasklet_disable() stops tasklets, and when a
+disabled tasklet runs, instead of requeueing it on the softirq it is
+delayed. When tasklet_enable() is called and tasklets are waiting,
+tasklet_enable() will kick the tasklets to continue.
+This prevents the lockup caused by ksoftirqd going into an infinite loop.
+
+[ rostedt@goodmis.org: ported to 3.0-rt ]
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/interrupt.h | 39 ++++----
+ kernel/softirq.c | 208 +++++++++++++++++++++++++++++++++-------------
+ 2 files changed, 170 insertions(+), 77 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -502,8 +502,9 @@ extern void __send_remote_softirq(struct
+ to be executed on some cpu at least once after this.
+ * If the tasklet is already scheduled, but its execution is still not
+ started, it will be executed only once.
+- * If this tasklet is already running on another CPU (or schedule is called
+- from tasklet itself), it is rescheduled for later.
++ * If this tasklet is already running on another CPU, it is rescheduled
++ for later.
++ * Schedule must not be called from the tasklet itself (a lockup occurs)
+ * Tasklet is strictly serialized wrt itself, but not
+ wrt another tasklets. If client needs some intertask synchronization,
+ he makes it with spinlocks.
+@@ -528,27 +529,36 @@ struct tasklet_struct name = { NULL, 0,
+ enum
+ {
+ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
+- TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
++ TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
++ TASKLET_STATE_PENDING /* Tasklet is pending */
+ };
+
+-#ifdef CONFIG_SMP
++#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
++#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
++#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
++
++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+ static inline int tasklet_trylock(struct tasklet_struct *t)
+ {
+ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
+ }
+
++static inline int tasklet_tryunlock(struct tasklet_struct *t)
++{
++ return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
++}
++
+ static inline void tasklet_unlock(struct tasklet_struct *t)
+ {
+ smp_mb__before_clear_bit();
+ clear_bit(TASKLET_STATE_RUN, &(t)->state);
+ }
+
+-static inline void tasklet_unlock_wait(struct tasklet_struct *t)
+-{
+- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+-}
++extern void tasklet_unlock_wait(struct tasklet_struct *t);
++
+ #else
+ #define tasklet_trylock(t) 1
++#define tasklet_tryunlock(t) 1
+ #define tasklet_unlock_wait(t) do { } while (0)
+ #define tasklet_unlock(t) do { } while (0)
+ #endif
+@@ -597,17 +607,8 @@ static inline void tasklet_disable(struc
+ smp_mb();
+ }
+
+-static inline void tasklet_enable(struct tasklet_struct *t)
+-{
+- smp_mb__before_atomic_dec();
+- atomic_dec(&t->count);
+-}
+-
+-static inline void tasklet_hi_enable(struct tasklet_struct *t)
+-{
+- smp_mb__before_atomic_dec();
+- atomic_dec(&t->count);
+-}
++extern void tasklet_enable(struct tasklet_struct *t);
++extern void tasklet_hi_enable(struct tasklet_struct *t);
+
+ extern void tasklet_kill(struct tasklet_struct *t);
+ extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -21,6 +21,7 @@
+ #include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/rcupdate.h>
++#include <linux/delay.h>
+ #include <linux/ftrace.h>
+ #include <linux/smp.h>
+ #include <linux/smpboot.h>
+@@ -648,15 +649,45 @@ struct tasklet_head
+ static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+ static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
+
++static void inline
++__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
++{
++ if (tasklet_trylock(t)) {
++again:
++ /* We may have been preempted before tasklet_trylock
++ * and __tasklet_action may have already run.
++ * So double check the sched bit while the tasklet
++ * is locked before adding it to the list.
++ */
++ if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
++ t->next = NULL;
++ *head->tail = t;
++ head->tail = &(t->next);
++ raise_softirq_irqoff(nr);
++ tasklet_unlock(t);
++ } else {
++ /* This is subtle. If we hit the corner case above
++ * It is possible that we get preempted right here,
++ * and another task has successfully called
++ * tasklet_schedule(), then this function, and
++ * failed on the trylock. Thus we must be sure
++ * before releasing the tasklet lock, that the
++ * SCHED_BIT is clear. Otherwise the tasklet
++ * may get its SCHED_BIT set, but not added to the
++ * list
++ */
++ if (!tasklet_tryunlock(t))
++ goto again;
++ }
++ }
++}
++
+ void __tasklet_schedule(struct tasklet_struct *t)
+ {
+ unsigned long flags;
+
+ local_irq_save(flags);
+- t->next = NULL;
+- *__this_cpu_read(tasklet_vec.tail) = t;
+- __this_cpu_write(tasklet_vec.tail, &(t->next));
+- raise_softirq_irqoff(TASKLET_SOFTIRQ);
++ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+
+@@ -667,10 +698,7 @@ void __tasklet_hi_schedule(struct taskle
+ unsigned long flags;
+
+ local_irq_save(flags);
+- t->next = NULL;
+- *__this_cpu_read(tasklet_hi_vec.tail) = t;
+- __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
+- raise_softirq_irqoff(HI_SOFTIRQ);
++ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
+ local_irq_restore(flags);
+ }
+
+@@ -678,50 +706,119 @@ EXPORT_SYMBOL(__tasklet_hi_schedule);
+
+ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
+ {
+- BUG_ON(!irqs_disabled());
+-
+- t->next = __this_cpu_read(tasklet_hi_vec.head);
+- __this_cpu_write(tasklet_hi_vec.head, t);
+- __raise_softirq_irqoff(HI_SOFTIRQ);
++ __tasklet_hi_schedule(t);
+ }
+
+ EXPORT_SYMBOL(__tasklet_hi_schedule_first);
+
+-static void tasklet_action(struct softirq_action *a)
++void tasklet_enable(struct tasklet_struct *t)
+ {
+- struct tasklet_struct *list;
++ if (!atomic_dec_and_test(&t->count))
++ return;
++ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
++ tasklet_schedule(t);
++}
+
+- local_irq_disable();
+- list = __this_cpu_read(tasklet_vec.head);
+- __this_cpu_write(tasklet_vec.head, NULL);
+- __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
+- local_irq_enable();
++EXPORT_SYMBOL(tasklet_enable);
++
++void tasklet_hi_enable(struct tasklet_struct *t)
++{
++ if (!atomic_dec_and_test(&t->count))
++ return;
++ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
++ tasklet_hi_schedule(t);
++}
++
++EXPORT_SYMBOL(tasklet_hi_enable);
++
++static void
++__tasklet_action(struct softirq_action *a, struct tasklet_struct *list)
++{
++ int loops = 1000000;
+
+ while (list) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+- if (tasklet_trylock(t)) {
+- if (!atomic_read(&t->count)) {
+- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+- BUG();
+- t->func(t->data);
+- tasklet_unlock(t);
+- continue;
+- }
+- tasklet_unlock(t);
++ /*
++ * Should always succeed - after a tasklet got on the
++ * list (after getting the SCHED bit set from 0 to 1),
++ * nothing but the tasklet softirq it got queued to can
++ * lock it:
++ */
++ if (!tasklet_trylock(t)) {
++ WARN_ON(1);
++ continue;
+ }
+
+- local_irq_disable();
+ t->next = NULL;
+- *__this_cpu_read(tasklet_vec.tail) = t;
+- __this_cpu_write(tasklet_vec.tail, &(t->next));
+- __raise_softirq_irqoff(TASKLET_SOFTIRQ);
+- local_irq_enable();
++
++ /*
++ * If we cannot handle the tasklet because it's disabled,
++ * mark it as pending. tasklet_enable() will later
++ * re-schedule the tasklet.
++ */
++ if (unlikely(atomic_read(&t->count))) {
++out_disabled:
++ /* implicit unlock: */
++ wmb();
++ t->state = TASKLET_STATEF_PENDING;
++ continue;
++ }
++
++ /*
++ * After this point on the tasklet might be rescheduled
++ * on another CPU, but it can only be added to another
++ * CPU's tasklet list if we unlock the tasklet (which we
++ * dont do yet).
++ */
++ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
++ WARN_ON(1);
++
++again:
++ t->func(t->data);
++
++ /*
++ * Try to unlock the tasklet. We must use cmpxchg, because
++ * another CPU might have scheduled or disabled the tasklet.
++ * We only allow the STATE_RUN -> 0 transition here.
++ */
++ while (!tasklet_tryunlock(t)) {
++ /*
++ * If it got disabled meanwhile, bail out:
++ */
++ if (atomic_read(&t->count))
++ goto out_disabled;
++ /*
++ * If it got scheduled meanwhile, re-execute
++ * the tasklet function:
++ */
++ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
++ goto again;
++ if (!--loops) {
++ printk("hm, tasklet state: %08lx\n", t->state);
++ WARN_ON(1);
++ tasklet_unlock(t);
++ break;
++ }
++ }
+ }
+ }
+
++static void tasklet_action(struct softirq_action *a)
++{
++ struct tasklet_struct *list;
++
++ local_irq_disable();
++ list = __get_cpu_var(tasklet_vec).head;
++ __get_cpu_var(tasklet_vec).head = NULL;
++ __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
++ local_irq_enable();
++
++ __tasklet_action(a, list);
++}
++
+ static void tasklet_hi_action(struct softirq_action *a)
+ {
+ struct tasklet_struct *list;
+@@ -732,29 +829,7 @@ static void tasklet_hi_action(struct sof
+ __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
+ local_irq_enable();
+
+- while (list) {
+- struct tasklet_struct *t = list;
+-
+- list = list->next;
+-
+- if (tasklet_trylock(t)) {
+- if (!atomic_read(&t->count)) {
+- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+- BUG();
+- t->func(t->data);
+- tasklet_unlock(t);
+- continue;
+- }
+- tasklet_unlock(t);
+- }
+-
+- local_irq_disable();
+- t->next = NULL;
+- *__this_cpu_read(tasklet_hi_vec.tail) = t;
+- __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
+- __raise_softirq_irqoff(HI_SOFTIRQ);
+- local_irq_enable();
+- }
++ __tasklet_action(a, list);
+ }
+
+
+@@ -777,7 +852,7 @@ void tasklet_kill(struct tasklet_struct
+
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ do {
+- yield();
++ msleep(1);
+ } while (test_bit(TASKLET_STATE_SCHED, &t->state));
+ }
+ tasklet_unlock_wait(t);
+@@ -983,6 +1058,23 @@ void __init softirq_init(void)
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action);
+ }
+
++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
++void tasklet_unlock_wait(struct tasklet_struct *t)
++{
++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
++ /*
++ * Hack for now to avoid this busy-loop:
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++ msleep(1);
++#else
++ barrier();
++#endif
++ }
++}
++EXPORT_SYMBOL(tasklet_unlock_wait);
++#endif
++
+ static int ksoftirqd_should_run(unsigned int cpu)
+ {
+ return local_softirq_pending();
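
The core of the change is the extra PENDING state: a disabled tasklet is
parked instead of being requeued forever, and tasklet_enable() reschedules
it exactly once. A single-threaded user-space model of just that state
handling (struct and helper names mirror the patch but are illustrative, and
all the locking/cmpxchg details are left out):

  #include <stdio.h>

  #define STATE_SCHED   0x1
  #define STATE_PENDING 0x4

  struct tasklet_model {
          unsigned int state;
          int count;                      /* > 0 means disabled */
          void (*func)(void);
  };

  static void run_tasklet(struct tasklet_model *t)
  {
          if (t->count) {
                  /* Disabled: park it instead of requeueing it forever. */
                  t->state |= STATE_PENDING;
                  return;
          }
          t->state &= ~STATE_SCHED;
          t->func();
  }

  static void tasklet_enable_model(struct tasklet_model *t)
  {
          if (--t->count)
                  return;
          if (t->state & STATE_PENDING) {
                  t->state &= ~STATE_PENDING;
                  t->state |= STATE_SCHED;
                  run_tasklet(t);         /* deferred execution happens now */
          }
  }

  static void work(void) { puts("tasklet body ran"); }

  int main(void)
  {
          struct tasklet_model t = { .state = STATE_SCHED, .count = 1, .func = work };

          run_tasklet(&t);                /* disabled: parked, no busy loop  */
          tasklet_enable_model(&t);       /* enable kicks the parked tasklet */
          return 0;
  }
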
diff --git a/patches/tasklist-lock-fix-section-conflict.patch b/patches/tasklist-lock-fix-section-conflict.patch
new file mode 100644
index 0000000..48c0e4e
--- /dev/null
+++ b/patches/tasklist-lock-fix-section-conflict.patch
@@ -0,0 +1,57 @@
+Subject: rwlocks: Fix section mismatch
+From: John Kacur <jkacur@redhat.com>
+Date: Mon, 19 Sep 2011 11:09:27 +0200 (CEST)
+
+This fixes the following build error for the preempt-rt kernel.
+
+make kernel/fork.o
+ CC kernel/fork.o
+kernel/fork.c:90: error: section of 'tasklist_lock' conflicts with previous declaration
+make[2]: *** [kernel/fork.o] Error 1
+make[1]: *** [kernel/fork.o] Error 2
+
+The rt kernel cache aligns the RWLOCK in DEFINE_RWLOCK by default.
+The non-rt kernels explicitly cache align only the tasklist_lock in
+kernel/fork.c.
+That can create a build conflict. This fixes the build problem by making the
+non-rt kernels cache align RWLOCKs by default. The side effect is that
+the other RWLOCKs are also cache aligned for non-rt.
+
+This is a short term solution for rt only.
+The longer term solution would be to push the cache aligned DEFINE_RWLOCK
+to mainline. If there are objections, then we could create a
+DEFINE_RWLOCK_CACHE_ALIGNED or something of that nature.
+
+Comments? Objections?
+
+Signed-off-by: John Kacur <jkacur@redhat.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/alpine.LFD.2.00.1109191104010.23118@localhost6.localdomain6
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/rwlock_types.h | 3 ++-
+ kernel/fork.c | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/include/linux/rwlock_types.h
++++ b/include/linux/rwlock_types.h
+@@ -47,6 +47,7 @@ typedef struct {
+ RW_DEP_MAP_INIT(lockname) }
+ #endif
+
+-#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
++#define DEFINE_RWLOCK(name) \
++ rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
+
+ #endif /* __LINUX_RWLOCK_TYPES_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -93,7 +93,7 @@ int max_threads; /* tunable limit on nr
+
+ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
+
+-__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
++DEFINE_RWLOCK(tasklist_lock); /* outer */
+
+ #ifdef CONFIG_PROVE_RCU
+ int lockdep_tasklist_lock_is_held(void)
diff --git a/patches/timekeeping-delay-clock-cycle-last-update.patch b/patches/timekeeping-delay-clock-cycle-last-update.patch
new file mode 100644
index 0000000..249e391
--- /dev/null
+++ b/patches/timekeeping-delay-clock-cycle-last-update.patch
@@ -0,0 +1,33 @@
+Subject: timekeeping: Delay update of clock->cycle_last
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Feb 2013 00:06:18 +0100
+
+When calculating the new timekeeper values, store the new cycle_last
+value in the timekeeper and update clock->cycle_last only when we
+actually apply the new values.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/timekeeping.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -1095,7 +1095,7 @@ static cycle_t logarithmic_accumulation(
+
+ /* Accumulate one shifted interval */
+ offset -= interval;
+- tk->clock->cycle_last += interval;
++ tk->cycle_last += interval;
+
+ tk->xtime_nsec += tk->xtime_interval << shift;
+ accumulate_nsecs_to_secs(tk);
+@@ -1210,6 +1210,8 @@ static void update_wall_time(void)
+ */
+ accumulate_nsecs_to_secs(tk);
+
++ /* Update clock->cycle_last with the new value */
++ clock->cycle_last = tk->cycle_last;
+ timekeeping_update(tk, false);
+
+ out:
diff --git a/patches/timekeeping-do-not-calc-crap-over-and-over.patch b/patches/timekeeping-do-not-calc-crap-over-and-over.patch
new file mode 100644
index 0000000..e73e640
--- /dev/null
+++ b/patches/timekeeping-do-not-calc-crap-over-and-over.patch
@@ -0,0 +1,34 @@
+Subject: timekeeping: Calc stuff once
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Feb 2013 00:12:36 +0100
+
+Calculate the cycle interval shifted value once. No functional change,
+just makes the code more readable.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/timekeeping.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -1077,15 +1077,16 @@ static inline void accumulate_nsecs_to_s
+ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
+ u32 shift)
+ {
++ cycle_t interval = tk->cycle_interval << shift;
+ u64 raw_nsecs;
+
+ /* If the offset is smaller then a shifted interval, do nothing */
+- if (offset < tk->cycle_interval<<shift)
++ if (offset < interval)
+ return offset;
+
+ /* Accumulate one shifted interval */
+- offset -= tk->cycle_interval << shift;
+- tk->clock->cycle_last += tk->cycle_interval << shift;
++ offset -= interval;
++ tk->clock->cycle_last += interval;
+
+ tk->xtime_nsec += tk->xtime_interval << shift;
+ accumulate_nsecs_to_secs(tk);
diff --git a/patches/timekeeping-implement-shadow-timekeeper.patch b/patches/timekeeping-implement-shadow-timekeeper.patch
new file mode 100644
index 0000000..242aa22
--- /dev/null
+++ b/patches/timekeeping-implement-shadow-timekeeper.patch
@@ -0,0 +1,156 @@
+Subject: timekeeping: Implement a shadow timekeeper
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Feb 2013 15:47:13 +0100
+
+Use the shadow timekeeper to do the update_wall_time() adjustments and
+then copy it over to the real timekeeper.
+
+Keep the shadow timekeeper in sync when updating stuff outside of
+update_wall_time().
+
+This allows us to limit the timekeeper_seq hold time to the update of
+the real timekeeper and the vsyscall data in the next patch.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/timekeeping.c | 41 +++++++++++++++++++++++++++++------------
+ 1 file changed, 29 insertions(+), 12 deletions(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -28,6 +28,7 @@
+ static struct timekeeper timekeeper;
+ static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+ static seqcount_t timekeeper_seq;
++static struct timekeeper shadow_timekeeper;
+
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
+@@ -221,7 +222,7 @@ int pvclock_gtod_unregister_notifier(str
+ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
+
+ /* must hold timekeeper_lock */
+-static void timekeeping_update(struct timekeeper *tk, bool clearntp)
++static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
+ {
+ if (clearntp) {
+ tk->ntp_error = 0;
+@@ -229,6 +230,9 @@ static void timekeeping_update(struct ti
+ }
+ update_vsyscall(tk);
+ update_pvclock_gtod(tk);
++
++ if (mirror)
++ memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
+ }
+
+ /**
+@@ -422,7 +426,7 @@ int do_settimeofday(const struct timespe
+
+ tk_set_xtime(tk, tv);
+
+- timekeeping_update(tk, true);
++ timekeeping_update(tk, true, true);
+
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+@@ -466,7 +470,7 @@ int timekeeping_inject_offset(struct tim
+ tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
+
+ error: /* even if we error out, we forwarded the time, so call update */
+- timekeeping_update(tk, true);
++ timekeeping_update(tk, true, true);
+
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+@@ -501,7 +505,7 @@ static int change_clocksource(void *data
+ if (old->disable)
+ old->disable(old);
+ }
+- timekeeping_update(tk, true);
++ timekeeping_update(tk, true, true);
+
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+@@ -680,6 +684,8 @@ void __init timekeeping_init(void)
+ tmp.tv_nsec = 0;
+ tk_set_sleep_time(tk, tmp);
+
++ memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
++
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ }
+@@ -735,7 +741,7 @@ void timekeeping_inject_sleeptime(struct
+
+ __timekeeping_inject_sleeptime(tk, delta);
+
+- timekeeping_update(tk, true);
++ timekeeping_update(tk, true, true);
+
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+@@ -773,7 +779,7 @@ static void timekeeping_resume(void)
+ tk->clock->cycle_last = tk->clock->read(tk->clock);
+ tk->ntp_error = 0;
+ timekeeping_suspended = 0;
+- timekeeping_update(tk, false);
++ timekeeping_update(tk, false, true);
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+@@ -1152,7 +1158,8 @@ static inline void old_vsyscall_fixup(st
+ static void update_wall_time(void)
+ {
+ struct clocksource *clock;
+- struct timekeeper *tk = &timekeeper;
++ struct timekeeper *real_tk = &timekeeper;
++ struct timekeeper *tk = &shadow_timekeeper;
+ cycle_t offset;
+ int shift = 0, maxshift;
+ unsigned long flags;
+@@ -1164,16 +1171,16 @@ static void update_wall_time(void)
+ if (unlikely(timekeeping_suspended))
+ goto out;
+
+- clock = tk->clock;
++ clock = real_tk->clock;
+
+ #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+- offset = tk->cycle_interval;
++ offset = real_tk->cycle_interval;
+ #else
+ offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
+ #endif
+
+ /* Check if there's really nothing to do */
+- if (offset < tk->cycle_interval)
++ if (offset < real_tk->cycle_interval)
+ goto out;
+
+ /*
+@@ -1212,12 +1219,22 @@ static void update_wall_time(void)
+
+ /* Update clock->cycle_last with the new value */
+ clock->cycle_last = tk->cycle_last;
+- timekeeping_update(tk, false);
++ /*
++ * Update the real timekeeper.
++ *
++ * We could avoid this memcpy by switching pointers, but that
++ * requires changes to all other timekeeper usage sites as
++ * well, i.e. move the timekeeper pointer getter into the
++ * spinlocked/seqcount protected sections. And we trade this
++ * memcpy under the timekeeper_seq against one before we start
++ * updating.
++ */
++ memcpy(real_tk, tk, sizeof(*tk));
++ timekeeping_update(real_tk, false, false);
+
+ out:
+ write_seqcount_end(&timekeeper_seq);
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+-
+ }
+
+ /**
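
The pattern is a classic shadow/working-copy update: do the expensive
recalculation on a private copy, then publish it into the live structure with
one memcpy inside a short critical section, so readers only ever have to wait
for the copy. A small user-space model of that idea (names are illustrative;
the mutex stands in for timekeeper_lock/timekeeper_seq):

  #include <pthread.h>
  #include <stdio.h>
  #include <string.h>

  struct tk_model {
          long long xtime_nsec;
          long long cycle_last;
  };

  static struct tk_model timekeeper;              /* what readers see     */
  static struct tk_model shadow_timekeeper;       /* private working copy */
  static pthread_mutex_t tk_lock = PTHREAD_MUTEX_INITIALIZER;

  static void update_wall_time_model(long long delta)
  {
          struct tk_model *tk = &shadow_timekeeper;

          /* Heavy lifting happens on the shadow copy. */
          tk->cycle_last += delta;
          tk->xtime_nsec += delta;

          /* Publish: a short critical section and one memcpy. */
          pthread_mutex_lock(&tk_lock);
          memcpy(&timekeeper, tk, sizeof(*tk));
          pthread_mutex_unlock(&tk_lock);
  }

  int main(void)
  {
          memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
          update_wall_time_model(1000);
          printf("xtime_nsec=%lld\n", timekeeper.xtime_nsec);
          return 0;
  }
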
diff --git a/patches/timekeeping-make-jiffies-lock-internal.patch b/patches/timekeeping-make-jiffies-lock-internal.patch
new file mode 100644
index 0000000..77d6e76
--- /dev/null
+++ b/patches/timekeeping-make-jiffies-lock-internal.patch
@@ -0,0 +1,44 @@
+Subject: timekeeping: Make jiffies_lock internal
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Feb 2013 22:38:07 +0100
+
+Nothing outside of the timekeeping core needs that lock.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/jiffies.h | 1 -
+ kernel/time/tick-internal.h | 2 ++
+ kernel/time/timekeeping.c | 1 +
+ 3 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/linux/jiffies.h
++++ b/include/linux/jiffies.h
+@@ -75,7 +75,6 @@ extern int register_refined_jiffies(long
+ */
+ extern u64 __jiffy_data jiffies_64;
+ extern unsigned long volatile __jiffy_data jiffies;
+-extern seqlock_t jiffies_lock;
+
+ #if (BITS_PER_LONG < 64)
+ u64 get_jiffies_64(void);
+--- a/kernel/time/tick-internal.h
++++ b/kernel/time/tick-internal.h
+@@ -4,6 +4,8 @@
+ #include <linux/hrtimer.h>
+ #include <linux/tick.h>
+
++extern seqlock_t jiffies_lock;
++
+ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
+
+ #define TICK_DO_TIMER_NONE -1
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -23,6 +23,7 @@
+ #include <linux/stop_machine.h>
+ #include <linux/pvclock_gtod.h>
+
++#include "tick-internal.h"
+
+ static struct timekeeper timekeeper;
+
diff --git a/patches/timekeeping-move-lock-out-of-timekeeper.patch b/patches/timekeeping-move-lock-out-of-timekeeper.patch
new file mode 100644
index 0000000..685b899
--- /dev/null
+++ b/patches/timekeeping-move-lock-out-of-timekeeper.patch
@@ -0,0 +1,402 @@
+Subject: timekeeping: Move lock out of timekeeper struct
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Feb 2013 15:05:48 +0100
+
+Make the lock a separate entity. Preparatory patch for shadow
+timekeeper structure.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/timekeeper_internal.h | 2
+ kernel/time/timekeeping.c | 96 +++++++++++++++++-------------------
+ 2 files changed, 47 insertions(+), 51 deletions(-)
+
+--- a/include/linux/timekeeper_internal.h
++++ b/include/linux/timekeeper_internal.h
+@@ -62,8 +62,6 @@ struct timekeeper {
+ ktime_t offs_boot;
+ /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
+ struct timespec raw_time;
+- /* Seqlock for all timekeeper values */
+- seqlock_t lock;
+ };
+
+ static inline struct timespec tk_xtime(struct timekeeper *tk)
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -26,6 +26,7 @@
+ #include "tick-internal.h"
+
+ static struct timekeeper timekeeper;
++static DEFINE_SEQLOCK(timekeeper_lock);
+
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
+@@ -194,11 +195,11 @@ int pvclock_gtod_register_notifier(struc
+ unsigned long flags;
+ int ret;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+ ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
+ /* update timekeeping data */
+ update_pvclock_gtod(tk);
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ return ret;
+ }
+@@ -212,13 +213,12 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_
+ */
+ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
+ {
+- struct timekeeper *tk = &timekeeper;
+ unsigned long flags;
+ int ret;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+ ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ return ret;
+ }
+@@ -279,12 +279,12 @@ void getnstimeofday(struct timespec *ts)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ ts->tv_sec = tk->xtime_sec;
+ nsecs = timekeeping_get_ns(tk);
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ ts->tv_nsec = 0;
+ timespec_add_ns(ts, nsecs);
+@@ -300,11 +300,11 @@ ktime_t ktime_get(void)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+ /*
+ * Use ktime_set/ktime_add_ns to create a proper ktime on
+ * 32-bit architectures without CONFIG_KTIME_SCALAR.
+@@ -331,12 +331,12 @@ void ktime_get_ts(struct timespec *ts)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ ts->tv_sec = tk->xtime_sec;
+ nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ ts->tv_sec += tomono.tv_sec;
+ ts->tv_nsec = 0;
+@@ -364,7 +364,7 @@ void getnstime_raw_and_real(struct times
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ *ts_raw = tk->raw_time;
+ ts_real->tv_sec = tk->xtime_sec;
+@@ -373,7 +373,7 @@ void getnstime_raw_and_real(struct times
+ nsecs_raw = timekeeping_get_ns_raw(tk);
+ nsecs_real = timekeeping_get_ns(tk);
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ timespec_add_ns(ts_raw, nsecs_raw);
+ timespec_add_ns(ts_real, nsecs_real);
+@@ -413,7 +413,7 @@ int do_settimeofday(const struct timespe
+ if (!timespec_valid_strict(tv))
+ return -EINVAL;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ timekeeping_forward_now(tk);
+
+@@ -427,7 +427,7 @@ int do_settimeofday(const struct timespe
+
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -452,7 +452,7 @@ int timekeeping_inject_offset(struct tim
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ timekeeping_forward_now(tk);
+
+@@ -469,7 +469,7 @@ int timekeeping_inject_offset(struct tim
+ error: /* even if we error out, we forwarded the time, so call update */
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -491,7 +491,7 @@ static int change_clocksource(void *data
+
+ new = (struct clocksource *) data;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ timekeeping_forward_now(tk);
+ if (!new->enable || new->enable(new) == 0) {
+@@ -502,7 +502,7 @@ static int change_clocksource(void *data
+ }
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ return 0;
+ }
+@@ -552,11 +552,11 @@ void getrawmonotonic(struct timespec *ts
+ s64 nsecs;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ nsecs = timekeeping_get_ns_raw(tk);
+ *ts = tk->raw_time;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ timespec_add_ns(ts, nsecs);
+ }
+@@ -572,11 +572,11 @@ int timekeeping_valid_for_hres(void)
+ int ret;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ return ret;
+ }
+@@ -591,11 +591,11 @@ u64 timekeeping_max_deferment(void)
+ u64 ret;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ ret = tk->clock->max_idle_ns;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ return ret;
+ }
+@@ -656,11 +656,9 @@ void __init timekeeping_init(void)
+ boot.tv_nsec = 0;
+ }
+
+- seqlock_init(&tk->lock);
+-
+ ntp_init();
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+ clock = clocksource_default_clock();
+ if (clock->enable)
+ clock->enable(clock);
+@@ -679,7 +677,7 @@ void __init timekeeping_init(void)
+ tmp.tv_nsec = 0;
+ tk_set_sleep_time(tk, tmp);
+
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+ }
+
+ /* time in seconds when suspend began */
+@@ -726,7 +724,7 @@ void timekeeping_inject_sleeptime(struct
+ if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
+ return;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ timekeeping_forward_now(tk);
+
+@@ -734,7 +732,7 @@ void timekeeping_inject_sleeptime(struct
+
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -758,7 +756,7 @@ static void timekeeping_resume(void)
+ clockevents_resume();
+ clocksource_resume();
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
+ ts = timespec_sub(ts, timekeeping_suspend_time);
+@@ -769,7 +767,7 @@ static void timekeeping_resume(void)
+ tk->ntp_error = 0;
+ timekeeping_suspended = 0;
+ timekeeping_update(tk, false);
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ touch_softlockup_watchdog();
+
+@@ -788,7 +786,7 @@ static int timekeeping_suspend(void)
+
+ read_persistent_clock(&timekeeping_suspend_time);
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+ timekeeping_forward_now(tk);
+ timekeeping_suspended = 1;
+
+@@ -811,7 +809,7 @@ static int timekeeping_suspend(void)
+ timekeeping_suspend_time =
+ timespec_add(timekeeping_suspend_time, delta_delta);
+ }
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+ clocksource_suspend();
+@@ -1149,7 +1147,7 @@ static void update_wall_time(void)
+ int shift = 0, maxshift;
+ unsigned long flags;
+
+- write_seqlock_irqsave(&tk->lock, flags);
++ write_seqlock_irqsave(&timekeeper_lock, flags);
+
+ /* Make sure we're fully resumed: */
+ if (unlikely(timekeeping_suspended))
+@@ -1204,7 +1202,7 @@ static void update_wall_time(void)
+ timekeeping_update(tk, false);
+
+ out:
+- write_sequnlock_irqrestore(&tk->lock, flags);
++ write_sequnlock_irqrestore(&timekeeper_lock, flags);
+
+ }
+
+@@ -1252,13 +1250,13 @@ void get_monotonic_boottime(struct times
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ ts->tv_sec = tk->xtime_sec;
+ nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
+ sleep = tk->total_sleep_time;
+
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
+ ts->tv_nsec = 0;
+@@ -1317,10 +1315,10 @@ struct timespec current_kernel_time(void
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ now = tk_xtime(tk);
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ return now;
+ }
+@@ -1333,11 +1331,11 @@ struct timespec get_monotonic_coarse(voi
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ now = tk_xtime(tk);
+ mono = tk->wall_to_monotonic;
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
+ now.tv_nsec + mono.tv_nsec);
+@@ -1368,11 +1366,11 @@ void get_xtime_and_monotonic_and_sleep_o
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ *xtim = tk_xtime(tk);
+ *wtom = tk->wall_to_monotonic;
+ *sleep = tk->total_sleep_time;
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+ }
+
+ #ifdef CONFIG_HIGH_RES_TIMERS
+@@ -1392,14 +1390,14 @@ ktime_t ktime_get_update_offsets(ktime_t
+ u64 secs, nsecs;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+
+ secs = tk->xtime_sec;
+ nsecs = timekeeping_get_ns(tk);
+
+ *offs_real = tk->offs_real;
+ *offs_boot = tk->offs_boot;
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
+ now = ktime_sub(now, *offs_real);
+@@ -1417,9 +1415,9 @@ ktime_t ktime_get_monotonic_offset(void)
+ struct timespec wtom;
+
+ do {
+- seq = read_seqbegin(&tk->lock);
++ seq = read_seqbegin(&timekeeper_lock);
+ wtom = tk->wall_to_monotonic;
+- } while (read_seqretry(&tk->lock, seq));
++ } while (read_seqretry(&timekeeper_lock, seq));
+
+ return timespec_to_ktime(wtom);
+ }
diff --git a/patches/timekeeping-shorten-seq-count-region.patch b/patches/timekeeping-shorten-seq-count-region.patch
new file mode 100644
index 0000000..8d75ae5
--- /dev/null
+++ b/patches/timekeeping-shorten-seq-count-region.patch
@@ -0,0 +1,44 @@
+Subject: timekeeping: Shorten seq_count region
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 16 Feb 2013 00:39:49 +0100
+
+Shorten the seqcount write hold region to the actual update of the
+timekeeper and the related data (e.g. vsyscall).
+
+On a contemporary x86 system this reduces the maximum latencies on
+Preempt-RT from 8us to 4us on the non-timekeeping cores.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/timekeeping.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -1165,7 +1165,6 @@ static void update_wall_time(void)
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&timekeeper_lock, flags);
+- write_seqcount_begin(&timekeeper_seq);
+
+ /* Make sure we're fully resumed: */
+ if (unlikely(timekeeping_suspended))
+@@ -1217,6 +1216,7 @@ static void update_wall_time(void)
+ */
+ accumulate_nsecs_to_secs(tk);
+
++ write_seqcount_begin(&timekeeper_seq);
+ /* Update clock->cycle_last with the new value */
+ clock->cycle_last = tk->cycle_last;
+ /*
+@@ -1231,9 +1231,8 @@ static void update_wall_time(void)
+ */
+ memcpy(real_tk, tk, sizeof(*tk));
+ timekeeping_update(real_tk, false, false);
+-
+-out:
+ write_seqcount_end(&timekeeper_seq);
++out:
+ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ }
+
diff --git a/patches/timekeeping-split-jiffies-lock.patch b/patches/timekeeping-split-jiffies-lock.patch
new file mode 100644
index 0000000..8586d59
--- /dev/null
+++ b/patches/timekeeping-split-jiffies-lock.patch
@@ -0,0 +1,148 @@
+Subject: timekeeping-split-jiffies-lock.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 14 Feb 2013 22:36:59 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/jiffies.c | 7 ++++---
+ kernel/time/tick-common.c | 10 ++++++----
+ kernel/time/tick-internal.h | 3 ++-
+ kernel/time/tick-sched.c | 16 ++++++++++------
+ kernel/time/timekeeping.c | 6 ++++--
+ 5 files changed, 26 insertions(+), 16 deletions(-)
+
+--- a/kernel/time/jiffies.c
++++ b/kernel/time/jiffies.c
+@@ -67,7 +67,8 @@ static struct clocksource clocksource_ji
+ .shift = JIFFIES_SHIFT,
+ };
+
+-__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
++__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
++__cacheline_aligned_in_smp seqcount_t jiffies_seq;
+
+ #if (BITS_PER_LONG < 64)
+ u64 get_jiffies_64(void)
+@@ -76,9 +77,9 @@ u64 get_jiffies_64(void)
+ u64 ret;
+
+ do {
+- seq = read_seqbegin(&jiffies_lock);
++ seq = read_seqcount_begin(&jiffies_seq);
+ ret = jiffies_64;
+- } while (read_seqretry(&jiffies_lock, seq));
++ } while (read_seqcount_retry(&jiffies_seq, seq));
+ return ret;
+ }
+ EXPORT_SYMBOL(get_jiffies_64);
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -63,13 +63,15 @@ int tick_is_oneshot_available(void)
+ static void tick_periodic(int cpu)
+ {
+ if (tick_do_timer_cpu == cpu) {
+- write_seqlock(&jiffies_lock);
++ raw_spin_lock(&jiffies_lock);
++ write_seqcount_begin(&jiffies_seq);
+
+ /* Keep track of the next tick event */
+ tick_next_period = ktime_add(tick_next_period, tick_period);
+
+ do_timer(1);
+- write_sequnlock(&jiffies_lock);
++ write_seqcount_end(&jiffies_seq);
++ raw_spin_unlock(&jiffies_lock);
+ }
+
+ update_process_times(user_mode(get_irq_regs()));
+@@ -130,9 +132,9 @@ void tick_setup_periodic(struct clock_ev
+ ktime_t next;
+
+ do {
+- seq = read_seqbegin(&jiffies_lock);
++ seq = read_seqcount_begin(&jiffies_seq);
+ next = tick_next_period;
+- } while (read_seqretry(&jiffies_lock, seq));
++ } while (read_seqcount_retry(&jiffies_seq, seq));
+
+ clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+
+--- a/kernel/time/tick-internal.h
++++ b/kernel/time/tick-internal.h
+@@ -4,7 +4,8 @@
+ #include <linux/hrtimer.h>
+ #include <linux/tick.h>
+
+-extern seqlock_t jiffies_lock;
++extern raw_spinlock_t jiffies_lock;
++extern seqcount_t jiffies_seq;
+
+ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
+
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -56,7 +56,8 @@ static void tick_do_update_jiffies64(kti
+ return;
+
+ /* Reevalute with jiffies_lock held */
+- write_seqlock(&jiffies_lock);
++ raw_spin_lock(&jiffies_lock);
++ write_seqcount_begin(&jiffies_seq);
+
+ delta = ktime_sub(now, last_jiffies_update);
+ if (delta.tv64 >= tick_period.tv64) {
+@@ -79,7 +80,8 @@ static void tick_do_update_jiffies64(kti
+ /* Keep the tick_next_period variable up to date */
+ tick_next_period = ktime_add(last_jiffies_update, tick_period);
+ }
+- write_sequnlock(&jiffies_lock);
++ write_seqcount_end(&jiffies_seq);
++ raw_spin_unlock(&jiffies_lock);
+ }
+
+ /*
+@@ -89,12 +91,14 @@ static ktime_t tick_init_jiffy_update(vo
+ {
+ ktime_t period;
+
+- write_seqlock(&jiffies_lock);
++ raw_spin_lock(&jiffies_lock);
++ write_seqcount_begin(&jiffies_seq);
+ /* Did we start the jiffies update yet ? */
+ if (last_jiffies_update.tv64 == 0)
+ last_jiffies_update = tick_next_period;
+ period = last_jiffies_update;
+- write_sequnlock(&jiffies_lock);
++ write_seqcount_end(&jiffies_seq);
++ raw_spin_unlock(&jiffies_lock);
+ return period;
+ }
+
+@@ -325,11 +329,11 @@ static ktime_t tick_nohz_stop_sched_tick
+
+ /* Read jiffies and the time when jiffies were updated last */
+ do {
+- seq = read_seqbegin(&jiffies_lock);
++ seq = read_seqcount_begin(&jiffies_seq);
+ last_update = last_jiffies_update;
+ last_jiffies = jiffies;
+ time_delta = timekeeping_max_deferment();
+- } while (read_seqretry(&jiffies_lock, seq));
++ } while (read_seqcount_retry(&jiffies_seq, seq));
+
+ if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
+ arch_needs_cpu(cpu)) {
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -1461,7 +1461,9 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_of
+ */
+ void xtime_update(unsigned long ticks)
+ {
+- write_seqlock(&jiffies_lock);
++ raw_spin_lock(&jiffies_lock);
++ write_seqcount_begin(&jiffies_seq);
+ do_timer(ticks);
+- write_sequnlock(&jiffies_lock);
++ write_seqcount_end(&jiffies_seq);
++ raw_spin_unlock(&jiffies_lock);
+ }
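
What the split buys is the classic combination of a plain lock for
writer/writer exclusion plus a sequence counter for lock-free readers. A
user-space model with C11 atomics (names follow the patch; the memory
ordering is simplified to the default seq_cst, the kernel's seqcount_t uses
cheaper barriers):

  #include <stdatomic.h>
  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t jiffies_lock = PTHREAD_MUTEX_INITIALIZER;
  static atomic_uint jiffies_seq;
  static atomic_ullong jiffies_64;
  static atomic_llong last_jiffies_update;

  static void do_timer_model(unsigned long ticks, long long now)
  {
          pthread_mutex_lock(&jiffies_lock);      /* raw_spin_lock()        */
          atomic_fetch_add(&jiffies_seq, 1);      /* write_seqcount_begin() */
          atomic_fetch_add(&jiffies_64, ticks);
          atomic_store(&last_jiffies_update, now);
          atomic_fetch_add(&jiffies_seq, 1);      /* write_seqcount_end()   */
          pthread_mutex_unlock(&jiffies_lock);    /* raw_spin_unlock()      */
  }

  static unsigned long long get_jiffies_64_model(void)
  {
          unsigned int seq;
          unsigned long long ret;

          /* read_seqcount_begin()/_retry(): retry while odd or changed. */
          do {
                  seq = atomic_load(&jiffies_seq);
                  ret = atomic_load(&jiffies_64);
          } while ((seq & 1) || seq != atomic_load(&jiffies_seq));
          return ret;
  }

  int main(void)
  {
          do_timer_model(1, 1000);
          printf("jiffies: %llu\n", get_jiffies_64_model());
          return 0;
  }
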
diff --git a/patches/timekeeping-split-timekeeper-lock.patch b/patches/timekeeping-split-timekeeper-lock.patch
new file mode 100644
index 0000000..b9076c4
--- /dev/null
+++ b/patches/timekeeping-split-timekeeper-lock.patch
@@ -0,0 +1,427 @@
+Subject: timekeeping: Split timekeeper_lock into lock and seqcount
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Feb 2013 15:03:17 +0100
+
+We want to shorten the seqcount write hold time. So split the seqlock
+into a lock and a seqcount.
+
+Open code the seqwrite_lock in the places which matter and drop the
+sequence counter update where it's pointless.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/time/timekeeping.c | 118 +++++++++++++++++++++++++---------------------
+ 1 file changed, 65 insertions(+), 53 deletions(-)
+
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -26,7 +26,8 @@
+ #include "tick-internal.h"
+
+ static struct timekeeper timekeeper;
+-static DEFINE_SEQLOCK(timekeeper_lock);
++static DEFINE_RAW_SPINLOCK(timekeeper_lock);
++static seqcount_t timekeeper_seq;
+
+ /* flag for if timekeeping is suspended */
+ int __read_mostly timekeeping_suspended;
+@@ -186,8 +187,6 @@ static void update_pvclock_gtod(struct t
+
+ /**
+ * pvclock_gtod_register_notifier - register a pvclock timedata update listener
+- *
+- * Must hold write on timekeeper.lock
+ */
+ int pvclock_gtod_register_notifier(struct notifier_block *nb)
+ {
+@@ -195,11 +194,10 @@ int pvclock_gtod_register_notifier(struc
+ unsigned long flags;
+ int ret;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
+ ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
+- /* update timekeeping data */
+ update_pvclock_gtod(tk);
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ return ret;
+ }
+@@ -208,23 +206,21 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_
+ /**
+ * pvclock_gtod_unregister_notifier - unregister a pvclock
+ * timedata update listener
+- *
+- * Must hold write on timekeeper.lock
+ */
+ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
+ {
+ unsigned long flags;
+ int ret;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
+ ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
+
+-/* must hold write on timekeeper.lock */
++/* must hold timekeeper_lock */
+ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
+ {
+ if (clearntp) {
+@@ -279,12 +275,12 @@ void getnstimeofday(struct timespec *ts)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ ts->tv_sec = tk->xtime_sec;
+ nsecs = timekeeping_get_ns(tk);
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ ts->tv_nsec = 0;
+ timespec_add_ns(ts, nsecs);
+@@ -300,11 +296,11 @@ ktime_t ktime_get(void)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+ /*
+ * Use ktime_set/ktime_add_ns to create a proper ktime on
+ * 32-bit architectures without CONFIG_KTIME_SCALAR.
+@@ -331,12 +327,12 @@ void ktime_get_ts(struct timespec *ts)
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ ts->tv_sec = tk->xtime_sec;
+ nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ ts->tv_sec += tomono.tv_sec;
+ ts->tv_nsec = 0;
+@@ -364,7 +360,7 @@ void getnstime_raw_and_real(struct times
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ *ts_raw = tk->raw_time;
+ ts_real->tv_sec = tk->xtime_sec;
+@@ -373,7 +369,7 @@ void getnstime_raw_and_real(struct times
+ nsecs_raw = timekeeping_get_ns_raw(tk);
+ nsecs_real = timekeeping_get_ns(tk);
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ timespec_add_ns(ts_raw, nsecs_raw);
+ timespec_add_ns(ts_real, nsecs_real);
+@@ -413,7 +409,8 @@ int do_settimeofday(const struct timespe
+ if (!timespec_valid_strict(tv))
+ return -EINVAL;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ timekeeping_forward_now(tk);
+
+@@ -427,7 +424,8 @@ int do_settimeofday(const struct timespe
+
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -452,7 +450,8 @@ int timekeeping_inject_offset(struct tim
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ timekeeping_forward_now(tk);
+
+@@ -469,7 +468,8 @@ int timekeeping_inject_offset(struct tim
+ error: /* even if we error out, we forwarded the time, so call update */
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -491,7 +491,8 @@ static int change_clocksource(void *data
+
+ new = (struct clocksource *) data;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ timekeeping_forward_now(tk);
+ if (!new->enable || new->enable(new) == 0) {
+@@ -502,7 +503,8 @@ static int change_clocksource(void *data
+ }
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ return 0;
+ }
+@@ -552,11 +554,11 @@ void getrawmonotonic(struct timespec *ts
+ s64 nsecs;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ nsecs = timekeeping_get_ns_raw(tk);
+ *ts = tk->raw_time;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ timespec_add_ns(ts, nsecs);
+ }
+@@ -572,11 +574,11 @@ int timekeeping_valid_for_hres(void)
+ int ret;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ return ret;
+ }
+@@ -591,11 +593,11 @@ u64 timekeeping_max_deferment(void)
+ u64 ret;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ ret = tk->clock->max_idle_ns;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ return ret;
+ }
+@@ -658,7 +660,8 @@ void __init timekeeping_init(void)
+
+ ntp_init();
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+ clock = clocksource_default_clock();
+ if (clock->enable)
+ clock->enable(clock);
+@@ -677,7 +680,8 @@ void __init timekeeping_init(void)
+ tmp.tv_nsec = 0;
+ tk_set_sleep_time(tk, tmp);
+
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ }
+
+ /* time in seconds when suspend began */
+@@ -724,7 +728,8 @@ void timekeeping_inject_sleeptime(struct
+ if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
+ return;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ timekeeping_forward_now(tk);
+
+@@ -732,7 +737,8 @@ void timekeeping_inject_sleeptime(struct
+
+ timekeeping_update(tk, true);
+
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+@@ -756,7 +762,8 @@ static void timekeeping_resume(void)
+ clockevents_resume();
+ clocksource_resume();
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
+ ts = timespec_sub(ts, timekeeping_suspend_time);
+@@ -767,7 +774,8 @@ static void timekeeping_resume(void)
+ tk->ntp_error = 0;
+ timekeeping_suspended = 0;
+ timekeeping_update(tk, false);
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ touch_softlockup_watchdog();
+
+@@ -786,7 +794,8 @@ static int timekeeping_suspend(void)
+
+ read_persistent_clock(&timekeeping_suspend_time);
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+ timekeeping_forward_now(tk);
+ timekeeping_suspended = 1;
+
+@@ -809,7 +818,8 @@ static int timekeeping_suspend(void)
+ timekeeping_suspend_time =
+ timespec_add(timekeeping_suspend_time, delta_delta);
+ }
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+ clocksource_suspend();
+@@ -1147,7 +1157,8 @@ static void update_wall_time(void)
+ int shift = 0, maxshift;
+ unsigned long flags;
+
+- write_seqlock_irqsave(&timekeeper_lock, flags);
++ raw_spin_lock_irqsave(&timekeeper_lock, flags);
++ write_seqcount_begin(&timekeeper_seq);
+
+ /* Make sure we're fully resumed: */
+ if (unlikely(timekeeping_suspended))
+@@ -1202,7 +1213,8 @@ static void update_wall_time(void)
+ timekeeping_update(tk, false);
+
+ out:
+- write_sequnlock_irqrestore(&timekeeper_lock, flags);
++ write_seqcount_end(&timekeeper_seq);
++ raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+ }
+
+@@ -1250,13 +1262,13 @@ void get_monotonic_boottime(struct times
+ WARN_ON(timekeeping_suspended);
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ ts->tv_sec = tk->xtime_sec;
+ nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
+ sleep = tk->total_sleep_time;
+
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
+ ts->tv_nsec = 0;
+@@ -1315,10 +1327,10 @@ struct timespec current_kernel_time(void
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ now = tk_xtime(tk);
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ return now;
+ }
+@@ -1331,11 +1343,11 @@ struct timespec get_monotonic_coarse(voi
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ now = tk_xtime(tk);
+ mono = tk->wall_to_monotonic;
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
+ now.tv_nsec + mono.tv_nsec);
+@@ -1366,11 +1378,11 @@ void get_xtime_and_monotonic_and_sleep_o
+ unsigned long seq;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ *xtim = tk_xtime(tk);
+ *wtom = tk->wall_to_monotonic;
+ *sleep = tk->total_sleep_time;
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+ }
+
+ #ifdef CONFIG_HIGH_RES_TIMERS
+@@ -1390,14 +1402,14 @@ ktime_t ktime_get_update_offsets(ktime_t
+ u64 secs, nsecs;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+
+ secs = tk->xtime_sec;
+ nsecs = timekeeping_get_ns(tk);
+
+ *offs_real = tk->offs_real;
+ *offs_boot = tk->offs_boot;
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
+ now = ktime_sub(now, *offs_real);
+@@ -1415,9 +1427,9 @@ ktime_t ktime_get_monotonic_offset(void)
+ struct timespec wtom;
+
+ do {
+- seq = read_seqbegin(&timekeeper_lock);
++ seq = read_seqcount_begin(&timekeeper_seq);
+ wtom = tk->wall_to_monotonic;
+- } while (read_seqretry(&timekeeper_lock, seq));
++ } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+ return timespec_to_ktime(wtom);
+ }
diff --git a/patches/timekeeping-store-cycle-last-in-timekeeper.patch b/patches/timekeeping-store-cycle-last-in-timekeeper.patch
new file mode 100644
index 0000000..c69aadb
--- /dev/null
+++ b/patches/timekeeping-store-cycle-last-in-timekeeper.patch
@@ -0,0 +1,47 @@
+Subject: timekeeping: Store cycle_last value in timekeeper struct as well
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 15 Feb 2013 17:15:49 +0100
+
+For implementing a shadow timekeeper and a split calculation/update
+region we need to store the cycle_last value in the timekeeper and
+update the value in the clocksource struct only in the update region.
+
+Add the extra storage to the timekeeper.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/timekeeper_internal.h | 2 ++
+ kernel/time/timekeeping.c | 4 ++--
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/include/linux/timekeeper_internal.h
++++ b/include/linux/timekeeper_internal.h
+@@ -20,6 +20,8 @@ struct timekeeper {
+ u32 shift;
+ /* Number of clock cycles in one NTP interval. */
+ cycle_t cycle_interval;
++ /* Last cycle value (also stored in clock->cycle_last) */
++ cycle_t cycle_last;
+ /* Number of clock shifted nano seconds in one NTP interval. */
+ u64 xtime_interval;
+ /* shifted nano seconds left over when rounding cycle_interval */
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -96,7 +96,7 @@ static void tk_setup_internals(struct ti
+
+ old_clock = tk->clock;
+ tk->clock = clock;
+- clock->cycle_last = clock->read(clock);
++ tk->cycle_last = clock->cycle_last = clock->read(clock);
+
+ /* Do the ns -> cycle conversion first, using original mult */
+ tmp = NTP_INTERVAL_LENGTH;
+@@ -247,7 +247,7 @@ static void timekeeping_forward_now(stru
+ clock = tk->clock;
+ cycle_now = clock->read(clock);
+ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+- clock->cycle_last = cycle_now;
++ tk->cycle_last = clock->cycle_last = cycle_now;
+
+ tk->xtime_nsec += cycle_delta * tk->mult;
+
diff --git a/patches/timer-delay-waking-softirqs-from-the-jiffy-tick.patch b/patches/timer-delay-waking-softirqs-from-the-jiffy-tick.patch
new file mode 100644
index 0000000..9eb2d06
--- /dev/null
+++ b/patches/timer-delay-waking-softirqs-from-the-jiffy-tick.patch
@@ -0,0 +1,75 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 21 Aug 2009 11:56:45 +0200
+Subject: timer: delay waking softirqs from the jiffy tick
+
+People were complaining about broken balancing with the recent -rt
+series.
+
+A look at /proc/sched_debug yielded:
+
+cpu#0, 2393.874 MHz
+ .nr_running : 0
+ .load : 0
+ .cpu_load[0] : 177522
+ .cpu_load[1] : 177522
+ .cpu_load[2] : 177522
+ .cpu_load[3] : 177522
+ .cpu_load[4] : 177522
+cpu#1, 2393.874 MHz
+ .nr_running : 4
+ .load : 4096
+ .cpu_load[0] : 181618
+ .cpu_load[1] : 180850
+ .cpu_load[2] : 180274
+ .cpu_load[3] : 179938
+ .cpu_load[4] : 179758
+
+Which indicated the cpu_load computation was hosed; the 177522 value
+indicates that there is one RT task runnable. Initially I thought the
+old problem of calculating the cpu_load from a softirq had resurfaced;
+however, looking at the code shows it's being done from scheduler_tick().
+
+[ we really should fix this RT/cfs interaction some day... ]
+
+A few trace_printk()s later:
+
+ sirq-timer/1-19 [001] 174.289744: 19: 50:S ==> [001] 0:140:R <idle>
+ <idle>-0 [001] 174.290724: enqueue_task_rt: adding task: 19/sirq-timer/1 with load: 177522
+ <idle>-0 [001] 174.290725: 0:140:R + [001] 19: 50:S sirq-timer/1
+ <idle>-0 [001] 174.290730: scheduler_tick: current load: 177522
+ <idle>-0 [001] 174.290732: scheduler_tick: current: 0/swapper
+ <idle>-0 [001] 174.290736: 0:140:R ==> [001] 19: 50:R sirq-timer/1
+ sirq-timer/1-19 [001] 174.290741: dequeue_task_rt: removing task: 19/sirq-timer/1 with load: 177522
+ sirq-timer/1-19 [001] 174.290743: 19: 50:S ==> [001] 0:140:R <idle>
+
+We see that we always raise the timer softirq before doing the load
+calculation. Avoid this by re-ordering the scheduler_tick() call in
+update_process_times() to occur before we deal with timers.
+
+This lowers the load back to sanity and restores regular load-balancing
+behaviour.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -1387,13 +1387,13 @@ void update_process_times(int user_tick)
+
+ /* Note: this timer irq context must be accounted for as well. */
+ account_process_tick(p, user_tick);
++ scheduler_tick();
+ run_local_timers();
+ rcu_check_callbacks(cpu, user_tick);
+ #ifdef CONFIG_IRQ_WORK
+ if (in_irq())
+ irq_work_run();
+ #endif
+- scheduler_tick();
+ run_posix_cpu_timers(p);
+ }
+
diff --git a/patches/timer-fd-avoid-live-lock.patch b/patches/timer-fd-avoid-live-lock.patch
new file mode 100644
index 0000000..699bfff
--- /dev/null
+++ b/patches/timer-fd-avoid-live-lock.patch
@@ -0,0 +1,27 @@
+Subject: timer-fd: Prevent live lock
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 25 Jan 2012 11:08:40 +0100
+
+If hrtimer_try_to_cancel() requires a retry, then, depending on the
+priority settings, the retry loop might prevent the timer callback
+from completing on RT. Prevent that by waiting for completion on RT;
+there is no change for a non-RT kernel.
+
+Reported-by: Sankara Muthukrishnan <sankara.m@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable-rt@vger.kernel.org
+---
+ fs/timerfd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/timerfd.c
++++ b/fs/timerfd.c
+@@ -311,7 +311,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, uf
+ if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
+ break;
+ spin_unlock_irq(&ctx->wqh.lock);
+- cpu_relax();
++ hrtimer_wait_for_timer(&ctx->tmr);
+ }
+
+ /*
diff --git a/patches/timer-handle-idle-trylock-in-get-next-timer-irq.patch b/patches/timer-handle-idle-trylock-in-get-next-timer-irq.patch
new file mode 100644
index 0000000..729696c
--- /dev/null
+++ b/patches/timer-handle-idle-trylock-in-get-next-timer-irq.patch
@@ -0,0 +1,78 @@
+Subject: timer-handle-idle-trylock-in-get-next-timer-irq.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 22:08:38 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/spinlock_rt.h | 12 +++++++++++-
+ kernel/rtmutex.c | 7 +------
+ kernel/timer.c | 9 +++++++--
+ 3 files changed, 19 insertions(+), 9 deletions(-)
+
+--- a/include/linux/spinlock_rt.h
++++ b/include/linux/spinlock_rt.h
+@@ -53,7 +53,17 @@ extern void __lockfunc __rt_spin_unlock(
+
+ #define spin_lock_irq(lock) spin_lock(lock)
+
+-#define spin_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
++#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
++
++#define spin_trylock(lock) \
++({ \
++ int __locked; \
++ migrate_disable(); \
++ __locked = spin_do_trylock(lock); \
++ if (!__locked) \
++ migrate_enable(); \
++ __locked; \
++})
+
+ #ifdef CONFIG_LOCKDEP
+ # define spin_lock_nested(lock, subclass) \
+--- a/kernel/rtmutex.c
++++ b/kernel/rtmutex.c
+@@ -861,15 +861,10 @@ EXPORT_SYMBOL(rt_spin_unlock_wait);
+
+ int __lockfunc rt_spin_trylock(spinlock_t *lock)
+ {
+- int ret;
++ int ret = rt_mutex_trylock(&lock->lock);
+
+- migrate_disable();
+- ret = rt_mutex_trylock(&lock->lock);
+ if (ret)
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+- else
+- migrate_enable();
+-
+ return ret;
+ }
+ EXPORT_SYMBOL(rt_spin_trylock);
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -1380,9 +1380,10 @@ unsigned long get_next_timer_interrupt(u
+ /*
+ * On PREEMPT_RT we cannot sleep here. If the trylock does not
+ * succeed then we return the worst-case 'expires in 1 tick'
+- * value:
++ * value. We use the rt functions here directly to avoid a
++ * migrate_disable() call.
+ */
+- if (!spin_trylock(&base->lock))
++ if (!spin_do_trylock(&base->lock))
+ return now + 1;
+ #else
+ spin_lock(&base->lock);
+@@ -1392,7 +1393,11 @@ unsigned long get_next_timer_interrupt(u
+ base->next_timer = __next_timer_interrupt(base);
+ expires = base->next_timer;
+ }
++#ifdef CONFIG_PREEMPT_RT_FULL
++ rt_spin_unlock(&base->lock);
++#else
+ spin_unlock(&base->lock);
++#endif
+
+ if (time_before_eq(expires, now))
+ return now;
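
The rework above moves the migrate_disable() into the spin_trylock() wrapper so that the pinning is undone automatically when the lock cannot be taken. A hedged userspace sketch of that "prepare, try, undo on failure" shape, using pthreads as a stand-in (pin_to_cpu()/unpin_from_cpu() and pinned_trylock() are hypothetical names, not kernel API):

/* Sketch of the balanced trylock wrapper pattern from the hunk above. */
#include <pthread.h>
#include <stdbool.h>

/* Stand-ins for migrate_disable()/migrate_enable(). */
static void pin_to_cpu(void)     { /* e.g. bump a per-thread pin count */ }
static void unpin_from_cpu(void) { /* drop the pin count */ }

/*
 * Like the new spin_trylock(): prepare first, and undo the preparation
 * if the lock was not obtained, so callers never leak the side effect.
 */
static bool pinned_trylock(pthread_mutex_t *lock)
{
	pin_to_cpu();
	if (pthread_mutex_trylock(lock) == 0)
		return true;	/* stay pinned until the matching unlock */
	unpin_from_cpu();	/* failure: leave no side effect behind */
	return false;
}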
diff --git a/patches/timers-avoid-the-base-null-otptimization-on-rt.patch b/patches/timers-avoid-the-base-null-otptimization-on-rt.patch
new file mode 100644
index 0000000..aa2ff13
--- /dev/null
+++ b/patches/timers-avoid-the-base-null-otptimization-on-rt.patch
@@ -0,0 +1,68 @@
+Subject: timers: Avoid the "switch timer base to NULL" trick on RT
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Jul 2011 15:23:39 +0200
+
+On RT that code is preemptible, so we cannot assign NULL to the
+timer's base, as a preempting task would spin forever in
+lock_timer_base().
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/timer.c | 40 ++++++++++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 8 deletions(-)
+
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -717,6 +717,36 @@ static struct tvec_base *lock_timer_base
+ }
+ }
+
++#ifndef CONFIG_PREEMPT_RT_FULL
++static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
++ struct tvec_base *old,
++ struct tvec_base *new)
++{
++ /* See the comment in lock_timer_base() */
++ timer_set_base(timer, NULL);
++ spin_unlock(&old->lock);
++ spin_lock(&new->lock);
++ timer_set_base(timer, new);
++ return new;
++}
++#else
++static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
++ struct tvec_base *old,
++ struct tvec_base *new)
++{
++ /*
++ * We cannot do the above because we might be preempted and
++ * then the preempter would see NULL and loop forever.
++ */
++ if (spin_trylock(&new->lock)) {
++ timer_set_base(timer, new);
++ spin_unlock(&old->lock);
++ return new;
++ }
++ return old;
++}
++#endif
++
+ static inline int
+ __mod_timer(struct timer_list *timer, unsigned long expires,
+ bool pending_only, int pinned)
+@@ -755,14 +785,8 @@ __mod_timer(struct timer_list *timer, un
+ * handler yet has not finished. This also guarantees that
+ * the timer is serialized wrt itself.
+ */
+- if (likely(base->running_timer != timer)) {
+- /* See the comment in lock_timer_base() */
+- timer_set_base(timer, NULL);
+- spin_unlock(&base->lock);
+- base = new_base;
+- spin_lock(&base->lock);
+- timer_set_base(timer, base);
+- }
++ if (likely(base->running_timer != timer))
++ base = switch_timer_base(timer, base, new_base);
+ }
+
+ timer->expires = expires;
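
The RT variant of switch_timer_base() above avoids publishing a NULL base (which a preempted reader could spin on) by trying to take the new lock and simply staying on the old base if that fails. A small userspace analogy with hypothetical types, assuming the caller already holds old->lock:

/* Sketch of the trylock-or-keep-old base switch used above. */
#include <pthread.h>

struct base {
	pthread_mutex_t lock;
	/* ... per-base item lists ... */
};

struct item {
	struct base *base;	/* always points at a valid base */
};

/* Caller holds old->lock; returns the base whose lock is now held. */
static struct base *switch_base(struct item *it, struct base *old,
				struct base *new)
{
	if (pthread_mutex_trylock(&new->lock) == 0) {
		it->base = new;			/* re-home the item */
		pthread_mutex_unlock(&old->lock);
		return new;
	}
	return old;	/* contended: keep the old, still-locked base */
}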
diff --git a/patches/timers-mov-printk_tick-to-soft-interrupt.patch b/patches/timers-mov-printk_tick-to-soft-interrupt.patch
new file mode 100644
index 0000000..79c194a
--- /dev/null
+++ b/patches/timers-mov-printk_tick-to-soft-interrupt.patch
@@ -0,0 +1,29 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:30 -0500
+Subject: timers: move printk_tick to soft interrupt
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ kernel/timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -1389,7 +1389,6 @@ void update_process_times(int user_tick)
+ account_process_tick(p, user_tick);
+ run_local_timers();
+ rcu_check_callbacks(cpu, user_tick);
+- printk_tick();
+ #ifdef CONFIG_IRQ_WORK
+ if (in_irq())
+ irq_work_run();
+@@ -1405,6 +1404,7 @@ static void run_timer_softirq(struct sof
+ {
+ struct tvec_base *base = __this_cpu_read(tvec_bases);
+
++ printk_tick();
+ hrtimer_run_pending();
+
+ if (time_after_eq(jiffies, base->timer_jiffies))
diff --git a/patches/timers-preempt-rt-support.patch b/patches/timers-preempt-rt-support.patch
new file mode 100644
index 0000000..436f8b3
--- /dev/null
+++ b/patches/timers-preempt-rt-support.patch
@@ -0,0 +1,56 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:30:20 -0500
+Subject: timers: preempt-rt support
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/timer.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -1352,7 +1352,17 @@ unsigned long get_next_timer_interrupt(u
+ if (cpu_is_offline(smp_processor_id()))
+ return expires;
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ /*
++ * On PREEMPT_RT we cannot sleep here. If the trylock does not
++ * succeed then we return the worst-case 'expires in 1 tick'
++ * value:
++ */
++ if (!spin_trylock(&base->lock))
++ return now + 1;
++#else
+ spin_lock(&base->lock);
++#endif
+ if (base->active_timers) {
+ if (time_before_eq(base->next_timer, base->timer_jiffies))
+ base->next_timer = __next_timer_interrupt(base);
+@@ -1362,7 +1372,6 @@ unsigned long get_next_timer_interrupt(u
+
+ if (time_before_eq(expires, now))
+ return now;
+-
+ return cmp_next_hrtimer_event(now, expires);
+ }
+ #endif
+@@ -1752,7 +1761,7 @@ static void __cpuinit migrate_timers(int
+
+ BUG_ON(cpu_online(cpu));
+ old_base = per_cpu(tvec_bases, cpu);
+- new_base = get_cpu_var(tvec_bases);
++ new_base = get_local_var(tvec_bases);
+ /*
+ * The caller is globally serialized and nobody else
+ * takes two locks at once, deadlock is not possible.
+@@ -1773,7 +1782,7 @@ static void __cpuinit migrate_timers(int
+
+ spin_unlock(&old_base->lock);
+ spin_unlock_irq(&new_base->lock);
+- put_cpu_var(tvec_bases);
++ put_local_var(tvec_bases);
+ }
+ #endif /* CONFIG_HOTPLUG_CPU */
+
diff --git a/patches/timers-prepare-for-full-preemption.patch b/patches/timers-prepare-for-full-preemption.patch
new file mode 100644
index 0000000..859159a
--- /dev/null
+++ b/patches/timers-prepare-for-full-preemption.patch
@@ -0,0 +1,128 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:34 -0500
+Subject: timers: prepare for full preemption
+
+When softirqs can be preempted we need to make sure that cancelling
+the timer from the active thread cannot deadlock against a running
+timer callback. Add a waitqueue to resolve that.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/timer.h | 2 +-
+ kernel/timer.c | 36 +++++++++++++++++++++++++++++++++---
+ 2 files changed, 34 insertions(+), 4 deletions(-)
+
+--- a/include/linux/timer.h
++++ b/include/linux/timer.h
+@@ -241,7 +241,7 @@ extern void add_timer(struct timer_list
+
+ extern int try_to_del_timer_sync(struct timer_list *timer);
+
+-#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+ extern int del_timer_sync(struct timer_list *timer);
+ #else
+ # define del_timer_sync(t) del_timer(t)
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -76,6 +76,7 @@ struct tvec_root {
+ struct tvec_base {
+ spinlock_t lock;
+ struct timer_list *running_timer;
++ wait_queue_head_t wait_for_running_timer;
+ unsigned long timer_jiffies;
+ unsigned long next_timer;
+ unsigned long active_timers;
+@@ -735,12 +736,15 @@ __mod_timer(struct timer_list *timer, un
+
+ debug_activate(timer, expires);
+
++ preempt_disable_rt();
+ cpu = smp_processor_id();
+
+ #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+ cpu = get_nohz_timer_target();
+ #endif
++ preempt_enable_rt();
++
+ new_base = per_cpu(tvec_bases, cpu);
+
+ if (base != new_base) {
+@@ -941,6 +945,29 @@ void add_timer_on(struct timer_list *tim
+ }
+ EXPORT_SYMBOL_GPL(add_timer_on);
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++/*
++ * Wait for a running timer
++ */
++static void wait_for_running_timer(struct timer_list *timer)
++{
++ struct tvec_base *base = timer->base;
++
++ if (base->running_timer == timer)
++ wait_event(base->wait_for_running_timer,
++ base->running_timer != timer);
++}
++
++# define wakeup_timer_waiters(b) wake_up(&(b)->wait_for_running_timer)
++#else
++static inline void wait_for_running_timer(struct timer_list *timer)
++{
++ cpu_relax();
++}
++
++# define wakeup_timer_waiters(b) do { } while (0)
++#endif
++
+ /**
+ * del_timer - deactive a timer.
+ * @timer: the timer to be deactivated
+@@ -998,7 +1025,7 @@ int try_to_del_timer_sync(struct timer_l
+ }
+ EXPORT_SYMBOL(try_to_del_timer_sync);
+
+-#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+ /**
+ * del_timer_sync - deactivate a timer and wait for the handler to finish.
+ * @timer: the timer to be deactivated
+@@ -1058,7 +1085,7 @@ int del_timer_sync(struct timer_list *ti
+ int ret = try_to_del_timer_sync(timer);
+ if (ret >= 0)
+ return ret;
+- cpu_relax();
++ wait_for_running_timer(timer);
+ }
+ }
+ EXPORT_SYMBOL(del_timer_sync);
+@@ -1175,15 +1202,17 @@ static inline void __run_timers(struct t
+ if (irqsafe) {
+ spin_unlock(&base->lock);
+ call_timer_fn(timer, fn, data);
++ base->running_timer = NULL;
+ spin_lock(&base->lock);
+ } else {
+ spin_unlock_irq(&base->lock);
+ call_timer_fn(timer, fn, data);
++ base->running_timer = NULL;
+ spin_lock_irq(&base->lock);
+ }
+ }
+ }
+- base->running_timer = NULL;
++ wake_up(&base->wait_for_running_timer);
+ spin_unlock_irq(&base->lock);
+ }
+
+@@ -1684,6 +1713,7 @@ static int __cpuinit init_timers_cpu(int
+ }
+
+ spin_lock_init(&base->lock);
++ init_waitqueue_head(&base->wait_for_running_timer);
+
+ for (j = 0; j < TVN_SIZE; j++) {
+ INIT_LIST_HEAD(base->tv5.vec + j);
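
The core idea of the patch above is that cancellation blocks on a waitqueue until the running callback finishes instead of busy-waiting. A userspace sketch of that shape with a condition variable (illustrative names only, not the kernel API; initialization of the mutex/condvar is omitted):

/* Sketch: wait for the running callback instead of spinning. */
#include <pthread.h>
#include <stddef.h>

struct timer;

struct timer_base {
	pthread_mutex_t lock;
	pthread_cond_t  waiters;	/* like wait_for_running_timer */
	struct timer   *running;	/* like base->running_timer */
};

/* Handler side: run the callback, then wake any canceller. */
static void run_one(struct timer_base *b, struct timer *t, void (*fn)(void))
{
	pthread_mutex_lock(&b->lock);
	b->running = t;
	pthread_mutex_unlock(&b->lock);

	fn();				/* callback runs without the lock */

	pthread_mutex_lock(&b->lock);
	b->running = NULL;
	pthread_cond_broadcast(&b->waiters);
	pthread_mutex_unlock(&b->lock);
}

/* Cancel side: sleep instead of busy-waiting while the callback runs. */
static void cancel_sync(struct timer_base *b, struct timer *t)
{
	pthread_mutex_lock(&b->lock);
	while (b->running == t)
		pthread_cond_wait(&b->waiters, &b->lock);
	/* ... dequeue the timer here ... */
	pthread_mutex_unlock(&b->lock);
}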
diff --git a/patches/tracing-account-for-preempt-off-in-preempt_schedule.patch b/patches/tracing-account-for-preempt-off-in-preempt_schedule.patch
new file mode 100644
index 0000000..df79a43
--- /dev/null
+++ b/patches/tracing-account-for-preempt-off-in-preempt_schedule.patch
@@ -0,0 +1,46 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Thu, 29 Sep 2011 12:24:30 -0500
+Subject: tracing: Account for preempt off in preempt_schedule()
+
+preempt_schedule() uses the preempt_disable_notrace() version because
+the function tracer could otherwise recurse infinitely: the tracer
+uses preempt_enable_notrace(), which may call back into
+preempt_schedule() while NEED_RESCHED is still set and PREEMPT_ACTIVE
+has not been set yet.
+
+See commit: d1f74e20b5b064a130cd0743a256c2d3cfe84010 that made this
+change.
+
+The preemptoff and preemptirqsoff latency tracers require the first
+and last preempt count modifiers to enable tracing. But this skips
+the checks. Since we cannot convert them back to the non-notrace
+version, we can use the idle() hooks for the latency tracers here.
+That is, the start/stop_critical_timings() works well to manually
+start and stop the latency tracer for preempt off timings.
+
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Clark Williams <williams@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/sched/core.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3071,7 +3071,16 @@ asmlinkage void __sched notrace preempt_
+
+ do {
+ add_preempt_count_notrace(PREEMPT_ACTIVE);
++ /*
++ * The add/subtract must not be traced by the function
++ * tracer. But we still want to account for the
++ * preempt off latency tracer. Since the _notrace versions
++ * of add/subtract skip the accounting for latency tracer
++ * we must force it manually.
++ */
++ start_critical_timings();
+ __schedule();
++ stop_critical_timings();
+ sub_preempt_count_notrace(PREEMPT_ACTIVE);
+
+ /*
diff --git a/patches/treercu-use-simple-waitqueue.patch b/patches/treercu-use-simple-waitqueue.patch
new file mode 100644
index 0000000..e89eb39
--- /dev/null
+++ b/patches/treercu-use-simple-waitqueue.patch
@@ -0,0 +1,73 @@
+---
+ kernel/rcutree.c | 13 +++++++------
+ kernel/rcutree.h | 2 +-
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/kernel/rcutree.c
++++ b/kernel/rcutree.c
+@@ -1319,7 +1319,7 @@ static int __noreturn rcu_gp_kthread(voi
+
+ /* Handle grace-period start. */
+ for (;;) {
+- wait_event_interruptible(rsp->gp_wq,
++ swait_event_interruptible(rsp->gp_wq,
+ rsp->gp_flags &
+ RCU_GP_FLAG_INIT);
+ if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
+@@ -1338,7 +1338,7 @@ static int __noreturn rcu_gp_kthread(voi
+ }
+ for (;;) {
+ rsp->jiffies_force_qs = jiffies + j;
+- ret = wait_event_interruptible_timeout(rsp->gp_wq,
++ ret = swait_event_interruptible_timeout(rsp->gp_wq,
+ (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
+ (!ACCESS_ONCE(rnp->qsmask) &&
+ !rcu_preempt_blocked_readers_cgp(rnp)),
+@@ -1423,7 +1423,7 @@ rcu_start_gp(struct rcu_state *rsp, unsi
+ local_irq_restore(flags);
+
+ /* Wake up rcu_gp_kthread() to start the grace period. */
+- wake_up(&rsp->gp_wq);
++ swait_wake(&rsp->gp_wq);
+ }
+
+ /*
+@@ -1438,7 +1438,7 @@ static void rcu_report_qs_rsp(struct rcu
+ {
+ WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+ raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
++ swait_wake(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ }
+
+ /*
+@@ -2003,7 +2003,8 @@ static void force_quiescent_state(struct
+ }
+ rsp->gp_flags |= RCU_GP_FLAG_FQS;
+ raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
++ /* Memory barrier implied by wake_up() path. */
++ swait_wake(&rsp->gp_wq);
+ }
+
+ /*
+@@ -2999,7 +3000,7 @@ static void __init rcu_init_one(struct r
+ }
+
+ rsp->rda = rda;
+- init_waitqueue_head(&rsp->gp_wq);
++ init_swait_head(&rsp->gp_wq);
+ rnp = rsp->level[rcu_num_lvls - 1];
+ for_each_possible_cpu(i) {
+ while (i > rnp->grphi)
+--- a/kernel/rcutree.h
++++ b/kernel/rcutree.h
+@@ -397,7 +397,7 @@ struct rcu_state {
+ unsigned long gpnum; /* Current gp number. */
+ unsigned long completed; /* # of last completed gp. */
+ struct task_struct *gp_kthread; /* Task for grace periods. */
+- wait_queue_head_t gp_wq; /* Where GP task waits. */
++ struct swait_head gp_wq; /* Where GP task waits. */
+ int gp_flags; /* Commands for GP task. */
+
+ /* End of fields guarded by root rcu_node's lock. */
diff --git a/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch
new file mode 100644
index 0000000..01e1f46
--- /dev/null
+++ b/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch
@@ -0,0 +1,65 @@
+Subject: [UPSTREAM] net, RT: Remove preemption disabling in netif_rx()
+From: Priyanka Jain <Priyanka.Jain@freescale.com>
+Date: Thu, 17 May 2012 09:35:11 +0530
+
+1) enqueue_to_backlog() (called from netif_rx()) should be
+   bound to a particular CPU. This can be achieved by
+   disabling migration; there is no need to disable preemption.
+
+2) Fixes the crash "BUG: scheduling while atomic: ksoftirqd"
+   on RT.
+   If preemption is disabled, enqueue_to_backlog() is called
+   in atomic context, and if the backlog exceeds its limit,
+   kfree_skb() is called. But on RT, kfree_skb() might get
+   scheduled out, so it expects a non-atomic context.
+
+3) When CONFIG_PREEMPT_RT_FULL is not defined,
+   migrate_enable() and migrate_disable() map to
+   preempt_enable() and preempt_disable(), so there is
+   no functional change for non-RT.
+
+-Replace preempt_enable(), preempt_disable() with
+ migrate_enable(), migrate_disable() respectively
+-Replace get_cpu(), put_cpu() with get_cpu_light(),
+ put_cpu_light() respectively
+
+Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com>
+Acked-by: Rajan Srivastava <Rajan.Srivastava@freescale.com>
+Cc: <rostedt@goodmis.org>
+Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@freescale.com
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ Testing: Tested successfully on p4080ds(8-core SMP system)
+
+ net/core/dev.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3092,7 +3092,7 @@ int netif_rx(struct sk_buff *skb)
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu;
+
+- preempt_disable();
++ migrate_disable();
+ rcu_read_lock();
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
+@@ -3102,13 +3102,13 @@ int netif_rx(struct sk_buff *skb)
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+ rcu_read_unlock();
+- preempt_enable();
++ migrate_enable();
+ } else
+ #endif
+ {
+ unsigned int qtail;
+- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+- put_cpu();
++ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
++ put_cpu_light();
+ }
+ return ret;
+ }
diff --git a/patches/usb-fix-mouse-problem-copying-large-data.patch b/patches/usb-fix-mouse-problem-copying-large-data.patch
new file mode 100644
index 0000000..781d134
--- /dev/null
+++ b/patches/usb-fix-mouse-problem-copying-large-data.patch
@@ -0,0 +1,36 @@
+From: Wu Zhangjin <wuzj@lemote.com>
+Date: Mon, 4 Jan 2010 11:33:02 +0800
+Subject: USB: Fix the mouse problem when copying large amounts of data
+
+When copying large amounts of data between a USB storage device and
+the hard disk, the USB mouse stops working; this patch fixes that.
+
+[NOTE: This problem has been observed on Loongson family machines; it
+is not clear whether it is reproducible on other platforms.]
+
+Signed-off-by: Hu Hongbing <huhb@lemote.com>
+Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
+
+---
+ drivers/usb/host/ohci-hcd.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/usb/host/ohci-hcd.c
++++ b/drivers/usb/host/ohci-hcd.c
+@@ -857,9 +857,13 @@ static irqreturn_t ohci_irq (struct usb_
+ }
+
+ if (ints & OHCI_INTR_WDH) {
+- spin_lock (&ohci->lock);
+- dl_done_list (ohci);
+- spin_unlock (&ohci->lock);
++ if (ohci->hcca->done_head == 0) {
++ ints &= ~OHCI_INTR_WDH;
++ } else {
++ spin_lock (&ohci->lock);
++ dl_done_list (ohci);
++ spin_unlock (&ohci->lock);
++ }
+ }
+
+ if (quirk_zfmicro(ohci) && (ints & OHCI_INTR_SF)) {
diff --git a/patches/usb-hcd-use-local-irq-nort.patch b/patches/usb-hcd-use-local-irq-nort.patch
new file mode 100644
index 0000000..fc190ca
--- /dev/null
+++ b/patches/usb-hcd-use-local-irq-nort.patch
@@ -0,0 +1,34 @@
+From: Steven Rostedt <srostedt@redhat.com>
+Date: Fri, 3 Jul 2009 08:44:26 -0500
+Subject: usb: Use local_irq_*_nort() variants
+
+[ tglx: Now that irqf_disabled is dead we should kill that ]
+
+Signed-off-by: Steven Rostedt <srostedt@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ drivers/usb/core/hcd.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -2217,7 +2217,7 @@ irqreturn_t usb_hcd_irq (int irq, void *
+ * when the first handler doesn't use it. So let's just
+ * assume it's never used.
+ */
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+
+ if (unlikely(HCD_DEAD(hcd) || !HCD_HW_ACCESSIBLE(hcd)))
+ rc = IRQ_NONE;
+@@ -2226,7 +2226,7 @@ irqreturn_t usb_hcd_irq (int irq, void *
+ else
+ rc = IRQ_HANDLED;
+
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(usb_hcd_irq);
diff --git a/patches/user-use-local-irq-nort.patch b/patches/user-use-local-irq-nort.patch
new file mode 100644
index 0000000..3674e06
--- /dev/null
+++ b/patches/user-use-local-irq-nort.patch
@@ -0,0 +1,29 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 21 Jul 2009 23:06:05 +0200
+Subject: core: Do not disable interrupts on RT in kernel/user.c
+
+Use the local_irq_*_nort variants to reduce latencies in RT. The code
+is serialized by the locks. No need to disable interrupts.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ kernel/user.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -157,11 +157,11 @@ void free_uid(struct user_struct *up)
+ if (!up)
+ return;
+
+- local_irq_save(flags);
++ local_irq_save_nort(flags);
+ if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ free_user(up, flags);
+ else
+- local_irq_restore(flags);
++ local_irq_restore_nort(flags);
+ }
+
+ struct user_struct *alloc_uid(kuid_t uid)
diff --git a/patches/wait-simple-implementation.patch b/patches/wait-simple-implementation.patch
new file mode 100644
index 0000000..e47c729
--- /dev/null
+++ b/patches/wait-simple-implementation.patch
@@ -0,0 +1,337 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon Dec 12 12:29:04 2011 +0100
+Subject: wait-simple: Simple waitqueue implementation
+
+wait_queue is a swiss army knife, and in most cases the complexity is
+not needed. For RT, waitqueues are a constant source of trouble, as we
+can't convert the head lock to a raw spinlock due to fancy and
+long-lasting callbacks.
+
+Provide a slim version, which allows RT to replace wait queues. This
+should go mainline as well, as it lowers memory consumption and
+runtime overhead.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ include/linux/wait-simple.h | 231 ++++++++++++++++++++++++++++++++++++++++++++
+ kernel/Makefile | 2
+ kernel/wait-simple.c | 68 ++++++++++++
+ 3 files changed, 300 insertions(+), 1 deletion(-)
+
+--- /dev/null
++++ b/include/linux/wait-simple.h
+@@ -0,0 +1,231 @@
++#ifndef _LINUX_WAIT_SIMPLE_H
++#define _LINUX_WAIT_SIMPLE_H
++
++#include <linux/spinlock.h>
++#include <linux/list.h>
++
++#include <asm/current.h>
++
++struct swaiter {
++ struct task_struct *task;
++ struct list_head node;
++};
++
++#define DEFINE_SWAITER(name) \
++ struct swaiter name = { \
++ .task = current, \
++ .node = LIST_HEAD_INIT((name).node), \
++ }
++
++struct swait_head {
++ raw_spinlock_t lock;
++ struct list_head list;
++};
++
++#define DEFINE_SWAIT_HEAD(name) \
++ struct swait_head name = { \
++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
++ .list = LIST_HEAD_INIT((name).list), \
++ }
++
++extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
++
++#define init_swait_head(swh) \
++ do { \
++ static struct lock_class_key __key; \
++ \
++ __init_swait_head((swh), &__key); \
++ } while (0)
++
++/*
++ * Waiter functions
++ */
++static inline bool swaiter_enqueued(struct swaiter *w)
++{
++ return w->task != NULL;
++}
++
++extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
++extern void swait_finish(struct swait_head *head, struct swaiter *w);
++
++/*
++ * Adds w to head->list. Must be called with head->lock locked.
++ */
++static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
++{
++ list_add(&w->node, &head->list);
++}
++
++/*
++ * Removes w from head->list. Must be called with head->lock locked.
++ */
++static inline void __swait_dequeue(struct swaiter *w)
++{
++ list_del_init(&w->node);
++}
++
++/*
++ * Check whether a head has waiters enqueued
++ */
++static inline bool swait_head_has_waiters(struct swait_head *h)
++{
++ return !list_empty(&h->list);
++}
++
++/*
++ * Wakeup functions
++ */
++extern int __swait_wake(struct swait_head *head, unsigned int state);
++
++static inline int swait_wake(struct swait_head *head)
++{
++ return swait_head_has_waiters(head) ?
++ __swait_wake(head, TASK_NORMAL) : 0;
++}
++
++static inline int swait_wake_interruptible(struct swait_head *head)
++{
++ return swait_head_has_waiters(head) ?
++ __swait_wake(head, TASK_INTERRUPTIBLE) : 0;
++}
++
++/*
++ * Event API
++ */
++
++#define __swait_event(wq, condition) \
++do { \
++ DEFINE_SWAITER(__wait); \
++ \
++ for (;;) { \
++ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ schedule(); \
++ } \
++ swait_finish(&wq, &__wait); \
++} while (0)
++
++/**
++ * swait_event - sleep until a condition gets true
++ * @wq: the waitqueue to wait on
++ * @condition: a C expression for the event to wait for
++ *
++ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
++ * @condition evaluates to true. The @condition is checked each time
++ * the waitqueue @wq is woken up.
++ *
++ * wake_up() has to be called after changing any variable that could
++ * change the result of the wait condition.
++ */
++#define swait_event(wq, condition) \
++do { \
++ if (condition) \
++ break; \
++ __swait_event(wq, condition); \
++} while (0)
++
++#define __swait_event_interruptible(wq, condition, ret) \
++do { \
++ DEFINE_SWAITER(__wait); \
++ \
++ for (;;) { \
++ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ if (signal_pending(current)) { \
++ ret = -ERESTARTSYS; \
++ break; \
++ } \
++ schedule(); \
++ } \
++ swait_finish(&wq, &__wait); \
++} while (0)
++
++#define __swait_event_interruptible_timeout(wq, condition, ret) \
++do { \
++ DEFINE_SWAITER(__wait); \
++ \
++ for (;;) { \
++ swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ if (signal_pending(current)) { \
++ ret = -ERESTARTSYS; \
++ break; \
++ } \
++ ret = schedule_timeout(ret); \
++ if (!ret) \
++ break; \
++ } \
++ swait_finish(&wq, &__wait); \
++} while (0)
++
++/**
++ * swait_event_interruptible - sleep until a condition gets true
++ * @wq: the waitqueue to wait on
++ * @condition: a C expression for the event to wait for
++ *
++ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
++ * @condition evaluates to true. The @condition is checked each time
++ * the waitqueue @wq is woken up.
++ *
++ * wake_up() has to be called after changing any variable that could
++ * change the result of the wait condition.
++ */
++#define swait_event_interruptible(wq, condition) \
++({ \
++ int __ret = 0; \
++ if (!(condition)) \
++ __swait_event_interruptible(wq, condition, __ret); \
++ __ret; \
++})
++
++#define swait_event_interruptible_timeout(wq, condition, timeout) \
++({ \
++ int __ret = timeout; \
++ if (!(condition)) \
++ __swait_event_interruptible_timeout(wq, condition, __ret); \
++ __ret; \
++})
++
++#define __swait_event_timeout(wq, condition, ret) \
++do { \
++ DEFINE_SWAITER(__wait); \
++ \
++ for (;;) { \
++ swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
++ if (condition) \
++ break; \
++ ret = schedule_timeout(ret); \
++ if (!ret) \
++ break; \
++ } \
++ swait_finish(&wq, &__wait); \
++} while (0)
++
++/**
++ * swait_event_timeout - sleep until a condition gets true or a timeout elapses
++ * @wq: the waitqueue to wait on
++ * @condition: a C expression for the event to wait for
++ * @timeout: timeout, in jiffies
++ *
++ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
++ * @condition evaluates to true. The @condition is checked each time
++ * the waitqueue @wq is woken up.
++ *
++ * wake_up() has to be called after changing any variable that could
++ * change the result of the wait condition.
++ *
++ * The function returns 0 if the @timeout elapsed, and the remaining
++ * jiffies if the condition evaluated to true before the timeout elapsed.
++ */
++#define swait_event_timeout(wq, condition, timeout) \
++({ \
++ long __ret = timeout; \
++ if (!(condition)) \
++ __swait_event_timeout(wq, condition, __ret); \
++ __ret; \
++})
++
++#endif
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \
+ hrtimer.o nsproxy.o srcu.o semaphore.o \
+ notifier.o ksysfs.o cred.o \
+- async.o range.o groups.o lglock.o smpboot.o
++ async.o range.o groups.o lglock.o smpboot.o wait-simple.o
+
+ ifdef CONFIG_FUNCTION_TRACER
+ # Do not trace debug files and internal ftrace files
+--- /dev/null
++++ b/kernel/wait-simple.c
+@@ -0,0 +1,68 @@
++/*
++ * Simple waitqueues without fancy flags and callbacks
++ *
++ * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
++ *
++ * Based on kernel/wait.c
++ *
++ * For licencing details see kernel-base/COPYING
++ */
++#include <linux/init.h>
++#include <linux/export.h>
++#include <linux/sched.h>
++#include <linux/wait-simple.h>
++
++void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
++{
++ raw_spin_lock_init(&head->lock);
++ lockdep_set_class(&head->lock, key);
++ INIT_LIST_HEAD(&head->list);
++}
++EXPORT_SYMBOL(__init_swait_head);
++
++void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
++{
++ unsigned long flags;
++
++ raw_spin_lock_irqsave(&head->lock, flags);
++ w->task = current;
++ if (list_empty(&w->node))
++ __swait_enqueue(head, w);
++ set_current_state(state);
++ raw_spin_unlock_irqrestore(&head->lock, flags);
++}
++EXPORT_SYMBOL(swait_prepare);
++
++void swait_finish(struct swait_head *head, struct swaiter *w)
++{
++ unsigned long flags;
++
++ __set_current_state(TASK_RUNNING);
++ if (w->task) {
++ raw_spin_lock_irqsave(&head->lock, flags);
++ __swait_dequeue(w);
++ raw_spin_unlock_irqrestore(&head->lock, flags);
++ }
++}
++EXPORT_SYMBOL(swait_finish);
++
++int __swait_wake(struct swait_head *head, unsigned int state)
++{
++ struct swaiter *curr, *next;
++ unsigned long flags;
++ int woken = 0;
++
++ raw_spin_lock_irqsave(&head->lock, flags);
++
++ list_for_each_entry_safe(curr, next, &head->list, node) {
++ if (wake_up_state(curr->task, state)) {
++ __swait_dequeue(curr);
++ curr->task = NULL;
++ woken++;
++ }
++ }
++
++ raw_spin_unlock_irqrestore(&head->lock, flags);
++ return woken;
++}
++EXPORT_SYMBOL(__swait_wake);
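
A short usage sketch for the API introduced above, written kernel-style against the patch's own macros (my_head, my_flag, producer() and consumer() are made-up names; this is not a buildable module, just an illustration of how the simple waitqueue is intended to be used):

/* Usage sketch for the wait-simple API added by this patch. */
#include <linux/wait-simple.h>

static DEFINE_SWAIT_HEAD(my_head);
static int my_flag;

/* Consumer: sleeps (TASK_UNINTERRUPTIBLE) until the flag is set. */
static void consumer(void)
{
	swait_event(my_head, my_flag != 0);
}

/* Producer: change the condition first, then wake the waiter. */
static void producer(void)
{
	my_flag = 1;
	swait_wake(&my_head);
}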
diff --git a/patches/wait-simple-rework-for-completions.patch b/patches/wait-simple-rework-for-completions.patch
new file mode 100644
index 0000000..ecf8adb
--- /dev/null
+++ b/patches/wait-simple-rework-for-completions.patch
@@ -0,0 +1,209 @@
+Subject: wait-simple: Rework for use with completions
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 10 Jan 2013 11:47:35 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/wait-simple.h | 56 +++++++----------------------------
+ kernel/wait-simple.c | 69 ++++++++++++++++++++++++++++++++++++++------
+ 2 files changed, 72 insertions(+), 53 deletions(-)
+
+--- a/include/linux/wait-simple.h
++++ b/include/linux/wait-simple.h
+@@ -22,12 +22,14 @@ struct swait_head {
+ struct list_head list;
+ };
+
+-#define DEFINE_SWAIT_HEAD(name) \
+- struct swait_head name = { \
++#define SWAIT_HEAD_INITIALIZER(name) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .list = LIST_HEAD_INIT((name).list), \
+ }
+
++#define DEFINE_SWAIT_HEAD(name) \
++ struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
++
+ extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
+
+ #define init_swait_head(swh) \
+@@ -40,59 +42,25 @@ extern void __init_swait_head(struct swa
+ /*
+ * Waiter functions
+ */
+-static inline bool swaiter_enqueued(struct swaiter *w)
+-{
+- return w->task != NULL;
+-}
+-
++extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w);
+ extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
++extern void swait_finish_locked(struct swait_head *head, struct swaiter *w);
+ extern void swait_finish(struct swait_head *head, struct swaiter *w);
+
+ /*
+- * Adds w to head->list. Must be called with head->lock locked.
+- */
+-static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
+-{
+- list_add(&w->node, &head->list);
+-}
+-
+-/*
+- * Removes w from head->list. Must be called with head->lock locked.
+- */
+-static inline void __swait_dequeue(struct swaiter *w)
+-{
+- list_del_init(&w->node);
+-}
+-
+-/*
+- * Check whether a head has waiters enqueued
+- */
+-static inline bool swait_head_has_waiters(struct swait_head *h)
+-{
+- return !list_empty(&h->list);
+-}
+-
+-/*
+ * Wakeup functions
+ */
+-extern int __swait_wake(struct swait_head *head, unsigned int state);
++extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num);
++extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num);
+
+-static inline int swait_wake(struct swait_head *head)
+-{
+- return swait_head_has_waiters(head) ?
+- __swait_wake(head, TASK_NORMAL) : 0;
+-}
+-
+-static inline int swait_wake_interruptible(struct swait_head *head)
+-{
+- return swait_head_has_waiters(head) ?
+- __swait_wake(head, TASK_INTERRUPTIBLE) : 0;
+-}
++#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1)
++#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1)
++#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0)
++#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0)
+
+ /*
+ * Event API
+ */
+-
+ #define __swait_event(wq, condition) \
+ do { \
+ DEFINE_SWAITER(__wait); \
+--- a/kernel/wait-simple.c
++++ b/kernel/wait-simple.c
+@@ -12,6 +12,24 @@
+ #include <linux/sched.h>
+ #include <linux/wait-simple.h>
+
++/* Adds w to head->list. Must be called with head->lock locked. */
++static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
++{
++ list_add(&w->node, &head->list);
++}
++
++/* Removes w from head->list. Must be called with head->lock locked. */
++static inline void __swait_dequeue(struct swaiter *w)
++{
++ list_del_init(&w->node);
++}
++
++/* Check whether a head has waiters enqueued */
++static inline bool swait_head_has_waiters(struct swait_head *h)
++{
++ return !list_empty(&h->list);
++}
++
+ void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
+ {
+ raw_spin_lock_init(&head->lock);
+@@ -20,19 +38,31 @@ void __init_swait_head(struct swait_head
+ }
+ EXPORT_SYMBOL(__init_swait_head);
+
++void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
++{
++ w->task = current;
++ if (list_empty(&w->node))
++ __swait_enqueue(head, w);
++}
++
+ void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
+ {
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&head->lock, flags);
+- w->task = current;
+- if (list_empty(&w->node))
+- __swait_enqueue(head, w);
+- set_current_state(state);
++ swait_prepare_locked(head, w);
++ __set_current_state(state);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
+ }
+ EXPORT_SYMBOL(swait_prepare);
+
++void swait_finish_locked(struct swait_head *head, struct swaiter *w)
++{
++ __set_current_state(TASK_RUNNING);
++ if (w->task)
++ __swait_dequeue(w);
++}
++
+ void swait_finish(struct swait_head *head, struct swaiter *w)
+ {
+ unsigned long flags;
+@@ -46,22 +76,43 @@ void swait_finish(struct swait_head *hea
+ }
+ EXPORT_SYMBOL(swait_finish);
+
+-int __swait_wake(struct swait_head *head, unsigned int state)
++unsigned int
++__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
+ {
+ struct swaiter *curr, *next;
+- unsigned long flags;
+ int woken = 0;
+
+- raw_spin_lock_irqsave(&head->lock, flags);
+-
+ list_for_each_entry_safe(curr, next, &head->list, node) {
+ if (wake_up_state(curr->task, state)) {
+ __swait_dequeue(curr);
++ /*
++ * The waiting task can free the waiter as
++ * soon as curr->task = NULL is written,
++ * without taking any locks. A memory barrier
++ * is required here to prevent the following
++ * store to curr->task from getting ahead of
++ * the dequeue operation.
++ */
++ smp_wmb();
+ curr->task = NULL;
+- woken++;
++ if (++woken == num)
++ break;
+ }
+ }
++ return woken;
++}
++
++unsigned int
++__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
++{
++ unsigned long flags;
++ int woken;
+
++ if (!swait_head_has_waiters(head))
++ return 0;
++
++ raw_spin_lock_irqsave(&head->lock, flags);
++ woken = __swait_wake_locked(head, state, num);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
+ return woken;
+ }
diff --git a/patches/workqueue-use-locallock.patch b/patches/workqueue-use-locallock.patch
new file mode 100644
index 0000000..a2c611b
--- /dev/null
+++ b/patches/workqueue-use-locallock.patch
@@ -0,0 +1,130 @@
+Subject: Use local irq lock instead of irq disable regions
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 17 Jul 2011 21:42:26 +0200
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/workqueue.c | 27 +++++++++++++++------------
+ 1 file changed, 15 insertions(+), 12 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -41,6 +41,7 @@
+ #include <linux/debug_locks.h>
+ #include <linux/lockdep.h>
+ #include <linux/idr.h>
++#include <linux/locallock.h>
+
+ #include "workqueue_sched.h"
+
+@@ -278,6 +279,8 @@ EXPORT_SYMBOL_GPL(system_unbound_wq);
+ struct workqueue_struct *system_freezable_wq __read_mostly;
+ EXPORT_SYMBOL_GPL(system_freezable_wq);
+
++static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
++
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/workqueue.h>
+
+@@ -1092,7 +1095,7 @@ static int try_to_grab_pending(struct wo
+ {
+ struct global_cwq *gcwq;
+
+- local_irq_save(*flags);
++ local_lock_irqsave(pendingb_lock, *flags);
+
+ /* try to steal the timer if it exists */
+ if (is_dwork) {
+@@ -1151,7 +1154,7 @@ static int try_to_grab_pending(struct wo
+ }
+ spin_unlock(&gcwq->lock);
+ fail:
+- local_irq_restore(*flags);
++ local_unlock_irqrestore(pendingb_lock, *flags);
+ if (work_is_canceling(work))
+ return -ENOENT;
+ cpu_relax();
+@@ -1246,7 +1249,7 @@ static void __queue_work(unsigned int cp
+ * queued or lose PENDING. Grabbing PENDING and queueing should
+ * happen with IRQ disabled.
+ */
+- WARN_ON_ONCE(!irqs_disabled());
++ WARN_ON_ONCE_NONRT(!irqs_disabled());
+
+ debug_work_activate(work);
+
+@@ -1336,14 +1339,14 @@ bool queue_work_on(int cpu, struct workq
+ bool ret = false;
+ unsigned long flags;
+
+- local_irq_save(flags);
++ local_lock_irqsave(pendingb_lock,flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pendingb_lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(queue_work_on);
+@@ -1451,14 +1454,14 @@ bool queue_delayed_work_on(int cpu, stru
+ unsigned long flags;
+
+ /* read the comment in __queue_work() */
+- local_irq_save(flags);
++ local_lock_irqsave(pendingb_lock, flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ __queue_delayed_work(cpu, wq, dwork, delay);
+ ret = true;
+ }
+
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pendingb_lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+@@ -1508,7 +1511,7 @@ bool mod_delayed_work_on(int cpu, struct
+
+ if (likely(ret >= 0)) {
+ __queue_delayed_work(cpu, wq, dwork, delay);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pendingb_lock, flags);
+ }
+
+ /* -ENOENT from try_to_grab_pending() becomes %true */
+@@ -2936,7 +2939,7 @@ static bool __cancel_work_timer(struct w
+
+ /* tell other tasks trying to grab @work to back off */
+ mark_work_canceling(work);
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pendingb_lock, flags);
+
+ flush_work(work);
+ clear_work_data(work);
+@@ -2981,11 +2984,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
+ */
+ bool flush_delayed_work(struct delayed_work *dwork)
+ {
+- local_irq_disable();
++ local_lock_irq(pendingb_lock);
+ if (del_timer_sync(&dwork->timer))
+ __queue_work(dwork->cpu,
+ get_work_cwq(&dwork->work)->wq, &dwork->work);
+- local_irq_enable();
++ local_unlock_irq(pendingb_lock);
+ return flush_work(&dwork->work);
+ }
+ EXPORT_SYMBOL(flush_delayed_work);
+@@ -3015,7 +3018,7 @@ bool cancel_delayed_work(struct delayed_
+ return false;
+
+ set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
+- local_irq_restore(flags);
++ local_unlock_irqrestore(pendingb_lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(cancel_delayed_work);
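As background, DEFINE_LOCAL_IRQ_LOCK()/local_lock_irqsave() come from the locallock.h facility added elsewhere in this queue: on a non-RT build they collapse to local_irq_save()/local_irq_restore(), while on PREEMPT_RT they take a per-CPU sleeping lock so the section stays preemptible. A minimal sketch of the pattern, with made-up names (example_lock, example_count), could look like:

        #include <linux/percpu.h>
        #include <linux/locallock.h>

        static DEFINE_LOCAL_IRQ_LOCK(example_lock);
        static DEFINE_PER_CPU(unsigned long, example_count);

        static void example_mark_pending(void)
        {
                unsigned long flags;

                /* !RT: plain local_irq_save(); RT: per-CPU lock, still preemptible */
                local_lock_irqsave(example_lock, flags);
                __this_cpu_inc(example_count);
                local_unlock_irqrestore(example_lock, flags);
        }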
diff --git a/patches/x86-crypto-reduce-preempt-disabled-regions.patch b/patches/x86-crypto-reduce-preempt-disabled-regions.patch
new file mode 100644
index 0000000..f433dcd
--- /dev/null
+++ b/patches/x86-crypto-reduce-preempt-disabled-regions.patch
@@ -0,0 +1,112 @@
+Subject: x86: crypto: Reduce preempt disabled regions
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 14 Nov 2011 18:19:27 +0100
+
+Restrict the preempt disabled regions to the actual floating point
+operations and enable preemption for the administrative actions.
+
+This is necessary on RT to avoid kfree and other operations being
+called with preemption disabled.
+
+Reported-and-tested-by: Carsten Emde <cbe@osadl.org>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/crypto/aesni-intel_glue.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -250,14 +250,14 @@ static int ecb_encrypt(struct blkcipher_
+ err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+- kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
++ kernel_fpu_begin();
+ aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+- nbytes & AES_BLOCK_MASK);
++ nbytes & AES_BLOCK_MASK);
++ kernel_fpu_end();
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+- kernel_fpu_end();
+
+ return err;
+ }
+@@ -274,14 +274,14 @@ static int ecb_decrypt(struct blkcipher_
+ err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+- kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
++ kernel_fpu_begin();
+ aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK);
++ kernel_fpu_end();
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+- kernel_fpu_end();
+
+ return err;
+ }
+@@ -298,14 +298,14 @@ static int cbc_encrypt(struct blkcipher_
+ err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+- kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
++ kernel_fpu_begin();
+ aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
++ kernel_fpu_end();
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+- kernel_fpu_end();
+
+ return err;
+ }
+@@ -322,14 +322,14 @@ static int cbc_decrypt(struct blkcipher_
+ err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+- kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
++ kernel_fpu_begin();
+ aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
++ kernel_fpu_end();
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+- kernel_fpu_end();
+
+ return err;
+ }
+@@ -362,18 +362,20 @@ static int ctr_crypt(struct blkcipher_de
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+- kernel_fpu_begin();
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
++ kernel_fpu_begin();
+ aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
++ kernel_fpu_end();
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ if (walk.nbytes) {
++ kernel_fpu_begin();
+ ctr_crypt_final(ctx, &walk);
++ kernel_fpu_end();
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+- kernel_fpu_end();
+
+ return err;
+ }
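The transformation above is the same in every function: kernel_fpu_begin()/kernel_fpu_end() now bracket only the SIMD call, so blkcipher_walk_done(), which can sleep or free memory, runs preemptibly. Condensed to its skeleton, with a placeholder do_simd_chunk() standing in for the aesni_* helpers, each loop now has this shape:

        while ((nbytes = walk.nbytes)) {
                kernel_fpu_begin();             /* preemption off only for the SIMD work */
                do_simd_chunk(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                              nbytes & AES_BLOCK_MASK);
                kernel_fpu_end();               /* preemptible again for the bookkeeping */
                nbytes &= AES_BLOCK_SIZE - 1;
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }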
diff --git a/patches/x86-disable-debug-stack.patch b/patches/x86-disable-debug-stack.patch
new file mode 100644
index 0000000..5c138b3
--- /dev/null
+++ b/patches/x86-disable-debug-stack.patch
@@ -0,0 +1,102 @@
+From: Andi Kleen <ak@suse.de>
+Date: Fri, 3 Jul 2009 08:44:10 -0500
+Subject: x86: Disable IST stacks for debug/int 3/stack fault for PREEMPT_RT
+
+Normally the x86-64 trap handlers for debug/int 3/stack fault run
+on a special interrupt stack to make them more robust
+when dealing with kernel code.
+
+The PREEMPT_RT kernel can sleep in locks even while allocating
+GFP_ATOMIC memory. When one of these trap handlers needs to send
+real time signals for ptrace it allocates memory and could then
+try to schedule. But it is not allowed to schedule on an
+IST stack. This can cause warnings and hangs.
+
+This patch disables the IST stacks for these handlers for the PREEMPT_RT
+kernel and instead lets them run on the normal process stack.
+
+The kernel only really needs the ISTs here to make kernel debuggers more
+robust in case someone sets a breakpoint somewhere where the stack is
+invalid. But there are no kernel debuggers in the standard kernel
+that do this.
+
+It also means kprobes cannot be set in situations with an invalid stack,
+but that sounds like a reasonable restriction.
+
+The stack fault change could minimally impact oops quality, but not very
+much because stack faults are fairly rare.
+
+A better solution would be to use logic similar to the NMI "paranoid"
+path: check whether the signal is for user space; if so, go back to entry.S,
+switch stacks, call sync_regs, and then do the signal sending.
+
+But this patch is much simpler and should work too with minimal impact.
+
+Signed-off-by: Andi Kleen <ak@suse.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/include/asm/page_64_types.h | 21 +++++++++++++++------
+ arch/x86/kernel/cpu/common.c | 2 ++
+ arch/x86/kernel/dumpstack_64.c | 4 ++++
+ 3 files changed, 21 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/page_64_types.h
++++ b/arch/x86/include/asm/page_64_types.h
+@@ -14,12 +14,21 @@
+ #define IRQ_STACK_ORDER 2
+ #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
+
+-#define STACKFAULT_STACK 1
+-#define DOUBLEFAULT_STACK 2
+-#define NMI_STACK 3
+-#define DEBUG_STACK 4
+-#define MCE_STACK 5
+-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
++#ifdef CONFIG_PREEMPT_RT_FULL
++# define STACKFAULT_STACK 0
++# define DOUBLEFAULT_STACK 1
++# define NMI_STACK 2
++# define DEBUG_STACK 0
++# define MCE_STACK 3
++# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */
++#else
++# define STACKFAULT_STACK 1
++# define DOUBLEFAULT_STACK 2
++# define NMI_STACK 3
++# define DEBUG_STACK 4
++# define MCE_STACK 5
++# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
++#endif
+
+ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+ #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1103,7 +1103,9 @@ DEFINE_PER_CPU(struct task_struct *, fpu
+ */
+ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
++#if DEBUG_STACK > 0
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
++#endif
+ };
+
+ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+--- a/arch/x86/kernel/dumpstack_64.c
++++ b/arch/x86/kernel/dumpstack_64.c
+@@ -21,10 +21,14 @@
+ (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
+
+ static char x86_stack_ids[][8] = {
++#if DEBUG_STACK > 0
+ [ DEBUG_STACK-1 ] = "#DB",
++#endif
+ [ NMI_STACK-1 ] = "NMI",
+ [ DOUBLEFAULT_STACK-1 ] = "#DF",
++#if STACKFAULT_STACK > 0
+ [ STACKFAULT_STACK-1 ] = "#SS",
++#endif
+ [ MCE_STACK-1 ] = "#MC",
+ #if DEBUG_STKSZ > EXCEPTION_STKSZ
+ [ N_EXCEPTION_STACKS ...
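The #if guards above work because an IST index of 0 in an x86-64 IDT gate means "stay on the current kernel stack": with CONFIG_PREEMPT_RT_FULL the DEBUG_STACK and STACKFAULT_STACK defines become 0, so the existing gate setup degenerates to ordinary interrupt gates without touching traps.c. Roughly, mirroring how trap_init() installs these gates (a sketch, not a quote of that file):

        /* With CONFIG_PREEMPT_RT_FULL, DEBUG_STACK == 0, i.e. no IST switch. */
        set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
        set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);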
diff --git a/patches/x86-hpet-disable-msi-on-lenovo-w510.patch b/patches/x86-hpet-disable-msi-on-lenovo-w510.patch
new file mode 100644
index 0000000..646779e
--- /dev/null
+++ b/patches/x86-hpet-disable-msi-on-lenovo-w510.patch
@@ -0,0 +1,64 @@
+Subject: x86: hpet: Disable MSI on Lenovo W510
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 30 Sep 2011 20:03:37 +0200
+
+MSI based per cpu timers lose interrupts when intel_idle() is enabled
+- independent of the c-state. With idle=poll the problem cannot be
+observed. We have no idea yet whether this is a W510-specific issue
+or a general chipset oddity. Blacklist the known problem machine.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/kernel/hpet.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -8,6 +8,7 @@
+ #include <linux/slab.h>
+ #include <linux/hpet.h>
+ #include <linux/init.h>
++#include <linux/dmi.h>
+ #include <linux/cpu.h>
+ #include <linux/pm.h>
+ #include <linux/io.h>
+@@ -573,6 +574,30 @@ static void init_one_hpet_msi_clockevent
+ #define RESERVE_TIMERS 0
+ #endif
+
++static int __init dmi_disable_hpet_msi(const struct dmi_system_id *d)
++{
++ hpet_msi_disable = 1;
++ return 0;
++}
++
++static struct dmi_system_id __initdata dmi_hpet_table[] = {
++ /*
++ * MSI based per cpu timers lose interrupts when intel_idle()
++ * is enabled - independent of the c-state. With idle=poll the
++ * problem cannot be observed. We have no idea yet, whether
++ * this is a W510 specific issue or a general chipset oddity.
++ */
++ {
++ .callback = dmi_disable_hpet_msi,
++ .ident = "Lenovo W510",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W510"),
++ },
++ },
++ {}
++};
++
+ static void hpet_msi_capability_lookup(unsigned int start_timer)
+ {
+ unsigned int id;
+@@ -580,6 +605,8 @@ static void hpet_msi_capability_lookup(u
+ unsigned int num_timers_used = 0;
+ int i;
+
++ dmi_check_system(dmi_hpet_table);
++
+ if (hpet_msi_disable)
+ return;
+
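dmi_system_id tables like this are the standard mechanism for machine-specific quirks, and the table can be grown if other systems turn out to have the same HPET MSI problem. A purely hypothetical additional entry (vendor and product strings invented for illustration) would be appended before the terminating {}:

        {
                .callback = dmi_disable_hpet_msi,
                .ident = "Hypothetical Vendor Board X",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "HYPOTHETICAL"),
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Board X"),
                },
        },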
diff --git a/patches/x86-io-apic-migra-no-unmask.patch b/patches/x86-io-apic-migra-no-unmask.patch
new file mode 100644
index 0000000..1ea2c69
--- /dev/null
+++ b/patches/x86-io-apic-migra-no-unmask.patch
@@ -0,0 +1,26 @@
+From: Ingo Molnar <mingo@elte.hu>
+Date: Fri, 3 Jul 2009 08:29:27 -0500
+Subject: x86: Do not unmask io_apic when interrupt is in progress
+
+With threaded interrupts we might see an interrupt in progress on
+migration. Do not unmask it when this is the case.
+
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/kernel/apic/io_apic.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -2428,7 +2428,8 @@ static bool io_apic_level_ack_pending(st
+ static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+ {
+ /* If we are moving the irq we need to mask it */
+- if (unlikely(irqd_is_setaffinity_pending(data))) {
++ if (unlikely(irqd_is_setaffinity_pending(data) &&
++ !irqd_irq_inprogress(data))) {
+ mask_ioapic(cfg);
+ return true;
+ }
diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86-kvm-require-const-tsc-for-rt.patch
new file mode 100644
index 0000000..d74a1cc
--- /dev/null
+++ b/patches/x86-kvm-require-const-tsc-for-rt.patch
@@ -0,0 +1,25 @@
+Subject: x86-kvm-require-const-tsc-for-rt.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 06 Nov 2011 12:26:18 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/kvm/x86.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5242,6 +5242,13 @@ int kvm_arch_init(void *opaque)
+ goto out;
+ }
+
++#ifdef CONFIG_PREEMPT_RT_FULL
++ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
++ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
++ return -EOPNOTSUPP;
++ }
++#endif
++
+ r = kvm_mmu_module_init();
+ if (r)
+ goto out_free_percpu;
diff --git a/patches/x86-mce-timer-hrtimer.patch b/patches/x86-mce-timer-hrtimer.patch
new file mode 100644
index 0000000..1270030
--- /dev/null
+++ b/patches/x86-mce-timer-hrtimer.patch
@@ -0,0 +1,176 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 13 Dec 2010 16:33:39 +0100
+Subject: x86: Convert mce timer to hrtimer
+
+mce_timer is started in atomic contexts of cpu bringup. This results
+in might_sleep() warnings on RT. Convert mce_timer to a hrtimer to
+avoid this.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/kernel/cpu/mcheck/mce.c | 57 ++++++++++++++++++++++-----------------
+ 1 file changed, 33 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mcheck/mce.c
++++ b/arch/x86/kernel/cpu/mcheck/mce.c
+@@ -41,6 +41,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/irq_work.h>
+ #include <linux/export.h>
++#include <linux/jiffies.h>
+
+ #include <asm/processor.h>
+ #include <asm/mce.h>
+@@ -1259,7 +1260,7 @@ void mce_log_therm_throt_event(__u64 sta
+ static unsigned long check_interval = 5 * 60; /* 5 minutes */
+
+ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
+-static DEFINE_PER_CPU(struct timer_list, mce_timer);
++static DEFINE_PER_CPU(struct hrtimer, mce_timer);
+
+ static unsigned long mce_adjust_timer_default(unsigned long interval)
+ {
+@@ -1269,13 +1270,10 @@ static unsigned long mce_adjust_timer_de
+ static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+ mce_adjust_timer_default;
+
+-static void mce_timer_fn(unsigned long data)
++static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
+ {
+- struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long iv;
+
+- WARN_ON(smp_processor_id() != data);
+-
+ if (mce_available(__this_cpu_ptr(&cpu_info))) {
+ machine_check_poll(MCP_TIMESTAMP,
+ &__get_cpu_var(mce_poll_banks));
+@@ -1296,9 +1294,10 @@ static void mce_timer_fn(unsigned long d
+ __this_cpu_write(mce_next_interval, iv);
+ /* Might have become 0 after CMCI storm subsided */
+ if (iv) {
+- t->expires = jiffies + iv;
+- add_timer_on(t, smp_processor_id());
++		hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL));
++ return HRTIMER_RESTART;
+ }
++ return HRTIMER_NORESTART;
+ }
+
+ /*
+@@ -1306,28 +1305,37 @@ static void mce_timer_fn(unsigned long d
+ */
+ void mce_timer_kick(unsigned long interval)
+ {
+- struct timer_list *t = &__get_cpu_var(mce_timer);
+- unsigned long when = jiffies + interval;
++ struct hrtimer *t = &__get_cpu_var(mce_timer);
+ unsigned long iv = __this_cpu_read(mce_next_interval);
+
+- if (timer_pending(t)) {
+- if (time_before(when, t->expires))
+- mod_timer_pinned(t, when);
++ if (hrtimer_active(t)) {
++ s64 exp;
++ s64 intv_us;
++
++ intv_us = jiffies_to_usecs(interval);
++ exp = ktime_to_us(hrtimer_expires_remaining(t));
++ if (intv_us < exp) {
++ hrtimer_cancel(t);
++ hrtimer_start_range_ns(t,
++ ns_to_ktime(intv_us * 1000),
++ 0, HRTIMER_MODE_REL_PINNED);
++ }
+ } else {
+- t->expires = round_jiffies(when);
+- add_timer_on(t, smp_processor_id());
++ hrtimer_start_range_ns(t,
++ ns_to_ktime(jiffies_to_usecs(interval) * 1000),
++ 0, HRTIMER_MODE_REL_PINNED);
+ }
+ if (interval < iv)
+ __this_cpu_write(mce_next_interval, interval);
+ }
+
+-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
++/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
+ static void mce_timer_delete_all(void)
+ {
+ int cpu;
+
+ for_each_online_cpu(cpu)
+- del_timer_sync(&per_cpu(mce_timer, cpu));
++ hrtimer_cancel(&per_cpu(mce_timer, cpu));
+ }
+
+ static void mce_do_trigger(struct work_struct *work)
+@@ -1632,7 +1640,7 @@ static void __mcheck_cpu_init_vendor(str
+ }
+ }
+
+-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
++static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
+ {
+ unsigned long iv = mce_adjust_timer(check_interval * HZ);
+
+@@ -1641,16 +1649,17 @@ static void mce_start_timer(unsigned int
+ if (mca_cfg.ignore_ce || !iv)
+ return;
+
+- t->expires = round_jiffies(jiffies + iv);
+- add_timer_on(t, smp_processor_id());
++ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000),
++ 0, HRTIMER_MODE_REL_PINNED);
+ }
+
+ static void __mcheck_cpu_init_timer(void)
+ {
+- struct timer_list *t = &__get_cpu_var(mce_timer);
++ struct hrtimer *t = &__get_cpu_var(mce_timer);
+ unsigned int cpu = smp_processor_id();
+
+- setup_timer(t, mce_timer_fn, cpu);
++ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ t->function = mce_timer_fn;
+ mce_start_timer(cpu, t);
+ }
+
+@@ -2307,6 +2316,8 @@ static void __cpuinit mce_disable_cpu(vo
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
+ return;
+
++ hrtimer_cancel(&__get_cpu_var(mce_timer));
++
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_clear();
+ for (i = 0; i < mca_cfg.banks; i++) {
+@@ -2333,6 +2344,7 @@ static void __cpuinit mce_reenable_cpu(v
+ if (b->init)
+ wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+ }
++ __mcheck_cpu_init_timer();
+ }
+
+ /* Get notified when a cpu comes on/off. Be hotplug friendly. */
+@@ -2340,7 +2352,6 @@ static int __cpuinit
+ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ {
+ unsigned int cpu = (unsigned long)hcpu;
+- struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+@@ -2356,11 +2367,9 @@ mce_cpu_callback(struct notifier_block *
+ break;
+ case CPU_DOWN_PREPARE:
+ smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+- del_timer_sync(t);
+ break;
+ case CPU_DOWN_FAILED:
+ smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+- mce_start_timer(cpu, t);
+ break;
+ }
+
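The conversion follows the common timer_list-to-hrtimer recipe: the callback re-arms itself by returning HRTIMER_RESTART after hrtimer_forward_now(), and jiffies-based intervals are translated to ktime for hrtimer_start_range_ns(). A stripped-down sketch of that recipe, with an invented do_periodic_work() helper, is:

        static struct hrtimer example_timer;

        static enum hrtimer_restart example_timer_fn(struct hrtimer *t)
        {
                unsigned long iv = do_periodic_work();  /* hypothetical; next interval in jiffies */

                if (!iv)
                        return HRTIMER_NORESTART;
                hrtimer_forward_now(t, ns_to_ktime((u64)jiffies_to_usecs(iv) * 1000));
                return HRTIMER_RESTART;
        }

        static void example_timer_start(unsigned long iv)
        {
                hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                example_timer.function = example_timer_fn;
                hrtimer_start_range_ns(&example_timer,
                                       ns_to_ktime((u64)jiffies_to_usecs(iv) * 1000),
                                       0, HRTIMER_MODE_REL_PINNED);
        }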
diff --git a/patches/x86-perf-uncore-deal-with-kfree.patch b/patches/x86-perf-uncore-deal-with-kfree.patch
new file mode 100644
index 0000000..94e5b9b
--- /dev/null
+++ b/patches/x86-perf-uncore-deal-with-kfree.patch
@@ -0,0 +1,68 @@
+Subject: x86: perf: Deal with kfree from atomic contexts
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 04 Oct 2012 13:32:46 +0100
+
+The x86 perf code allocates memory upfront because it might need
+it. The detection that it is not needed happens in atomic context and
+calls kfree from there. RT can't do that. Use kfree_rcu instead.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/kernel/cpu/perf_event.h | 1 +
+ arch/x86/kernel/cpu/perf_event_intel.c | 2 +-
+ arch/x86/kernel/cpu/perf_event_intel_uncore.c | 5 +++--
+ arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 +
+ 4 files changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/perf_event.h
++++ b/arch/x86/kernel/cpu/perf_event.h
+@@ -108,6 +108,7 @@ struct intel_shared_regs {
+ struct er_account regs[EXTRA_REG_MAX];
+ int refcnt; /* per-core: #HT threads */
+ unsigned core_id; /* per-core: core id */
++ struct rcu_head rcu;
+ };
+
+ #define MAX_LBR_ENTRIES 16
+--- a/arch/x86/kernel/cpu/perf_event_intel.c
++++ b/arch/x86/kernel/cpu/perf_event_intel.c
+@@ -1715,7 +1715,7 @@ static void intel_pmu_cpu_dying(int cpu)
+ pc = cpuc->shared_regs;
+ if (pc) {
+ if (pc->core_id == -1 || --pc->refcnt == 0)
+- kfree(pc);
++ kfree_rcu(pc, rcu);
+ cpuc->shared_regs = NULL;
+ }
+
+--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
++++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+@@ -2636,7 +2636,7 @@ static void __cpuinit uncore_cpu_dying(i
+ box = *per_cpu_ptr(pmu->box, cpu);
+ *per_cpu_ptr(pmu->box, cpu) = NULL;
+ if (box && atomic_dec_and_test(&box->refcnt))
+- kfree(box);
++ kfree_rcu(box, rcu);
+ }
+ }
+ }
+@@ -2666,7 +2666,8 @@ static int __cpuinit uncore_cpu_starting
+ if (exist && exist->phys_id == phys_id) {
+ atomic_inc(&exist->refcnt);
+ *per_cpu_ptr(pmu->box, cpu) = exist;
+- kfree(box);
++ if (box)
++ kfree_rcu(box, rcu);
+ box = NULL;
+ break;
+ }
+--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
++++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+@@ -421,6 +421,7 @@ struct intel_uncore_box {
+ struct hrtimer hrtimer;
+ struct list_head list;
+ struct intel_uncore_extra_reg shared_regs[0];
++ struct rcu_head rcu;
+ };
+
+ #define UNCORE_BOX_FLAG_INITIATED 0
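kfree_rcu() needs nothing beyond a struct rcu_head embedded in the object: it queues the object and the actual kfree() happens after a grace period, which is why it is usable from the atomic notifier paths above. The general shape, with an illustrative structure, is:

        struct example_obj {
                int data;
                struct rcu_head rcu;    /* lets kfree_rcu() queue the deferred free */
        };

        static void example_put(struct example_obj *obj)
        {
                /* Safe in atomic context: freed after a grace period, not inline. */
                kfree_rcu(obj, rcu);
        }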
diff --git a/patches/x86-preempt-lazy.patch b/patches/x86-preempt-lazy.patch
new file mode 100644
index 0000000..8fa4e9b
--- /dev/null
+++ b/patches/x86-preempt-lazy.patch
@@ -0,0 +1,177 @@
+Subject: x86-preempt-lazy.patch
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 01 Nov 2012 11:03:47 +0100
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ arch/x86/Kconfig | 1 +
+ arch/x86/include/asm/thread_info.h | 6 ++++++
+ arch/x86/kernel/asm-offsets.c | 1 +
+ arch/x86/kernel/entry_32.S | 18 +++++++++++++-----
+ arch/x86/kernel/entry_64.S | 24 +++++++++++++++---------
+ 5 files changed, 36 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -108,6 +108,7 @@ config X86
+ select KTIME_SCALAR if X86_32
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
++ select HAVE_PREEMPT_LAZY
+ select HAVE_CONTEXT_TRACKING if X86_64
+ select HAVE_IRQ_TIME_ACCOUNTING
+ select MODULES_USE_ELF_REL if X86_32
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -31,6 +31,8 @@ struct thread_info {
+ __u32 cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable,
+ <0 => BUG */
++ int preempt_lazy_count; /* 0 => lazy preemptable,
++ <0 => BUG */
+ mm_segment_t addr_limit;
+ struct restart_block restart_block;
+ void __user *sysenter_return;
+@@ -82,6 +84,7 @@ struct thread_info {
+ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
+ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+ #define TIF_SECCOMP 8 /* secure computing */
++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
+ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
+ #define TIF_UPROBE 12 /* breakpointed or singlestepping */
+@@ -107,6 +110,7 @@ struct thread_info {
+ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+ #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
+ #define _TIF_UPROBE (1 << TIF_UPROBE)
+@@ -157,6 +161,8 @@ struct thread_info {
+ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
+ #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+
++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
++
+ #define PREEMPT_ACTIVE 0x10000000
+
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/kernel/asm-offsets.c
++++ b/arch/x86/kernel/asm-offsets.c
+@@ -33,6 +33,7 @@ void common(void) {
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
++ OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+--- a/arch/x86/kernel/entry_32.S
++++ b/arch/x86/kernel/entry_32.S
+@@ -364,14 +364,22 @@ ENTRY(resume_kernel)
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
+ jnz restore_all
+-need_resched:
+ movl TI_flags(%ebp), %ecx # need_resched set ?
+ testb $_TIF_NEED_RESCHED, %cl
++ jnz 1f
++
++ cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
++ jnz restore_all
++ testl $_TIF_NEED_RESCHED_LAZY, %ecx
+ jz restore_all
+- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
++
++1: testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz restore_all
+ call preempt_schedule_irq
+- jmp need_resched
++ movl TI_flags(%ebp), %ecx # need_resched set ?
++ testl $_TIF_NEED_RESCHED_MASK, %ecx
++ jnz 1b
++ jmp restore_all
+ END(resume_kernel)
+ #endif
+ CFI_ENDPROC
+@@ -607,7 +615,7 @@ ENDPROC(system_call)
+ ALIGN
+ RING0_PTREGS_FRAME # can't unwind into user space anyway
+ work_pending:
+- testb $_TIF_NEED_RESCHED, %cl
++ testl $_TIF_NEED_RESCHED_MASK, %ecx
+ jz work_notifysig
+ work_resched:
+ call schedule
+@@ -620,7 +628,7 @@ work_resched:
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
+ # than syscall tracing?
+ jz restore_all
+- testb $_TIF_NEED_RESCHED, %cl
++ testl $_TIF_NEED_RESCHED_MASK, %ecx
+ jnz work_resched
+
+ work_notifysig: # deal with pending signals and
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -673,8 +673,8 @@ sysret_check:
+ /* Handle reschedules */
+ /* edx: work, edi: workmask */
+ sysret_careful:
+- bt $TIF_NEED_RESCHED,%edx
+- jnc sysret_signal
++ testl $_TIF_NEED_RESCHED_MASK,%edx
++ jz sysret_signal
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ pushq_cfi %rdi
+@@ -786,8 +786,8 @@ GLOBAL(int_with_check)
+ /* First do a reschedule test. */
+ /* edx: work, edi: workmask */
+ int_careful:
+- bt $TIF_NEED_RESCHED,%edx
+- jnc int_very_careful
++ testl $_TIF_NEED_RESCHED_MASK,%edx
++ jz int_very_careful
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ pushq_cfi %rdi
+@@ -1094,8 +1094,8 @@ bad_iret:
+ /* edi: workmask, edx: work */
+ retint_careful:
+ CFI_RESTORE_STATE
+- bt $TIF_NEED_RESCHED,%edx
+- jnc retint_signal
++ testl $_TIF_NEED_RESCHED_MASK,%edx
++ jz retint_signal
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ pushq_cfi %rdi
+@@ -1128,9 +1128,15 @@ retint_signal:
+ ENTRY(retint_kernel)
+ cmpl $0,TI_preempt_count(%rcx)
+ jnz retint_restore_args
+- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
++ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
++ jc 1f
++
++ cmpl $0,TI_preempt_lazy_count(%rcx)
++ jnz retint_restore_args
++ bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
+ jnc retint_restore_args
+- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
++
++1: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
+ jnc retint_restore_args
+ call preempt_schedule_irq
+ jmp exit_intr
+@@ -1522,7 +1528,7 @@ paranoid_userspace:
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+- testl $_TIF_NEED_RESCHED,%ebx
++ testl $_TIF_NEED_RESCHED_MASK,%ebx
+ jnz paranoid_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ TRACE_IRQS_ON
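In C terms, the interrupt-return checks added above implement the following decision; this is a model of the assembly for readability, not code from the patch, and it assumes the flag and count names from the thread_info changes earlier in this file:

        /* Model of resume_kernel/retint_kernel with HAVE_PREEMPT_LAZY. */
        static int should_preempt_on_irq_return(int preempt_count,
                                                int preempt_lazy_count,
                                                unsigned long ti_flags,
                                                int irqs_were_enabled)
        {
                if (preempt_count)                      /* hard preempt disable wins */
                        return 0;
                if (!(ti_flags & _TIF_NEED_RESCHED)) {
                        /* only a lazy reschedule may be pending */
                        if (preempt_lazy_count)
                                return 0;
                        if (!(ti_flags & _TIF_NEED_RESCHED_LAZY))
                                return 0;
                }
                return irqs_were_enabled;               /* if so, call preempt_schedule_irq() */
        }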
diff --git a/patches/x86-stackprot-no-random-on-rt.patch b/patches/x86-stackprot-no-random-on-rt.patch
new file mode 100644
index 0000000..5c3ca09
--- /dev/null
+++ b/patches/x86-stackprot-no-random-on-rt.patch
@@ -0,0 +1,47 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 16 Dec 2010 14:25:18 +0100
+Subject: x86: stackprotector: Avoid random pool on rt
+
+CPU bringup calls into the random pool to initialize the stack
+canary. During boot that works nicely even on RT as the might sleep
+checks are disabled. During CPU hotplug the might sleep checks
+trigger. Making the locks in random raw is a major PITA, so avoiding the
+call on RT is the only sensible solution. This is basically the same
+randomness which we get during boot where the random pool has no
+entropy and we rely on the TSC randomness.
+
+Reported-by: Carsten Emde <carsten.emde@osadl.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/include/asm/stackprotector.h | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/stackprotector.h
++++ b/arch/x86/include/asm/stackprotector.h
+@@ -57,7 +57,7 @@
+ */
+ static __always_inline void boot_init_stack_canary(void)
+ {
+- u64 canary;
++ u64 uninitialized_var(canary);
+ u64 tsc;
+
+ #ifdef CONFIG_X86_64
+@@ -68,8 +68,16 @@ static __always_inline void boot_init_st
+ * of randomness. The TSC only matters for very early init,
+ * there it already has some randomness on most systems. Later
+ * on during the bootup the random pool has true entropy too.
++ *
++ * For preempt-rt we need to weaken the randomness a bit, as
++ * we can't call into the random generator from atomic context
++ * due to locking constraints. We just leave canary
++ * uninitialized and use the TSC based randomness on top of
++ * it.
+ */
++#ifndef CONFIG_PREEMPT_RT_FULL
+ get_random_bytes(&canary, sizeof(canary));
++#endif
+ tsc = __native_read_tsc();
+ canary += tsc + (tsc << 32UL);
+
diff --git a/patches/x86-use-gen-rwsem-spinlocks-rt.patch b/patches/x86-use-gen-rwsem-spinlocks-rt.patch
new file mode 100644
index 0000000..96fd475
--- /dev/null
+++ b/patches/x86-use-gen-rwsem-spinlocks-rt.patch
@@ -0,0 +1,28 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 26 Jul 2009 02:21:32 +0200
+Subject: x86: Use generic rwsem_spinlocks on -rt
+
+Simplifies the separation of anon_rw_semaphores and rw_semaphores for
+-rt.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+---
+ arch/x86/Kconfig | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -173,8 +173,11 @@ config ARCH_MAY_HAVE_PC_FDC
+ def_bool y
+ depends on ISA_DMA_API
+
++config RWSEM_GENERIC_SPINLOCK
++ def_bool PREEMPT_RT_FULL
++
+ config RWSEM_XCHGADD_ALGORITHM
+- def_bool y
++ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
+
+ config GENERIC_CALIBRATE_DELAY
+ def_bool y