patches

author: Greg Kroah-Hartman <gregkh@suse.de> 2011-03-08 11:59:04 -0800
committer: Greg Kroah-Hartman <gregkh@suse.de> 2011-03-08 11:59:04 -0800
commit: 541a89f97b4fe8579c94d13cac5cfb894d13da6d (patch)
tree: e68ea13ca9e339739b8046d635dc337d63c577a4
parent: 73984b2809ee78802102714cb171fbabcf013269 (diff)
download: longterm-queue-2.6.33-541a89f97b4fe8579c94d13cac5cfb894d13da6d.tar.gz
13 files changed, 882 insertions, 0 deletions
diff --git a/queue-2.6.33/drm-ttm-Fix-two-race-conditions-fix-busy-codepaths.patch b/queue-2.6.33/drm-ttm-Fix-two-race-conditions-fix-busy-codepaths.patch
new file mode 100644
index 0000000..7adddad
--- /dev/null
+++ b/queue-2.6.33/drm-ttm-Fix-two-race-conditions-fix-busy-codepaths.patch
@@ -0,0 +1,170 @@
+From: Thomas Hellstrom <thellstrom@vmware.com>
+Date: Thu, 30 Sep 2010 12:36:45 +0200
+Subject: drm/ttm: Fix two race conditions + fix busy codepaths
+
+From: Thomas Hellstrom <thellstrom@vmware.com>
+
+commit 1df6a2ebd75067aefbdf07482bf8e3d0584e04ee upstream.
+
+This fixes a race pointed out by Dave Airlie where we don't take a buffer
+object about to be destroyed off the LRU lists properly. It also fixes a rare
+case where a buffer object could be destroyed in the middle of an
+accelerated eviction.
+
+The patch also adds a utility function that can be used to prematurely
+release GPU memory space usage of an object waiting to be destroyed.
+For example during eviction or swapout.
+
+The above mentioned commit didn't queue the buffer on the delayed destroy
+list under some rare circumstances. It also didn't completely honor the
+remove_all parameter.
+
+Fixes:
+https://bugzilla.redhat.com/show_bug.cgi?id=615505
+http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=591061
+
+Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+[ Backported to 2.6.33 -maks ]
+Cc: maximilian attems <max@stro.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ drivers/gpu/drm/ttm/ttm_bo.c |   84 ++++++++++++++++++++++++++++++++++++-------
+ include/drm/ttm/ttm_bo_api.h |    4 +-
+ 2 files changed, 74 insertions(+), 14 deletions(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo.c
++++ b/drivers/gpu/drm/ttm/ttm_bo.c
+@@ -443,6 +443,43 @@ out_err:
+ }
+ 
+ /**
++ * Call with bo::reserved and the lru lock held.
++ * Will release GPU memory type usage on destruction.
++ * This is the place to put in driver specific hooks.
++ * Will release the bo::reserved lock and the
++ * lru lock on exit.
++ */
++
++static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
++{
++	struct ttm_bo_global *glob = bo->glob;
++
++	if (bo->ttm) {
++
++		/**
++		 * Release the lru_lock, since we don't want to have
++		 * an atomic requirement on ttm_tt[unbind|destroy].
++		 */
++
++		spin_unlock(&glob->lru_lock);
++		ttm_tt_unbind(bo->ttm);
++		ttm_tt_destroy(bo->ttm);
++		bo->ttm = NULL;
++		spin_lock(&glob->lru_lock);
++	}
++
++	if (bo->mem.mm_node) {
++		drm_mm_put_block(bo->mem.mm_node);
++		bo->mem.mm_node = NULL;
++	}
++
++	atomic_set(&bo->reserved, 0);
++	wake_up_all(&bo->event_queue);
++	spin_unlock(&glob->lru_lock);
++}
++
++
++/**
+  * If bo idle, remove from delayed- and lru lists, and unref.
+  * If not idle, and already on delayed list, do nothing.
+  * If not idle, and not on delayed list, put on delayed list,
+@@ -457,6 +494,7 @@ static int ttm_bo_cleanup_refs(struct tt
+ 	int ret;
+ 
+ 	spin_lock(&bo->lock);
++retry:
+ 	(void) ttm_bo_wait(bo, false, false, !remove_all);
+ 
+ 	if (!bo->sync_obj) {
+@@ -465,32 +503,52 @@ static int ttm_bo_cleanup_refs(struct tt
+ 		spin_unlock(&bo->lock);
+ 
+ 		spin_lock(&glob->lru_lock);
+-		put_count = ttm_bo_del_from_lru(bo);
++		ret = ttm_bo_reserve_locked(bo, false, !remove_all, false, 0);
+ 
+-		ret = ttm_bo_reserve_locked(bo, false, false, false, 0);
+-		BUG_ON(ret);
+-		if (bo->ttm)
+-			ttm_tt_unbind(bo->ttm);
++		/**
++		 * Someone else has the object reserved. Bail and retry.
++		 */
++
++		if (unlikely(ret == -EBUSY)) {
++			spin_unlock(&glob->lru_lock);
++			spin_lock(&bo->lock);
++			goto requeue;
++		}
++
++		/**
++		 * We can re-check for sync object without taking
++		 * the bo::lock since setting the sync object requires
++		 * also bo::reserved. A busy object at this point may
++		 * be caused by another thread starting an accelerated
++		 * eviction.
++		 */
++
++		if (unlikely(bo->sync_obj)) {
++			atomic_set(&bo->reserved, 0);
++			wake_up_all(&bo->event_queue);
++			spin_unlock(&glob->lru_lock);
++			spin_lock(&bo->lock);
++			if (remove_all)
++				goto retry;
++			else
++				goto requeue;
++		}
++
++		put_count = ttm_bo_del_from_lru(bo);
+ 
+ 		if (!list_empty(&bo->ddestroy)) {
+ 			list_del_init(&bo->ddestroy);
+ 			++put_count;
+ 		}
+-		if (bo->mem.mm_node) {
+-			bo->mem.mm_node->private = NULL;
+-			drm_mm_put_block(bo->mem.mm_node);
+-			bo->mem.mm_node = NULL;
+-		}
+-		spin_unlock(&glob->lru_lock);
+ 
+-		atomic_set(&bo->reserved, 0);
++		ttm_bo_cleanup_memtype_use(bo);
+ 
+ 		while (put_count--)
+ 			kref_put(&bo->list_kref, ttm_bo_ref_bug);
+ 
+ 		return 0;
+ 	}
+-
++requeue:
+ 	spin_lock(&glob->lru_lock);
+ 	if (list_empty(&bo->ddestroy)) {
+ 		void *sync_obj = bo->sync_obj;
+--- a/include/drm/ttm/ttm_bo_api.h
++++ b/include/drm/ttm/ttm_bo_api.h
+@@ -224,9 +224,11 @@ struct ttm_buffer_object {
+ 
+ 	atomic_t reserved;
+ 
+-
+ 	/**
+ 	 * Members protected by the bo::lock
++	 * In addition, setting sync_obj to anything else
++	 * than NULL requires bo::reserved to be held. This allows for
++	 * checking NULL while reserved but not holding bo::lock.
+ 	 */
+ 
+ 	void *sync_obj_arg;
diff --git a/queue-2.6.33/ixgbe-fix-for-82599-erratum-on-header-splitting.patch b/queue-2.6.33/ixgbe-fix-for-82599-erratum-on-header-splitting.patch
new file mode 100644
index 0000000..287ad52
--- /dev/null
+++ b/queue-2.6.33/ixgbe-fix-for-82599-erratum-on-header-splitting.patch
@@ -0,0 +1,37 @@
+From a124339ad28389093ed15eca990d39c51c5736cc Mon Sep 17 00:00:00 2001
+From: Don Skidmore <donald.c.skidmore@intel.com>
+Date: Tue, 18 Jan 2011 22:53:47 +0000
+Subject: ixgbe: fix for 82599 erratum on Header Splitting
+
+From: Don Skidmore <donald.c.skidmore@intel.com>
+
+commit a124339ad28389093ed15eca990d39c51c5736cc upstream.
+
+We have found a hardware erratum on 82599 hardware that can lead to
+unpredictable behavior when Header Splitting mode is enabled.  So
+we are no longer enabling this feature on affected hardware.
+
+Please see the 82599 Specification Update for more information.
+
+Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
+Tested-by: Stephen Ko <stephen.s.ko@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/ixgbe/ixgbe_main.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ixgbe/ixgbe_main.c
++++ b/drivers/net/ixgbe/ixgbe_main.c
+@@ -2148,6 +2148,10 @@ static void ixgbe_configure_rx(struct ix
+ 	/* Decide whether to use packet split mode or not */
+ 	adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED;
+ 
++	/* Disable packet split due to 82599 erratum #45 */
++	if (hw->mac.type == ixgbe_mac_82599EB)
++		adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED;
++
+ 	/* Set the RX buffer length according to the mode */
+ 	if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) {
+ 		rx_buf_len = IXGBE_RX_HDR_SIZE;
diff --git a/queue-2.6.33/mm-fix-possible-cause-of-a-page_mapped-bug.patch b/queue-2.6.33/mm-fix-possible-cause-of-a-page_mapped-bug.patch
new file mode 100644
index 0000000..d2d48c4
--- /dev/null
+++ b/queue-2.6.33/mm-fix-possible-cause-of-a-page_mapped-bug.patch
@@ -0,0 +1,47 @@
+From a3e8cc643d22d2c8ed36b9be7d9c9ca21efcf7f7 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 23 Feb 2011 21:39:49 -0800
+Subject: mm: fix possible cause of a page_mapped BUG
+
+From: Hugh Dickins <hughd@google.com>
+
+commit a3e8cc643d22d2c8ed36b9be7d9c9ca21efcf7f7 upstream.
+
+Robert Swiecki reported a BUG_ON(page_mapped) from a fuzzer, punching
+a hole with madvise(,, MADV_REMOVE).  That path is under mutex, and
+cannot be explained by lack of serialization in unmap_mapping_range().
+
+Reviewing the code, I found one place where vm_truncate_count handling
+should have been updated, when I switched at the last minute from one
+way of managing the restart_addr to another: mremap move changes the
+virtual addresses, so it ought to adjust the restart_addr.
+
+But rather than exporting the notion of restart_addr from memory.c, or
+converting to restart_pgoff throughout, simply reset vm_truncate_count
+to 0 to force a rescan if mremap move races with preempted truncation.
+
+We have no confirmation that this fixes Robert's BUG,
+but it is a fix that's worth making anyway.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Kerin Millar <kerframil@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ mm/mremap.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -92,9 +92,7 @@ static void move_ptes(struct vm_area_str
+ 		 */
+ 		mapping = vma->vm_file->f_mapping;
+ 		spin_lock(&mapping->i_mmap_lock);
+-		if (new_vma->vm_truncate_count &&
+-		    new_vma->vm_truncate_count != vma->vm_truncate_count)
+-			new_vma->vm_truncate_count = 0;
++		new_vma->vm_truncate_count = 0;
+ 	}
+ 
+ 	/*
diff --git a/queue-2.6.33/netfilter-nf_log-avoid-oops-in-un-bind-with-invalid-nfproto-values.patch b/queue-2.6.33/netfilter-nf_log-avoid-oops-in-un-bind-with-invalid-nfproto-values.patch
new file mode 100644
index 0000000..61992c0
--- /dev/null
+++ b/queue-2.6.33/netfilter-nf_log-avoid-oops-in-un-bind-with-invalid-nfproto-values.patch
@@ -0,0 +1,53 @@
+From 9ef0298a8e5730d9a46d640014c727f3b4152870 Mon Sep 17 00:00:00 2001
+From: Jan Engelhardt <jengelh@medozas.de>
+Date: Wed, 2 Mar 2011 12:10:13 +0100
+Subject: netfilter: nf_log: avoid oops in (un)bind with invalid nfproto values
+
+From: Jan Engelhardt <jengelh@medozas.de>
+
+commit 9ef0298a8e5730d9a46d640014c727f3b4152870 upstream.
+
+Like many other places, we have to check that the array index is
+within allowed limits, or otherwise, a kernel oops and other nastiness
+can ensue when we access memory beyond the end of the array.
+
+[ 5954.115381] BUG: unable to handle kernel paging request at 0000004000000000
+[ 5954.120014] IP:  __find_logger+0x6f/0xa0
+[ 5954.123979]  nf_log_bind_pf+0x2b/0x70
+[ 5954.123979]  nfulnl_recv_config+0xc0/0x4a0 [nfnetlink_log]
+[ 5954.123979]  nfnetlink_rcv_msg+0x12c/0x1b0 [nfnetlink]
+...
+
+The problem goes back to v2.6.30-rc1~1372~1342~31 where nf_log_bind
+was decoupled from nf_log_register.
+
+Reported-by: Miguel Di Ciurcio Filho <miguel.filho@gmail.com>,
+  via irc.freenode.net/#netfilter
+Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
+Signed-off-by: Patrick McHardy <kaber@trash.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/netfilter/nf_log.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/netfilter/nf_log.c
++++ b/net/netfilter/nf_log.c
+@@ -83,6 +83,8 @@ EXPORT_SYMBOL(nf_log_unregister);
+ 
+ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
+ {
++	if (pf >= ARRAY_SIZE(nf_loggers))
++		return -EINVAL;
+ 	mutex_lock(&nf_log_mutex);
+ 	if (__find_logger(pf, logger->name) == NULL) {
+ 		mutex_unlock(&nf_log_mutex);
+@@ -96,6 +98,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
+ 
+ void nf_log_unbind_pf(u_int8_t pf)
+ {
++	if (pf >= ARRAY_SIZE(nf_loggers))
++		return;
+ 	mutex_lock(&nf_log_mutex);
+ 	rcu_assign_pointer(nf_loggers[pf], NULL);
+ 	mutex_unlock(&nf_log_mutex);
diff --git a/queue-2.6.33/powerpc-crashdump-do-not-fail-on-null-pointer-dereferencing.patch b/queue-2.6.33/powerpc-crashdump-do-not-fail-on-null-pointer-dereferencing.patch
new file mode 100644
index 0000000..17780a5
--- /dev/null
+++ b/queue-2.6.33/powerpc-crashdump-do-not-fail-on-null-pointer-dereferencing.patch
@@ -0,0 +1,31 @@
+From 426b6cb478e60352a463a0d1ec75c1c9fab30b13 Mon Sep 17 00:00:00 2001
+From: Maxim Uvarov <muvarov@gmail.com>
+Date: Tue, 11 May 2010 05:41:08 +0000
+Subject: powerpc/crashdump: Do not fail on NULL pointer dereferencing
+
+From: Maxim Uvarov <muvarov@gmail.com>
+
+commit 426b6cb478e60352a463a0d1ec75c1c9fab30b13 upstream.
+
+Signed-off-by: Maxim Uvarov <muvarov@gmail.com>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+cc: Anton Blanchard <anton@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/kernel/crash.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/powerpc/kernel/crash.c
++++ b/arch/powerpc/kernel/crash.c
+@@ -377,6 +377,9 @@ void default_machine_crash_shutdown(stru
+ 	for_each_irq(i) {
+ 		struct irq_desc *desc = irq_to_desc(i);
+ 
++		if (!desc || !desc->chip || !desc->chip->eoi)
++			continue;
++
+ 		if (desc->status & IRQ_INPROGRESS)
+ 			desc->chip->eoi(i);
+ 
diff --git a/queue-2.6.33/powerpc-kdump-cpus-assume-the-context-of-the-oopsing-cpu.patch b/queue-2.6.33/powerpc-kdump-cpus-assume-the-context-of-the-oopsing-cpu.patch
new file mode 100644
index 0000000..b2e5c36
--- /dev/null
+++ b/queue-2.6.33/powerpc-kdump-cpus-assume-the-context-of-the-oopsing-cpu.patch
@@ -0,0 +1,69 @@
+From 0644079410065567e3bb31fcb8e6441f2b7685a9 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Mon, 10 May 2010 16:25:51 +0000
+Subject: powerpc/kdump: CPUs assume the context of the oopsing CPU
+
+From: Anton Blanchard <anton@samba.org>
+
+commit 0644079410065567e3bb31fcb8e6441f2b7685a9 upstream.
+
+We wrap the crash_shutdown_handles[] calls with longjmp/setjmp, so if any
+of them fault we can recover. The problem is we add a hook to the debugger
+fault handler hook which calls longjmp unconditionally.
+
+This first part of kdump is run before we marshall the other CPUs, so there
+is a very good chance some CPU on the box is going to page fault. And when
+it does it hits the longjmp code and assumes the context of the oopsing CPU.
+The machine gets very confused when it has 10 CPUs all with the same stack,
+all thinking they have the same CPU id. I get even more confused trying
+to debug it.
+
+The patch below adds crash_shutdown_cpu and uses it to specify which cpu is
+in the protected region. Since it can only be -1 or the oopsing CPU, we don't
+need to use memory barriers since it is only valid on the local CPU - no other
+CPU will ever see a value that matches its local CPU id.
+
+Eventually we should switch the order and marshall all CPUs before doing the
+crash_shutdown_handles[] calls, but that is a bigger fix.
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/kernel/crash.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/crash.c
++++ b/arch/powerpc/kernel/crash.c
+@@ -347,10 +347,12 @@ int crash_shutdown_unregister(crash_shut
+ EXPORT_SYMBOL(crash_shutdown_unregister);
+ 
+ static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
++static int crash_shutdown_cpu = -1;
+ 
+ static int handle_fault(struct pt_regs *regs)
+ {
+-	longjmp(crash_shutdown_buf, 1);
++	if (crash_shutdown_cpu == smp_processor_id())
++		longjmp(crash_shutdown_buf, 1);
+ 	return 0;
+ }
+ 
+@@ -388,6 +390,7 @@ void default_machine_crash_shutdown(stru
+ 	 */
+ 	old_handler = __debugger_fault_handler;
+ 	__debugger_fault_handler = handle_fault;
++	crash_shutdown_cpu = smp_processor_id();
+ 	for (i = 0; crash_shutdown_handles[i]; i++) {
+ 		if (setjmp(crash_shutdown_buf) == 0) {
+ 			/*
+@@ -401,6 +404,7 @@ void default_machine_crash_shutdown(stru
+ 			asm volatile("sync; isync");
+ 		}
+ 	}
++	crash_shutdown_cpu = -1;
+ 	__debugger_fault_handler = old_handler;
+ 
+ 	/*
diff --git a/queue-2.6.33/powerpc-kdump-use-chip-shutdown-to-disable-irqs.patch b/queue-2.6.33/powerpc-kdump-use-chip-shutdown-to-disable-irqs.patch
new file mode 100644
index 0000000..5aafd1c
--- /dev/null
+++ b/queue-2.6.33/powerpc-kdump-use-chip-shutdown-to-disable-irqs.patch
@@ -0,0 +1,56 @@
+From 5d7a87217de48b234b3c8ff8a73059947d822e07 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Mon, 10 May 2010 16:27:38 +0000
+Subject: powerpc/kdump: Use chip->shutdown to disable IRQs
+
+From: Anton Blanchard <anton@samba.org>
+
+commit 5d7a87217de48b234b3c8ff8a73059947d822e07 upstream.
+
+I saw this in a kdump kernel:
+
+IOMMU table initialized, virtual merging enabled
+Interrupt 155954 (real) is invalid, disabling it.
+Interrupt 155953 (real) is invalid, disabling it.
+
+ie we took some spurious interrupts. default_machine_crash_shutdown tries
+to disable all interrupt sources but uses chip->disable which maps to
+the default action of:
+
+static void default_disable(unsigned int irq)
+{
+}
+
+If we use chip->shutdown, then we actually mask the IRQ:
+
+static void default_shutdown(unsigned int irq)
+{
+        struct irq_desc *desc = irq_to_desc(irq);
+
+        desc->chip->mask(irq);
+        desc->status |= IRQ_MASKED;
+}
+
+Not sure why we don't implement a ->disable action for xics.c, or why
+default_disable doesn't mask the interrupt.
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/kernel/crash.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/kernel/crash.c
++++ b/arch/powerpc/kernel/crash.c
+@@ -381,7 +381,7 @@ void default_machine_crash_shutdown(stru
+ 			desc->chip->eoi(i);
+ 
+ 		if (!(desc->status & IRQ_DISABLED))
+-			desc->chip->disable(i);
++			desc->chip->shutdown(i);
+ 	}
+ 
+ 	/*
diff --git a/queue-2.6.33/powerpc-kexec-fix-orphaned-offline-cpus-across-kexec.patch b/queue-2.6.33/powerpc-kexec-fix-orphaned-offline-cpus-across-kexec.patch
new file mode 100644
index 0000000..14aba22
--- /dev/null
+++ b/queue-2.6.33/powerpc-kexec-fix-orphaned-offline-cpus-across-kexec.patch
@@ -0,0 +1,82 @@
+From kamalesh@linux.vnet.ibm.com  Tue Mar  8 11:54:41 2011
+From: Matt Evans <matt@ozlabs.org>
+Date: Mon, 7 Mar 2011 17:26:04 +0530
+Subject: powerpc/kexec: Fix orphaned offline CPUs across kexec
+To: stable@kernel.org
+Cc: greg@kroah.com, benh@kernel.crashing.org, linux-kernel@vger.kernel.org, anton@samba.org
+Message-ID: <20110307115604.GI8194@linux.vnet.ibm.com>
+Content-Disposition: inline
+
+From: Matt Evans <matt@ozlabs.org>
+
+commit e8e5c2155b0035b6e04f29be67f6444bc914005b upstream.
+
+When CPU hotplug is used, some CPUs may be offline at the time a kexec is
+performed.  The subsequent kernel may expect these CPUs to be already running,
+and will declare them stuck.  On pseries, there's also a soft-offline (cede)
+state that CPUs may be in; this can also cause problems as the kexeced kernel
+may ask RTAS if they're online -- and RTAS would say they are.  The CPU will
+either appear stuck, or will cause a crash as we replace its cede loop beneath
+it.
+
+This patch kicks each present offline CPU awake before the kexec, so that
+none are forever lost to these assumptions in the subsequent kernel.
+
+Now, the behaviour is that all available CPUs that were offlined are now
+online & usable after the kexec.  This mimics the behaviour of a full reboot
+(on which all CPUs will be restarted).
+
+Signed-off-by: Matt Evans <matt@ozlabs.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+cc: Anton Blanchard <anton@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/powerpc/kernel/machine_kexec_64.c |   25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/arch/powerpc/kernel/machine_kexec_64.c
++++ b/arch/powerpc/kernel/machine_kexec_64.c
+@@ -15,6 +15,7 @@
+ #include <linux/thread_info.h>
+ #include <linux/init_task.h>
+ #include <linux/errno.h>
++#include <linux/cpu.h>
+ 
+ #include <asm/page.h>
+ #include <asm/current.h>
+@@ -169,10 +170,34 @@ static void kexec_smp_down(void *arg)
+ 	/* NOTREACHED */
+ }
+ 
++/*
++ * We need to make sure each present CPU is online.  The next kernel will scan
++ * the device tree and assume primary threads are online and query secondary
++ * threads via RTAS to online them if required.  If we don't online primary
++ * threads, they will be stuck.  However, we also online secondary threads as we
++ * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
++ * threads as offline -- and again, these CPUs will be stuck.
++ *
++ * So, we online all CPUs that should be running, including secondary threads.
++ */
++static void wake_offline_cpus(void)
++{
++	int cpu = 0;
++
++	for_each_present_cpu(cpu) {
++		if (!cpu_online(cpu)) {
++			printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
++					cpu);
++			cpu_up(cpu);
++		}
++	}
++}
++
+ static void kexec_prepare_cpus(void)
+ {
+ 	int my_cpu, i, notified=-1;
+ 
++	wake_offline_cpus();
+ 	smp_call_function(kexec_smp_down, NULL, /* wait */0);
+ 	my_cpu = get_cpu();
+ 
diff --git a/queue-2.6.33/powerpc-kexec-speedup-kexec-hash-pte-tear-down.patch b/queue-2.6.33/powerpc-kexec-speedup-kexec-hash-pte-tear-down.patch
new file mode 100644
index 0000000..81de0c7
--- /dev/null
+++ b/queue-2.6.33/powerpc-kexec-speedup-kexec-hash-pte-tear-down.patch
@@ -0,0 +1,75 @@
+From d504bed676caad29a3dba3d3727298c560628f5c Mon Sep 17 00:00:00 2001
+From: Michael Neuling <mikey@neuling.org>
+Date: Mon, 10 May 2010 20:28:26 +0000
+Subject: powerpc/kexec: Speedup kexec hash PTE tear down
+
+From: Michael Neuling <mikey@neuling.org>
+
+commit d504bed676caad29a3dba3d3727298c560628f5c upstream.
+
+Currently for kexec the PTE tear down on 1TB segment systems normally
+requires 3 hcalls for each PTE removal. On a machine with 32GB of
+memory it can take around a minute to remove all the PTEs.
+
+This optimises the path so that we only remove PTEs that are valid.
+It also uses the read 4 PTEs at once HCALL.  For the common case where
+a PTE is invalid in a 1TB segment, this turns the 3 HCALLs per PTE
+down to 1 HCALL per 4 PTEs.
+
+This gives an > 10x speedup in kexec times on PHYP, taking a 32GB
+machine from around 1 minute down to a few seconds.
+
+Signed-off-by: Michael Neuling <mikey@neuling.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+cc: Anton Blanchard <anton@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/platforms/pseries/lpar.c |   33 ++++++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 13 deletions(-)
+
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -367,21 +367,28 @@ static void pSeries_lpar_hptab_clear(voi
+ {
+ 	unsigned long size_bytes = 1UL << ppc64_pft_size;
+ 	unsigned long hpte_count = size_bytes >> 4;
+-	unsigned long dummy1, dummy2, dword0;
++	struct {
++		unsigned long pteh;
++		unsigned long ptel;
++	} ptes[4];
+ 	long lpar_rc;
+-	int i;
++	int i, j;
+ 
+-	/* TODO: Use bulk call */
+-	for (i = 0; i < hpte_count; i++) {
+-		/* dont remove HPTEs with VRMA mappings */
+-		lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG,
+-						&dummy1, &dummy2);
+-		if (lpar_rc == H_NOT_FOUND) {
+-			lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1);
+-			if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK)
+-				!= HPTE_V_VRMA_MASK))
+-				/* Can be hpte for 1TB Seg. So remove it */
+-				plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2);
++	/* Read in batches of 4,
++	 * invalidate only valid entries not in the VRMA
++	 * hpte_count will be a multiple of 4
++         */
++	for (i = 0; i < hpte_count; i += 4) {
++		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
++		if (lpar_rc != H_SUCCESS)
++			continue;
++		for (j = 0; j < 4; j++){
++			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
++				HPTE_V_VRMA_MASK)
++				continue;
++			if (ptes[j].pteh & HPTE_V_VALID)
++				plpar_pte_remove_raw(0, i + j, 0,
++					&(ptes[j].pteh), &(ptes[j].ptel));
+ 		}
+ 	}
+ }
diff --git a/queue-2.6.33/powerpc-pseries-add-hcall-to-read-4-ptes-at-a-time-in-real-mode.patch b/queue-2.6.33/powerpc-pseries-add-hcall-to-read-4-ptes-at-a-time-in-real-mode.patch
new file mode 100644
index 0000000..c9be8e2
--- /dev/null
+++ b/queue-2.6.33/powerpc-pseries-add-hcall-to-read-4-ptes-at-a-time-in-real-mode.patch
@@ -0,0 +1,109 @@
+From f90ece28c1f5b3ec13fe481406857fe92f4bc7d1 Mon Sep 17 00:00:00 2001
+From: Michael Neuling <mikey@neuling.org>
+Date: Mon, 10 May 2010 20:28:26 +0000
+Subject: powerpc/pseries: Add hcall to read 4 ptes at a time in real mode
+
+From: Michael Neuling <mikey@neuling.org>
+
+commit f90ece28c1f5b3ec13fe481406857fe92f4bc7d1 upstream.
+
+This adds plpar_pte_read_4_raw() which can be used to read 4 PTEs from
+PHYP at a time, while in real mode.
+
+It also creates a new hcall9 which can be used in real mode.  It's the
+same as plpar_hcall9 but minus the tracing hcall statistics which may
+require variables outside the RMO.
+
+Signed-off-by: Michael Neuling <mikey@neuling.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Kamalesh babulal <kamalesh@linux.vnet.ibm.com>
+Cc: Anton Blanchard <anton@samba.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/include/asm/hvcall.h               |    1 
+ arch/powerpc/platforms/pseries/hvCall.S         |   38 ++++++++++++++++++++++++
+ arch/powerpc/platforms/pseries/plpar_wrappers.h |   18 +++++++++++
+ 3 files changed, 57 insertions(+)
+
+--- a/arch/powerpc/include/asm/hvcall.h
++++ b/arch/powerpc/include/asm/hvcall.h
+@@ -281,6 +281,7 @@ long plpar_hcall_raw(unsigned long opcod
+  */
+ #define PLPAR_HCALL9_BUFSIZE 9
+ long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
++long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
+ 
+ /* For hcall instrumentation.  One structure per-hcall, per-CPU */
+ struct hcall_stats {
+--- a/arch/powerpc/platforms/pseries/hvCall.S
++++ b/arch/powerpc/platforms/pseries/hvCall.S
+@@ -228,3 +228,41 @@ _GLOBAL(plpar_hcall9)
+ 	mtcrf	0xff,r0
+ 
+ 	blr				/* return r3 = status */
++
++/* See plpar_hcall_raw to see why this is needed */
++_GLOBAL(plpar_hcall9_raw)
++	HMT_MEDIUM
++
++	mfcr	r0
++	stw	r0,8(r1)
++
++	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
++
++	mr	r4,r5
++	mr	r5,r6
++	mr	r6,r7
++	mr	r7,r8
++	mr	r8,r9
++	mr	r9,r10
++	ld	r10,STK_PARM(r11)(r1)	 /* put arg7 in R10 */
++	ld	r11,STK_PARM(r12)(r1)	 /* put arg8 in R11 */
++	ld	r12,STK_PARM(r13)(r1)    /* put arg9 in R12 */
++
++	HVSC				/* invoke the hypervisor */
++
++	mr	r0,r12
++	ld	r12,STK_PARM(r4)(r1)
++	std	r4,  0(r12)
++	std	r5,  8(r12)
++	std	r6, 16(r12)
++	std	r7, 24(r12)
++	std	r8, 32(r12)
++	std	r9, 40(r12)
++	std	r10,48(r12)
++	std	r11,56(r12)
++	std	r0, 64(r12)
++
++	lwz	r0,8(r1)
++	mtcrf	0xff,r0
++
++	blr				/* return r3 = status */
+--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
++++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
+@@ -191,6 +191,24 @@ static inline long plpar_pte_read_raw(un
+ 	return rc;
+ }
+ 
++/*
++ * plpar_pte_read_4_raw can be called in real mode.
++ * ptes must be 8*sizeof(unsigned long)
++ */
++static inline long plpar_pte_read_4_raw(unsigned long flags, unsigned long ptex,
++					unsigned long *ptes)
++
++{
++	long rc;
++	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
++
++	rc = plpar_hcall9_raw(H_READ, retbuf, flags | H_READ_4, ptex);
++
++	memcpy(ptes, retbuf, 8*sizeof(unsigned long));
++
++	return rc;
++}
++
+ static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
+ 		unsigned long avpn)
+ {
diff --git a/queue-2.6.33/powerpc-use-more-accurate-limit-for-first-segment-memory-allocations.patch b/queue-2.6.33/powerpc-use-more-accurate-limit-for-first-segment-memory-allocations.patch
new file mode 100644
index 0000000..71e7f53
--- /dev/null
+++ b/queue-2.6.33/powerpc-use-more-accurate-limit-for-first-segment-memory-allocations.patch
@@ -0,0 +1,83 @@
+From 095c7965f4dc870ed2b65143b1e2610de653416c Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Mon, 10 May 2010 18:59:18 +0000
+Subject: powerpc: Use more accurate limit for first segment memory allocations
+
+From: Anton Blanchard <anton@samba.org>
+
+commit 095c7965f4dc870ed2b65143b1e2610de653416c upstream.
+
+Author: Milton Miller <miltonm@bga.com>
+
+On large machines we are running out of room below 256MB. In some cases we
+only need to ensure the allocation is in the first segment, which may be
+256MB or 1TB.
+
+Add slb0_limit and use it to specify the upper limit for the irqstack and
+emergency stacks.
+
+On a large ppc64 box, this fixes a panic at boot when the crashkernel=
+option is specified (previously we would run out of memory below 256MB).
+
+Signed-off-by: Milton Miller <miltonm@bga.com>
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/kernel/setup_64.c |   17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/kernel/setup_64.c
++++ b/arch/powerpc/kernel/setup_64.c
+@@ -427,9 +427,18 @@ void __init setup_system(void)
+ 	DBG(" <- setup_system()\n");
+ }
+ 
++static u64 slb0_limit(void)
++{
++	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
++		return 1UL << SID_SHIFT_1T;
++	}
++	return 1UL << SID_SHIFT;
++}
++
+ #ifdef CONFIG_IRQSTACKS
+ static void __init irqstack_early_init(void)
+ {
++	u64 limit = slb0_limit();
+ 	unsigned int i;
+ 
+ 	/*
+@@ -439,10 +448,10 @@ static void __init irqstack_early_init(v
+ 	for_each_possible_cpu(i) {
+ 		softirq_ctx[i] = (struct thread_info *)
+ 			__va(lmb_alloc_base(THREAD_SIZE,
+-					    THREAD_SIZE, 0x10000000));
++					    THREAD_SIZE, limit));
+ 		hardirq_ctx[i] = (struct thread_info *)
+ 			__va(lmb_alloc_base(THREAD_SIZE,
+-					    THREAD_SIZE, 0x10000000));
++					    THREAD_SIZE, limit));
+ 	}
+ }
+ #else
+@@ -473,7 +482,7 @@ static void __init exc_lvl_early_init(vo
+  */
+ static void __init emergency_stack_init(void)
+ {
+-	unsigned long limit;
++	u64 limit;
+ 	unsigned int i;
+ 
+ 	/*
+@@ -485,7 +494,7 @@ static void __init emergency_stack_init(
+ 	 * bringup, we need to get at them in real mode. This means they
+ 	 * must also be within the RMO region.
+ 	 */
+-	limit = min(0x10000000ULL, lmb.rmo_size);
++	limit = min(slb0_limit(), lmb.rmo_size);
+ 
+ 	for_each_possible_cpu(i) {
+ 		unsigned long sp;
diff --git a/queue-2.6.33/rxrpc-fix-v1-keys.patch b/queue-2.6.33/rxrpc-fix-v1-keys.patch
new file mode 100644
index 0000000..a562bc6
--- /dev/null
+++ b/queue-2.6.33/rxrpc-fix-v1-keys.patch
@@ -0,0 +1,58 @@
+From f009918a1c1bbf8607b8aab3959876913a30193a Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@au1.ibm.com>
+Date: Mon, 28 Feb 2011 03:27:53 +0000
+Subject: RxRPC: Fix v1 keys
+
+From: Anton Blanchard <anton@au1.ibm.com>
+
+commit f009918a1c1bbf8607b8aab3959876913a30193a upstream.
+
+commit 339412841d7 (RxRPC: Allow key payloads to be passed in XDR form)
+broke klog for me. I notice the v1 key struct had a kif_version field
+added:
+
+-struct rxkad_key {
+-       u16     security_index;         /* RxRPC header security index */
+-       u16     ticket_len;             /* length of ticket[] */
+-       u32     expiry;                 /* time at which expires */
+-       u32     kvno;                   /* key version number */
+-       u8      session_key[8];         /* DES session key */
+-       u8      ticket[0];              /* the encrypted ticket */
+-};
+
++struct rxrpc_key_data_v1 {
++       u32             kif_version;            /* 1 */
++       u16             security_index;
++       u16             ticket_length;
++       u32             expiry;                 /* time_t */
++       u32             kvno;
++       u8              session_key[8];
++       u8              ticket[0];
++};
+
+However the code in rxrpc_instantiate strips it away:
+
+	data += sizeof(kver);
+	datalen -= sizeof(kver);
+
+Removing kif_version fixes my problem.
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/keys/rxrpc-type.h |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/include/keys/rxrpc-type.h
++++ b/include/keys/rxrpc-type.h
+@@ -99,7 +99,6 @@ struct rxrpc_key_token {
+  * structure of raw payloads passed to add_key() or instantiate key
+  */
+ struct rxrpc_key_data_v1 {
+-	u32		kif_version;		/* 1 */
+ 	u16		security_index;
+ 	u16		ticket_length;
+ 	u32		expiry;			/* time_t */
diff --git a/queue-2.6.33/series b/queue-2.6.33/series
index ef9bf5f..e219c18 100644
--- a/queue-2.6.33/series
+++ b/queue-2.6.33/series
@@ -409,3 +409,15 @@ usb-iowarrior-don-t-trust-report_size-for-buffer-size.patch
 cifs-fix-oplock-break-handling-try-2.patch
 cpuset-add-a-missing-unlock-in-cpuset_write_resmask.patch
 keyboard-integer-underflow-bug.patch
+rxrpc-fix-v1-keys.patch
+drm-ttm-Fix-two-race-conditions-fix-busy-codepaths.patch
+ixgbe-fix-for-82599-erratum-on-header-splitting.patch
+mm-fix-possible-cause-of-a-page_mapped-bug.patch
+powerpc-use-more-accurate-limit-for-first-segment-memory-allocations.patch
+powerpc-kdump-cpus-assume-the-context-of-the-oopsing-cpu.patch
+powerpc-kdump-use-chip-shutdown-to-disable-irqs.patch
+powerpc-pseries-add-hcall-to-read-4-ptes-at-a-time-in-real-mode.patch
+powerpc-kexec-speedup-kexec-hash-pte-tear-down.patch
+powerpc-crashdump-do-not-fail-on-null-pointer-dereferencing.patch
+netfilter-nf_log-avoid-oops-in-un-bind-with-invalid-nfproto-values.patch
+powerpc-kexec-fix-orphaned-offline-cpus-across-kexec.patch
author	Greg Kroah-Hartman <gregkh@suse.de>	2011-03-08 11:59:04 -0800
committer	Greg Kroah-Hartman <gregkh@suse.de>	2011-03-08 11:59:04 -0800
commit	541a89f97b4fe8579c94d13cac5cfb894d13da6d (patch)
tree	e68ea13ca9e339739b8046d635dc337d63c577a4
parent	73984b2809ee78802102714cb171fbabcf013269 (diff)
download	longterm-queue-2.6.33-541a89f97b4fe8579c94d13cac5cfb894d13da6d.tar.gz