.33 patches

author: Greg Kroah-Hartman <gregkh@suse.de> 2011-08-02 15:10:53 -0700
committer: Greg Kroah-Hartman <gregkh@suse.de> 2011-08-02 15:10:53 -0700
commit: 469cf509109da86fa90400564a35254227434747 (patch)
tree: 63e2ea23a483bee958e6c056636d15b7e3b50206
parent: 98a01757aac53bbaa7c5fb1ed44f266314c6b5a8 (diff)
download: longterm-queue-2.6.33-469cf509109da86fa90400564a35254227434747.tar.gz
9 files changed, 614 insertions, 0 deletions
diff --git a/queue-2.6.33/alpha-fix-several-security-issues.patch b/queue-2.6.33/alpha-fix-several-security-issues.patch
new file mode 100644
index 0000000..ad1c281
--- /dev/null
+++ b/queue-2.6.33/alpha-fix-several-security-issues.patch
@@ -0,0 +1,91 @@
+From 21c5977a836e399fc710ff2c5367845ed5c2527f Mon Sep 17 00:00:00 2001
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+Date: Wed, 15 Jun 2011 15:09:01 -0700
+Subject: alpha: fix several security issues
+
+From: Dan Rosenberg <drosenberg@vsecurity.com>
+
+commit 21c5977a836e399fc710ff2c5367845ed5c2527f upstream.
+
+Fix several security issues in Alpha-specific syscalls.  Untested, but
+mostly trivial.
+
+1. Signedness issue in osf_getdomainname allows copying out-of-bounds
+kernel memory to userland.
+
+2. Signedness issue in osf_sysinfo allows copying large amounts of
+kernel memory to userland.
+
+3. Typo (?) in osf_getsysinfo bounds minimum instead of maximum copy
+size, allowing copying large amounts of kernel memory to userland.
+
+4. Usage of user pointer in osf_wait4 while under KERNEL_DS allows
+privilege escalation via writing return value of sys_wait4 to kernel
+memory.
+
+Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
+Cc: Richard Henderson <rth@twiddle.net>
+Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+Cc: Matt Turner <mattst88@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/alpha/kernel/osf_sys.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/alpha/kernel/osf_sys.c
++++ b/arch/alpha/kernel/osf_sys.c
+@@ -431,7 +431,7 @@ SYSCALL_DEFINE2(osf_getdomainname, char
+ 		return -EFAULT;
+ 
+ 	len = namelen;
+-	if (namelen > 32)
++	if (len > 32)
+ 		len = 32;
+ 
+ 	down_read(&uts_sem);
+@@ -618,7 +618,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, comman
+ 	down_read(&uts_sem);
+ 	res = sysinfo_table[offset];
+ 	len = strlen(res)+1;
+-	if (len > count)
++	if ((unsigned long)len > (unsigned long)count)
+ 		len = count;
+ 	if (copy_to_user(buf, res, len))
+ 		err = -EFAULT;
+@@ -673,7 +673,7 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned
+ 		return 1;
+ 
+ 	case GSI_GET_HWRPB:
+-		if (nbytes < sizeof(*hwrpb))
++		if (nbytes > sizeof(*hwrpb))
+ 			return -EINVAL;
+ 		if (copy_to_user(buffer, hwrpb, nbytes) != 0)
+ 			return -EFAULT;
+@@ -1035,6 +1035,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, i
+ {
+ 	struct rusage r;
+ 	long ret, err;
++	unsigned int status = 0;
+ 	mm_segment_t old_fs;
+ 
+ 	if (!ur)
+@@ -1043,13 +1044,15 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, i
+ 	old_fs = get_fs();
+ 		
+ 	set_fs (KERNEL_DS);
+-	ret = sys_wait4(pid, ustatus, options, (struct rusage __user *) &r);
++	ret = sys_wait4(pid, (unsigned int __user *) &status, options,
++			(struct rusage __user *) &r);
+ 	set_fs (old_fs);
+ 
+ 	if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
+ 		return -EFAULT;
+ 
+ 	err = 0;
++	err |= put_user(status, ustatus);
+ 	err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
+ 	err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
+ 	err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
diff --git a/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch b/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch
new file mode 100644
index 0000000..2832f6f
--- /dev/null
+++ b/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch
@@ -0,0 +1,71 @@
+From c2892f02712e9516d72841d5c019ed6916329794 Mon Sep 17 00:00:00 2001
+From: Alexey Dobriyan <adobriyan@gmail.com>
+Date: Tue, 16 Feb 2010 07:57:44 +0000
+Subject: gre: fix netns vs proto registration ordering
+
+From: Alexey Dobriyan <adobriyan@gmail.com>
+
+commit c2892f02712e9516d72841d5c019ed6916329794 upstream.
+
+GRE protocol receive hook can be called right after protocol addition is done.
+If netns stuff is not yet initialized, we're going to oops in
+net_generic().
+
+This is remotely oopsable if ip_gre is compiled as module and packet
+comes at unfortunate moment of module loading.
+
+Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/ipv4/ip_gre.c |   19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -1667,14 +1667,15 @@ static int __init ipgre_init(void)
+ 
+ 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+ 
+-	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
+-		printk(KERN_INFO "ipgre init: can't add protocol\n");
+-		return -EAGAIN;
+-	}
+-
+ 	err = register_pernet_device(&ipgre_net_ops);
+ 	if (err < 0)
+-		goto gen_device_failed;
++		return err;
++
++	err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
++	if (err < 0) {
++		printk(KERN_INFO "ipgre init: can't add protocol\n");
++		goto add_proto_failed;
++	}
+ 
+ 	err = rtnl_link_register(&ipgre_link_ops);
+ 	if (err < 0)
+@@ -1690,9 +1691,9 @@ out:
+ tap_ops_failed:
+ 	rtnl_link_unregister(&ipgre_link_ops);
+ rtnl_link_failed:
+-	unregister_pernet_device(&ipgre_net_ops);
+-gen_device_failed:
+ 	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
++add_proto_failed:
++	unregister_pernet_device(&ipgre_net_ops);
+ 	goto out;
+ }
+ 
+@@ -1700,9 +1701,9 @@ static void __exit ipgre_fini(void)
+ {
+ 	rtnl_link_unregister(&ipgre_tap_ops);
+ 	rtnl_link_unregister(&ipgre_link_ops);
+-	unregister_pernet_device(&ipgre_net_ops);
+ 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
+ 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
++	unregister_pernet_device(&ipgre_net_ops);
+ }
+ 
+ module_init(ipgre_init);
diff --git a/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch b/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch
new file mode 100644
index 0000000..2aa024b
--- /dev/null
+++ b/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch
@@ -0,0 +1,52 @@
+From e924960dacdf85d118a98c7262edf2f99c3015cf Mon Sep 17 00:00:00 2001
+From: Alexey Dobriyan <adobriyan@gmail.com>
+Date: Mon, 25 Jan 2010 10:28:21 +0000
+Subject: netns xfrm: fixup xfrm6_tunnel error propagation
+
+From: Alexey Dobriyan <adobriyan@gmail.com>
+
+commit e924960dacdf85d118a98c7262edf2f99c3015cf upstream.
+
+Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ net/ipv6/xfrm6_tunnel.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/net/ipv6/xfrm6_tunnel.c
++++ b/net/ipv6/xfrm6_tunnel.c
+@@ -353,13 +353,19 @@ static struct xfrm6_tunnel xfrm46_tunnel
+ 
+ static int __init xfrm6_tunnel_init(void)
+ {
+-	if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0)
++	int rv;
++
++	rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6);
++	if (rv < 0)
+ 		goto err;
+-	if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6))
++	rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6);
++	if (rv < 0)
+ 		goto unreg;
+-	if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET))
++	rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET);
++	if (rv < 0)
+ 		goto dereg6;
+-	if (xfrm6_tunnel_spi_init() < 0)
++	rv = xfrm6_tunnel_spi_init();
++	if (rv < 0)
+ 		goto dereg46;
+ 	return 0;
+ 
+@@ -370,7 +376,7 @@ dereg6:
+ unreg:
+ 	xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
+ err:
+-	return -EAGAIN;
++	return rv;
+ }
+ 
+ static void __exit xfrm6_tunnel_fini(void)
diff --git a/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch b/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch
new file mode 100644
index 0000000..07776a9
--- /dev/null
+++ b/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch
@@ -0,0 +1,31 @@
+From 51d33021425e1f905beb4208823146f2fb6517da Mon Sep 17 00:00:00 2001
+From: Anton Blanchard <anton@samba.org>
+Date: Tue, 5 Jul 2011 21:51:36 +0000
+Subject: powerpc/pseries/hvconsole: Fix dropped console output
+
+From: Anton Blanchard <anton@samba.org>
+
+commit 51d33021425e1f905beb4208823146f2fb6517da upstream.
+
+Return -EAGAIN when we get H_BUSY back from the hypervisor. This
+makes the hvc console driver retry, avoiding dropped printks.
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/powerpc/platforms/pseries/hvconsole.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/powerpc/platforms/pseries/hvconsole.c
++++ b/arch/powerpc/platforms/pseries/hvconsole.c
+@@ -73,7 +73,7 @@ int hvc_put_chars(uint32_t vtermno, cons
+ 	if (ret == H_SUCCESS)
+ 		return count;
+ 	if (ret == H_BUSY)
+-		return 0;
++		return -EAGAIN;
+ 	return -EIO;
+ }
+ 
diff --git a/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch b/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch
new file mode 100644
index 0000000..11c2707
--- /dev/null
+++ b/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch
@@ -0,0 +1,56 @@
+From 1d1221f375c94ef961ba8574ac4f85c8870ddd51 Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov <segoon@openwall.com>
+Date: Fri, 24 Jun 2011 16:08:38 +0400
+Subject: proc: restrict access to /proc/PID/io
+
+From: Vasiliy Kulikov <segoon@openwall.com>
+
+commit 1d1221f375c94ef961ba8574ac4f85c8870ddd51 upstream.
+
+/proc/PID/io may be used for gathering private information.  E.g.  for
+openssh and vsftpd daemons wchars/rchars may be used to learn the
+precise password length.  Restrict it to processes being able to ptrace
+the target process.
+
+ptrace_may_access() is needed to prevent keeping open file descriptor of
+"io" file, executing setuid binary and gathering io information of the
+setuid'ed process.
+
+Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -2519,6 +2519,9 @@ static int do_io_accounting(struct task_
+ 	struct task_io_accounting acct = task->ioac;
+ 	unsigned long flags;
+ 
++	if (!ptrace_may_access(task, PTRACE_MODE_READ))
++		return -EACCES;
++
+ 	if (whole && lock_task_sighand(task, &flags)) {
+ 		struct task_struct *t = task;
+ 
+@@ -2641,7 +2644,7 @@ static const struct pid_entry tgid_base_
+ 	REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
+ #endif
+ #ifdef CONFIG_TASK_IO_ACCOUNTING
+-	INF("io",	S_IRUGO, proc_tgid_io_accounting),
++	INF("io",	S_IRUSR, proc_tgid_io_accounting),
+ #endif
+ };
+ 
+@@ -2977,7 +2980,7 @@ static const struct pid_entry tid_base_s
+ 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+ #endif
+ #ifdef CONFIG_TASK_IO_ACCOUNTING
+-	INF("io",	S_IRUGO, proc_tid_io_accounting),
++	INF("io",	S_IRUSR, proc_tid_io_accounting),
+ #endif
+ };
+ 
diff --git a/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch b/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch
new file mode 100644
index 0000000..1314d8f
--- /dev/null
+++ b/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch
@@ -0,0 +1,84 @@
+From a68753aa46dad44915db20f715d9bfbc5815fb8d Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@suse.de>
+Date: Tue, 2 Aug 2011 14:45:26 -0700
+Subject: Revert "block: rescan partitions on invalidated devices on -ENOMEDIA
+ too"
+
+This reverts commit 6f31747bfeb8c74e6d0a10ecef0abe2a04c5a6cb (commit
+02e352287a40bd456eb78df705bf888bc3161d3f upstream)
+
+This should have only been committed on .38 and newer, not older kernels
+like this one, sorry.
+
+Cc: Tejun Heo <tj@kernel.org>
+Cc: David Zeuthen <zeuthen@gmail.com>
+Cc: Martin Pitt <martin.pitt@ubuntu.com>
+Cc: Kay Sievers <kay.sievers@vrfy.org>
+Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
+Cc: Jens Axboe <jaxboe@fusionio.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/block_dev.c |   27 +++++++++------------------
+ 1 file changed, 9 insertions(+), 18 deletions(-)
+
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -1211,7 +1211,6 @@ static int __blkdev_get(struct block_dev
+ 			if (!bdev->bd_part)
+ 				goto out_clear;
+ 
+-			ret = 0;
+ 			if (disk->fops->open) {
+ 				ret = disk->fops->open(bdev, mode);
+ 				if (ret == -ERESTARTSYS) {
+@@ -1227,18 +1226,9 @@ static int __blkdev_get(struct block_dev
+ 					mutex_unlock(&bdev->bd_mutex);
+ 					goto restart;
+ 				}
++				if (ret)
++					goto out_clear;
+ 			}
+-			/*
+-			 * If the device is invalidated, rescan partition
+-			 * if open succeeded or failed with -ENOMEDIUM.
+-			 * The latter is necessary to prevent ghost
+-			 * partitions on a removed medium.
+-			 */
+-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
+-				rescan_partitions(disk, bdev);
+-			if (ret)
+-				goto out_clear;
+-
+ 			if (!bdev->bd_openers) {
+ 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ 				bdi = blk_get_backing_dev_info(bdev);
+@@ -1246,6 +1236,8 @@ static int __blkdev_get(struct block_dev
+ 					bdi = &default_backing_dev_info;
+ 				bdev->bd_inode->i_data.backing_dev_info = bdi;
+ 			}
++			if (bdev->bd_invalidated)
++				rescan_partitions(disk, bdev);
+ 		} else {
+ 			struct block_device *whole;
+ 			whole = bdget_disk(disk, 0);
+@@ -1272,14 +1264,13 @@ static int __blkdev_get(struct block_dev
+ 		put_disk(disk);
+ 		disk = NULL;
+ 		if (bdev->bd_contains == bdev) {
+-			ret = 0;
+-			if (bdev->bd_disk->fops->open)
++			if (bdev->bd_disk->fops->open) {
+ 				ret = bdev->bd_disk->fops->open(bdev, mode);
+-			/* the same as first opener case, read comment there */
+-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
++				if (ret)
++					goto out_unlock_bdev;
++			}
++			if (bdev->bd_invalidated)
+ 				rescan_partitions(bdev->bd_disk, bdev);
+-			if (ret)
+-				goto out_unlock_bdev;
+ 		}
+ 	}
+ 	bdev->bd_openers++;
diff --git a/queue-2.6.33/series b/queue-2.6.33/series
index c921a49..756542e 100644
--- a/queue-2.6.33/series
+++ b/queue-2.6.33/series
@@ -36,3 +36,11 @@ ext3-fix-oops-in-ext3_try_to_allocate_with_rsv.patch
 svcrpc-fix-list-corrupting-race-on-nfsd-shutdown.patch
 ehci-only-power-off-port-if-over-current-is-active.patch
 ehci-fix-direction-handling-for-interrupt-data-toggles.patch
+powerpc-pseries-hvconsole-fix-dropped-console-output.patch
+x86-hpet-avoid-the-comparator-readback-penalty.patch
+x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch
+revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch
+gre-fix-netns-vs-proto-registration-ordering.patch
+netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch
+alpha-fix-several-security-issues.patch
+proc-restrict-access-to-proc-pid-io.patch
diff --git a/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch b/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch
new file mode 100644
index 0000000..743bc91
--- /dev/null
+++ b/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch
@@ -0,0 +1,119 @@
+From khlebnikov@openvz.org  Tue Aug  2 14:43:41 2011
+From: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Date: Fri, 29 Jul 2011 14:07:55 +0400
+Subject: x86: Hpet: Avoid the comparator readback penalty
+To: <stable@kernel.org>
+Message-ID: <20110729100755.15728.65360.stgit@localhost6>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+(imported from commit v2.6.36-rc4-167-g995bd3b)
+
+Due to the overly intelligent design of HPETs, we need to workaround
+the problem that the compare value which we write is already behind
+the actual counter value at the point where the value hits the real
+compare register. This happens for two reasons:
+
+1) We read out the counter, add the delta and write the result to the
+   compare register. When a NMI or SMI hits between the read out and
+   the write then the counter can be ahead of the event already
+
+2) The write to the compare register is delayed by up to two HPET
+   cycles in certain chipsets.
+
+We worked around this by reading back the compare register to make
+sure that the written value has hit the hardware. For certain ICH9+
+chipsets this can require two readouts, as the first one can return
+the previous compare register value. That's bad performance wise for
+the normal case where the event is far enough in the future.
+
+As we already know that the write can be delayed by up to two cycles
+we can avoid the read back of the compare register completely if we
+make the decision whether the delta has elapsed already or not based
+on the following calculation:
+
+  cmp = event - actual_count;
+
+If cmp is less than 8 HPET clock cycles, then we decide that the event
+has happened already and return -ETIME. That covers the above #1 and
+#2 problems which would cause a wait for HPET wraparound (~306
+seconds).
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Nix <nix@esperi.org.uk>
+Tested-by: Artur Skawina <art.08.09@gmail.com>
+Cc: Damien Wyart <damien.wyart@free.fr>
+Tested-by: John Drescher <drescherjm@gmail.com>
+Cc: Venkatesh Pallipadi <venki@google.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
+Tested-by: Borislav Petkov <borislav.petkov@amd.com>
+Cc: Suresh Siddha <suresh.b.siddha@intel.com>
+LKML-Reference: <alpine.LFD.2.00.1009151500060.2416@localhost6.localdomain6>
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kernel/hpet.c |   43 +++++++++++++++++++++----------------------
+ 1 file changed, 21 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -380,36 +380,35 @@ static int hpet_next_event(unsigned long
+ 			   struct clock_event_device *evt, int timer)
+ {
+ 	u32 cnt;
++	s32 res;
+ 
+ 	cnt = hpet_readl(HPET_COUNTER);
+ 	cnt += (u32) delta;
+ 	hpet_writel(cnt, HPET_Tn_CMP(timer));
+ 
+ 	/*
+-	 * We need to read back the CMP register on certain HPET
+-	 * implementations (ATI chipsets) which seem to delay the
+-	 * transfer of the compare register into the internal compare
+-	 * logic. With small deltas this might actually be too late as
+-	 * the counter could already be higher than the compare value
+-	 * at that point and we would wait for the next hpet interrupt
+-	 * forever. We found out that reading the CMP register back
+-	 * forces the transfer so we can rely on the comparison with
+-	 * the counter register below. If the read back from the
+-	 * compare register does not match the value we programmed
+-	 * then we might have a real hardware problem. We can not do
+-	 * much about it here, but at least alert the user/admin with
+-	 * a prominent warning.
+-	 * An erratum on some chipsets (ICH9,..), results in comparator read
+-	 * immediately following a write returning old value. Workaround
+-	 * for this is to read this value second time, when first
+-	 * read returns old value.
++	 * HPETs are a complete disaster. The compare register is
++	 * based on a equal comparison and neither provides a less
++	 * than or equal functionality (which would require to take
++	 * the wraparound into account) nor a simple count down event
++	 * mode. Further the write to the comparator register is
++	 * delayed internally up to two HPET clock cycles in certain
++	 * chipsets (ATI, ICH9,10). We worked around that by reading
++	 * back the compare register, but that required another
++	 * workaround for ICH9,10 chips where the first readout after
++	 * write can return the old stale value. We already have a
++	 * minimum delta of 5us enforced, but a NMI or SMI hitting
++	 * between the counter readout and the comparator write can
++	 * move us behind that point easily. Now instead of reading
++	 * the compare register back several times, we make the ETIME
++	 * decision based on the following: Return ETIME if the
++	 * counter value after the write is less than 8 HPET cycles
++	 * away from the event or if the counter is already ahead of
++	 * the event.
+ 	 */
+-	if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+-		WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
+-		  KERN_WARNING "hpet: compare register read back failed.\n");
+-	}
++	res = (s32)(cnt - hpet_readl(HPET_COUNTER));
+ 
+-	return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
++	return res < 8 ? -ETIME : 0;
+ }
+ 
+ static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch b/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch
new file mode 100644
index 0000000..192c754
--- /dev/null
+++ b/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch
@@ -0,0 +1,102 @@
+From khlebnikov@openvz.org  Tue Aug  2 14:43:53 2011
+From: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Date: Fri, 29 Jul 2011 14:07:56 +0400
+Subject: x86: HPET: Chose a paranoid safe value for the ETIME check
+To: <stable@kernel.org>
+Message-ID: <20110729100756.15728.90496.stgit@localhost6>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+(imported from commit v2.6.37-rc5-64-gf1c1807)
+
+commit 995bd3bb5 (x86: Hpet: Avoid the comparator readback penalty)
+chose 8 HPET cycles as a safe value for the ETIME check, as we had the
+confirmation that the posted write to the comparator register is
+delayed by two HPET clock cycles on Intel chipsets which showed
+readback problems.
+
+After that patch hit mainline we got reports from machines with newer
+AMD chipsets which seem to have an even longer delay. See
+http://thread.gmane.org/gmane.linux.kernel/1054283 and
+http://thread.gmane.org/gmane.linux.kernel/1069458 for further
+information.
+
+Boris tried to come up with an ACPI based selection of the minimum
+HPET cycles, but this failed on a couple of test machines. And of
+course we did not get any useful information from the hardware folks.
+
+For now our only option is to choose a paranoid high and safe value for
+the minimum HPET cycles used by the ETIME check. Adjust the minimum ns
+value for the HPET clockevent accordingly.
+
+Reported-Bistected-and-Tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+LKML-Reference: <alpine.LFD.2.00.1012131222420.2653@localhost6.localdomain6>
+Cc: Simon Kirby <sim@hostway.ca>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Andreas Herrmann <Andreas.Herrmann3@amd.com>
+Cc: John Stultz <johnstul@us.ibm.com>
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ arch/x86/kernel/hpet.c |   26 ++++++++++++++++----------
+ 1 file changed, 16 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/hpet.c
++++ b/arch/x86/kernel/hpet.c
+@@ -27,6 +27,9 @@
+ #define HPET_DEV_FSB_CAP		0x1000
+ #define HPET_DEV_PERI_CAP		0x2000
+ 
++#define HPET_MIN_CYCLES			128
++#define HPET_MIN_PROG_DELTA		(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
++
+ #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
+ 
+ /*
+@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_regis
+ 	/* Calculate the min / max delta */
+ 	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
+ 							   &hpet_clockevent);
+-	/* 5 usec minimum reprogramming delta. */
+-	hpet_clockevent.min_delta_ns = 5000;
++	/* Setup minimum reprogramming delta. */
++	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
++							   &hpet_clockevent);
+ 
+ 	/*
+ 	 * Start hpet with the boot cpu mask and make it
+@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long
+ 	 * the wraparound into account) nor a simple count down event
+ 	 * mode. Further the write to the comparator register is
+ 	 * delayed internally up to two HPET clock cycles in certain
+-	 * chipsets (ATI, ICH9,10). We worked around that by reading
+-	 * back the compare register, but that required another
+-	 * workaround for ICH9,10 chips where the first readout after
+-	 * write can return the old stale value. We already have a
+-	 * minimum delta of 5us enforced, but a NMI or SMI hitting
++	 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
++	 * longer delays. We worked around that by reading back the
++	 * compare register, but that required another workaround for
++	 * ICH9,10 chips where the first readout after write can
++	 * return the old stale value. We already had a minimum
++	 * programming delta of 5us enforced, but a NMI or SMI hitting
+ 	 * between the counter readout and the comparator write can
+ 	 * move us behind that point easily. Now instead of reading
+ 	 * the compare register back several times, we make the ETIME
+ 	 * decision based on the following: Return ETIME if the
+-	 * counter value after the write is less than 8 HPET cycles
++	 * counter value after the write is less than HPET_MIN_CYCLES
+ 	 * away from the event or if the counter is already ahead of
+-	 * the event.
++	 * the event. The minimum programming delta for the generic
++	 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
+ 	 */
+ 	res = (s32)(cnt - hpet_readl(HPET_COUNTER));
+ 
+-	return res < 8 ? -ETIME : 0;
++	return res < HPET_MIN_CYCLES ? -ETIME : 0;
+ }
+ 
+ static void hpet_legacy_set_mode(enum clock_event_mode mode,
author	Greg Kroah-Hartman <gregkh@suse.de>	2011-08-02 15:10:53 -0700
committer	Greg Kroah-Hartman <gregkh@suse.de>	2011-08-02 15:10:53 -0700
commit	469cf509109da86fa90400564a35254227434747 (patch)
tree	63e2ea23a483bee958e6c056636d15b7e3b50206
parent	98a01757aac53bbaa7c5fb1ed44f266314c6b5a8 (diff)
download	longterm-queue-2.6.33-469cf509109da86fa90400564a35254227434747.tar.gz