diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-02 15:10:53 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-08-02 15:10:53 -0700 |
commit | 469cf509109da86fa90400564a35254227434747 (patch) | |
tree | 63e2ea23a483bee958e6c056636d15b7e3b50206 | |
parent | 98a01757aac53bbaa7c5fb1ed44f266314c6b5a8 (diff) | |
download | longterm-queue-2.6.33-469cf509109da86fa90400564a35254227434747.tar.gz |
.33 patches
9 files changed, 614 insertions, 0 deletions
diff --git a/queue-2.6.33/alpha-fix-several-security-issues.patch b/queue-2.6.33/alpha-fix-several-security-issues.patch new file mode 100644 index 0000000..ad1c281 --- /dev/null +++ b/queue-2.6.33/alpha-fix-several-security-issues.patch @@ -0,0 +1,91 @@ +From 21c5977a836e399fc710ff2c5367845ed5c2527f Mon Sep 17 00:00:00 2001 +From: Dan Rosenberg <drosenberg@vsecurity.com> +Date: Wed, 15 Jun 2011 15:09:01 -0700 +Subject: alpha: fix several security issues + +From: Dan Rosenberg <drosenberg@vsecurity.com> + +commit 21c5977a836e399fc710ff2c5367845ed5c2527f upstream. + +Fix several security issues in Alpha-specific syscalls. Untested, but +mostly trivial. + +1. Signedness issue in osf_getdomainname allows copying out-of-bounds +kernel memory to userland. + +2. Signedness issue in osf_sysinfo allows copying large amounts of +kernel memory to userland. + +3. Typo (?) in osf_getsysinfo bounds minimum instead of maximum copy +size, allowing copying large amounts of kernel memory to userland. + +4. Usage of user pointer in osf_wait4 while under KERNEL_DS allows +privilege escalation via writing return value of sys_wait4 to kernel +memory. 
+ +Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com> +Cc: Richard Henderson <rth@twiddle.net> +Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> +Cc: Matt Turner <mattst88@gmail.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/alpha/kernel/osf_sys.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/arch/alpha/kernel/osf_sys.c ++++ b/arch/alpha/kernel/osf_sys.c +@@ -431,7 +431,7 @@ SYSCALL_DEFINE2(osf_getdomainname, char + return -EFAULT; + + len = namelen; +- if (namelen > 32) ++ if (len > 32) + len = 32; + + down_read(&uts_sem); +@@ -618,7 +618,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, comman + down_read(&uts_sem); + res = sysinfo_table[offset]; + len = strlen(res)+1; +- if (len > count) ++ if ((unsigned long)len > (unsigned long)count) + len = count; + if (copy_to_user(buf, res, len)) + err = -EFAULT; +@@ -673,7 +673,7 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned + return 1; + + case GSI_GET_HWRPB: +- if (nbytes < sizeof(*hwrpb)) ++ if (nbytes > sizeof(*hwrpb)) + return -EINVAL; + if (copy_to_user(buffer, hwrpb, nbytes) != 0) + return -EFAULT; +@@ -1035,6 +1035,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, i + { + struct rusage r; + long ret, err; ++ unsigned int status = 0; + mm_segment_t old_fs; + + if (!ur) +@@ -1043,13 +1044,15 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, i + old_fs = get_fs(); + + set_fs (KERNEL_DS); +- ret = sys_wait4(pid, ustatus, options, (struct rusage __user *) &r); ++ ret = sys_wait4(pid, (unsigned int __user *) &status, options, ++ (struct rusage __user *) &r); + set_fs (old_fs); + + if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) + return -EFAULT; + + err = 0; ++ err |= put_user(status, ustatus); + err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); + err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); + err |= __put_user(r.ru_stime.tv_sec, 
&ur->ru_stime.tv_sec); diff --git a/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch b/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch new file mode 100644 index 0000000..2832f6f --- /dev/null +++ b/queue-2.6.33/gre-fix-netns-vs-proto-registration-ordering.patch @@ -0,0 +1,71 @@ +From c2892f02712e9516d72841d5c019ed6916329794 Mon Sep 17 00:00:00 2001 +From: Alexey Dobriyan <adobriyan@gmail.com> +Date: Tue, 16 Feb 2010 07:57:44 +0000 +Subject: gre: fix netns vs proto registration ordering + +From: Alexey Dobriyan <adobriyan@gmail.com> + +commit c2892f02712e9516d72841d5c019ed6916329794 upstream. + +GRE protocol receive hook can be called right after protocol addition is done. +If netns stuff is not yet initialized, we're going to oops in +net_generic(). + +This is remotely oopsable if ip_gre is compiled as module and packet +comes at unfortunate moment of module loading. + +Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + net/ipv4/ip_gre.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -1667,14 +1667,15 @@ static int __init ipgre_init(void) + + printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); + +- if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { +- printk(KERN_INFO "ipgre init: can't add protocol\n"); +- return -EAGAIN; +- } +- + err = register_pernet_device(&ipgre_net_ops); + if (err < 0) +- goto gen_device_failed; ++ return err; ++ ++ err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); ++ if (err < 0) { ++ printk(KERN_INFO "ipgre init: can't add protocol\n"); ++ goto add_proto_failed; ++ } + + err = rtnl_link_register(&ipgre_link_ops); + if (err < 0) +@@ -1690,9 +1691,9 @@ out: + tap_ops_failed: + rtnl_link_unregister(&ipgre_link_ops); + rtnl_link_failed: +- unregister_pernet_device(&ipgre_net_ops); 
+-gen_device_failed: + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); ++add_proto_failed: ++ unregister_pernet_device(&ipgre_net_ops); + goto out; + } + +@@ -1700,9 +1701,9 @@ static void __exit ipgre_fini(void) + { + rtnl_link_unregister(&ipgre_tap_ops); + rtnl_link_unregister(&ipgre_link_ops); +- unregister_pernet_device(&ipgre_net_ops); + if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) + printk(KERN_INFO "ipgre close: can't remove protocol\n"); ++ unregister_pernet_device(&ipgre_net_ops); + } + + module_init(ipgre_init); diff --git a/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch b/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch new file mode 100644 index 0000000..2aa024b --- /dev/null +++ b/queue-2.6.33/netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch @@ -0,0 +1,52 @@ +From e924960dacdf85d118a98c7262edf2f99c3015cf Mon Sep 17 00:00:00 2001 +From: Alexey Dobriyan <adobriyan@gmail.com> +Date: Mon, 25 Jan 2010 10:28:21 +0000 +Subject: netns xfrm: fixup xfrm6_tunnel error propagation + +From: Alexey Dobriyan <adobriyan@gmail.com> + +commit e924960dacdf85d118a98c7262edf2f99c3015cf upstream. + +Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + net/ipv6/xfrm6_tunnel.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/net/ipv6/xfrm6_tunnel.c ++++ b/net/ipv6/xfrm6_tunnel.c +@@ -353,13 +353,19 @@ static struct xfrm6_tunnel xfrm46_tunnel + + static int __init xfrm6_tunnel_init(void) + { +- if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) ++ int rv; ++ ++ rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6); ++ if (rv < 0) + goto err; +- if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) ++ rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6); ++ if (rv < 0) + goto unreg; +- if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) ++ rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET); ++ if (rv < 0) + goto dereg6; +- if (xfrm6_tunnel_spi_init() < 0) ++ rv = xfrm6_tunnel_spi_init(); ++ if (rv < 0) + goto dereg46; + return 0; + +@@ -370,7 +376,7 @@ dereg6: + unreg: + xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); + err: +- return -EAGAIN; ++ return rv; + } + + static void __exit xfrm6_tunnel_fini(void) diff --git a/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch b/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch new file mode 100644 index 0000000..07776a9 --- /dev/null +++ b/queue-2.6.33/powerpc-pseries-hvconsole-fix-dropped-console-output.patch @@ -0,0 +1,31 @@ +From 51d33021425e1f905beb4208823146f2fb6517da Mon Sep 17 00:00:00 2001 +From: Anton Blanchard <anton@samba.org> +Date: Tue, 5 Jul 2011 21:51:36 +0000 +Subject: powerpc/pseries/hvconsole: Fix dropped console output + +From: Anton Blanchard <anton@samba.org> + +commit 51d33021425e1f905beb4208823146f2fb6517da upstream. + +Return -EAGAIN when we get H_BUSY back from the hypervisor. This +makes the hvc console driver retry, avoiding dropped printks. 
+ +Signed-off-by: Anton Blanchard <anton@samba.org> +Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + arch/powerpc/platforms/pseries/hvconsole.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/platforms/pseries/hvconsole.c ++++ b/arch/powerpc/platforms/pseries/hvconsole.c +@@ -73,7 +73,7 @@ int hvc_put_chars(uint32_t vtermno, cons + if (ret == H_SUCCESS) + return count; + if (ret == H_BUSY) +- return 0; ++ return -EAGAIN; + return -EIO; + } + diff --git a/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch b/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch new file mode 100644 index 0000000..11c2707 --- /dev/null +++ b/queue-2.6.33/proc-restrict-access-to-proc-pid-io.patch @@ -0,0 +1,56 @@ +From 1d1221f375c94ef961ba8574ac4f85c8870ddd51 Mon Sep 17 00:00:00 2001 +From: Vasiliy Kulikov <segoon@openwall.com> +Date: Fri, 24 Jun 2011 16:08:38 +0400 +Subject: proc: restrict access to /proc/PID/io + +From: Vasiliy Kulikov <segoon@openwall.com> + +commit 1d1221f375c94ef961ba8574ac4f85c8870ddd51 upstream. + +/proc/PID/io may be used for gathering private information. E.g. for +openssh and vsftpd daemons wchars/rchars may be used to learn the +precise password length. Restrict it to processes being able to ptrace +the target process. + +ptrace_may_access() is needed to prevent keeping open file descriptor of +"io" file, executing setuid binary and gathering io information of the +setuid'ed process. 
+ +Signed-off-by: Vasiliy Kulikov <segoon@openwall.com> +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + fs/proc/base.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -2519,6 +2519,9 @@ static int do_io_accounting(struct task_ + struct task_io_accounting acct = task->ioac; + unsigned long flags; + ++ if (!ptrace_may_access(task, PTRACE_MODE_READ)) ++ return -EACCES; ++ + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; + +@@ -2641,7 +2644,7 @@ static const struct pid_entry tgid_base_ + REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), + #endif + #ifdef CONFIG_TASK_IO_ACCOUNTING +- INF("io", S_IRUGO, proc_tgid_io_accounting), ++ INF("io", S_IRUSR, proc_tgid_io_accounting), + #endif + }; + +@@ -2977,7 +2980,7 @@ static const struct pid_entry tid_base_s + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), + #endif + #ifdef CONFIG_TASK_IO_ACCOUNTING +- INF("io", S_IRUGO, proc_tid_io_accounting), ++ INF("io", S_IRUSR, proc_tid_io_accounting), + #endif + }; + diff --git a/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch b/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch new file mode 100644 index 0000000..1314d8f --- /dev/null +++ b/queue-2.6.33/revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch @@ -0,0 +1,84 @@ +From a68753aa46dad44915db20f715d9bfbc5815fb8d Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman <gregkh@suse.de> +Date: Tue, 2 Aug 2011 14:45:26 -0700 +Subject: Revert "block: rescan partitions on invalidated devices on -ENOMEDIA + too" + +This reverts commit 6f31747bfeb8c74e6d0a10ecef0abe2a04c5a6cb (commit +02e352287a40bd456eb78df705bf888bc3161d3f upstream) + +This should have only been committed on .38 and newer, not older kernels +like this one, sorry.
+ +Cc: Tejun Heo <tj@kernel.org> +Cc: David Zeuthen <zeuthen@gmail.com> +Cc: Martin Pitt <martin.pitt@ubuntu.com> +Cc: Kay Sievers <kay.sievers@vrfy.org> +Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> +Cc: Jens Axboe <jaxboe@fusionio.com> +Cc: Andi Kleen <ak@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + fs/block_dev.c | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +--- a/fs/block_dev.c ++++ b/fs/block_dev.c +@@ -1211,7 +1211,6 @@ static int __blkdev_get(struct block_dev + if (!bdev->bd_part) + goto out_clear; + +- ret = 0; + if (disk->fops->open) { + ret = disk->fops->open(bdev, mode); + if (ret == -ERESTARTSYS) { +@@ -1227,18 +1226,9 @@ static int __blkdev_get(struct block_dev + mutex_unlock(&bdev->bd_mutex); + goto restart; + } ++ if (ret) ++ goto out_clear; + } +- /* +- * If the device is invalidated, rescan partition +- * if open succeeded or failed with -ENOMEDIUM. +- * The latter is necessary to prevent ghost +- * partitions on a removed medium. 
+- */ +- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) +- rescan_partitions(disk, bdev); +- if (ret) +- goto out_clear; +- + if (!bdev->bd_openers) { + bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + bdi = blk_get_backing_dev_info(bdev); +@@ -1246,6 +1236,8 @@ static int __blkdev_get(struct block_dev + bdi = &default_backing_dev_info; + bdev->bd_inode->i_data.backing_dev_info = bdi; + } ++ if (bdev->bd_invalidated) ++ rescan_partitions(disk, bdev); + } else { + struct block_device *whole; + whole = bdget_disk(disk, 0); +@@ -1272,14 +1264,13 @@ static int __blkdev_get(struct block_dev + put_disk(disk); + disk = NULL; + if (bdev->bd_contains == bdev) { +- ret = 0; +- if (bdev->bd_disk->fops->open) ++ if (bdev->bd_disk->fops->open) { + ret = bdev->bd_disk->fops->open(bdev, mode); +- /* the same as first opener case, read comment there */ +- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) ++ if (ret) ++ goto out_unlock_bdev; ++ } ++ if (bdev->bd_invalidated) + rescan_partitions(bdev->bd_disk, bdev); +- if (ret) +- goto out_unlock_bdev; + } + } + bdev->bd_openers++; diff --git a/queue-2.6.33/series b/queue-2.6.33/series index c921a49..756542e 100644 --- a/queue-2.6.33/series +++ b/queue-2.6.33/series @@ -36,3 +36,11 @@ ext3-fix-oops-in-ext3_try_to_allocate_with_rsv.patch svcrpc-fix-list-corrupting-race-on-nfsd-shutdown.patch ehci-only-power-off-port-if-over-current-is-active.patch ehci-fix-direction-handling-for-interrupt-data-toggles.patch +powerpc-pseries-hvconsole-fix-dropped-console-output.patch +x86-hpet-avoid-the-comparator-readback-penalty.patch +x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch +revert-block-rescan-partitions-on-invalidated-devices-on-enomedia.patch +gre-fix-netns-vs-proto-registration-ordering.patch +netns-xfrm-fixup-xfrm6_tunnel-error-propagation.patch +alpha-fix-several-security-issues.patch +proc-restrict-access-to-proc-pid-io.patch diff --git 
a/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch b/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch new file mode 100644 index 0000000..743bc91 --- /dev/null +++ b/queue-2.6.33/x86-hpet-avoid-the-comparator-readback-penalty.patch @@ -0,0 +1,119 @@ +From khlebnikov@openvz.org Tue Aug 2 14:43:41 2011 +From: Konstantin Khlebnikov <khlebnikov@openvz.org> +Date: Fri, 29 Jul 2011 14:07:55 +0400 +Subject: x86: Hpet: Avoid the comparator readback penalty +To: <stable@kernel.org> +Message-ID: <20110729100755.15728.65360.stgit@localhost6> + + +From: Thomas Gleixner <tglx@linutronix.de> + +(imported from commit v2.6.36-rc4-167-g995bd3b) + +Due to the overly intelligent design of HPETs, we need to workaround +the problem that the compare value which we write is already behind +the actual counter value at the point where the value hits the real +compare register. This happens for two reasons: + +1) We read out the counter, add the delta and write the result to the + compare register. When a NMI or SMI hits between the read out and + the write then the counter can be ahead of the event already + +2) The write to the compare register is delayed by up to two HPET + cycles in certain chipsets. + +We worked around this by reading back the compare register to make +sure that the written value has hit the hardware. For certain ICH9+ +chipsets this can require two readouts, as the first one can return +the previous compare register value. That's bad performance wise for +the normal case where the event is far enough in the future. + +As we already know that the write can be delayed by up to two cycles +we can avoid the read back of the compare register completely if we +make the decision whether the delta has elapsed already or not based +on the following calculation: + + cmp = event - actual_count; + +If cmp is less than 8 HPET clock cycles, then we decide that the event +has happened already and return -ETIME. 
That covers the above #1 and +#2 problems which would cause a wait for HPET wraparound (~306 +seconds). + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Tested-by: Nix <nix@esperi.org.uk> +Tested-by: Artur Skawina <art.08.09@gmail.com> +Cc: Damien Wyart <damien.wyart@free.fr> +Tested-by: John Drescher <drescherjm@gmail.com> +Cc: Venkatesh Pallipadi <venki@google.com> +Cc: Arjan van de Ven <arjan@linux.intel.com> +Cc: Andreas Herrmann <andreas.herrmann3@amd.com> +Tested-by: Borislav Petkov <borislav.petkov@amd.com> +Cc: Suresh Siddha <suresh.b.siddha@intel.com> +LKML-Reference: <alpine.LFD.2.00.1009151500060.2416@localhost6.localdomain6> +Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/x86/kernel/hpet.c | 43 +++++++++++++++++++++---------------------- + 1 file changed, 21 insertions(+), 22 deletions(-) + +--- a/arch/x86/kernel/hpet.c ++++ b/arch/x86/kernel/hpet.c +@@ -380,36 +380,35 @@ static int hpet_next_event(unsigned long + struct clock_event_device *evt, int timer) + { + u32 cnt; ++ s32 res; + + cnt = hpet_readl(HPET_COUNTER); + cnt += (u32) delta; + hpet_writel(cnt, HPET_Tn_CMP(timer)); + + /* +- * We need to read back the CMP register on certain HPET +- * implementations (ATI chipsets) which seem to delay the +- * transfer of the compare register into the internal compare +- * logic. With small deltas this might actually be too late as +- * the counter could already be higher than the compare value +- * at that point and we would wait for the next hpet interrupt +- * forever. We found out that reading the CMP register back +- * forces the transfer so we can rely on the comparison with +- * the counter register below. If the read back from the +- * compare register does not match the value we programmed +- * then we might have a real hardware problem. We can not do +- * much about it here, but at least alert the user/admin with +- * a prominent warning. 
+- * An erratum on some chipsets (ICH9,..), results in comparator read +- * immediately following a write returning old value. Workaround +- * for this is to read this value second time, when first +- * read returns old value. ++ * HPETs are a complete disaster. The compare register is ++ * based on a equal comparison and neither provides a less ++ * than or equal functionality (which would require to take ++ * the wraparound into account) nor a simple count down event ++ * mode. Further the write to the comparator register is ++ * delayed internally up to two HPET clock cycles in certain ++ * chipsets (ATI, ICH9,10). We worked around that by reading ++ * back the compare register, but that required another ++ * workaround for ICH9,10 chips where the first readout after ++ * write can return the old stale value. We already have a ++ * minimum delta of 5us enforced, but a NMI or SMI hitting ++ * between the counter readout and the comparator write can ++ * move us behind that point easily. Now instead of reading ++ * the compare register back several times, we make the ETIME ++ * decision based on the following: Return ETIME if the ++ * counter value after the write is less than 8 HPET cycles ++ * away from the event or if the counter is already ahead of ++ * the event. + */ +- if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { +- WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, +- KERN_WARNING "hpet: compare register read back failed.\n"); +- } ++ res = (s32)(cnt - hpet_readl(HPET_COUNTER)); + +- return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; ++ return res < 8 ? 
-ETIME : 0; + } + + static void hpet_legacy_set_mode(enum clock_event_mode mode, diff --git a/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch b/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch new file mode 100644 index 0000000..192c754 --- /dev/null +++ b/queue-2.6.33/x86-hpet-chose-a-paranoid-safe-value-for-the-etime-check.patch @@ -0,0 +1,102 @@ +From khlebnikov@openvz.org Tue Aug 2 14:43:53 2011 +From: Konstantin Khlebnikov <khlebnikov@openvz.org> +Date: Fri, 29 Jul 2011 14:07:56 +0400 +Subject: x86: HPET: Chose a paranoid safe value for the ETIME check +To: <stable@kernel.org> +Message-ID: <20110729100756.15728.90496.stgit@localhost6> + + +From: Thomas Gleixner <tglx@linutronix.de> + +(imported from commit v2.6.37-rc5-64-gf1c1807) + +commit 995bd3bb5 (x86: Hpet: Avoid the comparator readback penalty) +chose 8 HPET cycles as a safe value for the ETIME check, as we had the +confirmation that the posted write to the comparator register is +delayed by two HPET clock cycles on Intel chipsets which showed +readback problems. + +After that patch hit mainline we got reports from machines with newer +AMD chipsets which seem to have an even longer delay. See +http://thread.gmane.org/gmane.linux.kernel/1054283 and +http://thread.gmane.org/gmane.linux.kernel/1069458 for further +information. + +Boris tried to come up with an ACPI based selection of the minimum +HPET cycles, but this failed on a couple of test machines. And of +course we did not get any useful information from the hardware folks. + +For now our only option is to choose a paranoid high and safe value for +the minimum HPET cycles used by the ETIME check. Adjust the minimum ns +value for the HPET clockevent accordingly.
+ +Reported-Bistected-and-Tested-by: Markus Trippelsdorf <markus@trippelsdorf.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +LKML-Reference: <alpine.LFD.2.00.1012131222420.2653@localhost6.localdomain6> +Cc: Simon Kirby <sim@hostway.ca> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Andreas Herrmann <Andreas.Herrmann3@amd.com> +Cc: John Stultz <johnstul@us.ibm.com> +Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + arch/x86/kernel/hpet.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +--- a/arch/x86/kernel/hpet.c ++++ b/arch/x86/kernel/hpet.c +@@ -27,6 +27,9 @@ + #define HPET_DEV_FSB_CAP 0x1000 + #define HPET_DEV_PERI_CAP 0x2000 + ++#define HPET_MIN_CYCLES 128 ++#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) ++ + #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) + + /* +@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_regis + /* Calculate the min / max delta */ + hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, + &hpet_clockevent); +- /* 5 usec minimum reprogramming delta. */ +- hpet_clockevent.min_delta_ns = 5000; ++ /* Setup minimum reprogramming delta. */ ++ hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, ++ &hpet_clockevent); + + /* + * Start hpet with the boot cpu mask and make it +@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long + * the wraparound into account) nor a simple count down event + * mode. Further the write to the comparator register is + * delayed internally up to two HPET clock cycles in certain +- * chipsets (ATI, ICH9,10). We worked around that by reading +- * back the compare register, but that required another +- * workaround for ICH9,10 chips where the first readout after +- * write can return the old stale value. We already have a +- * minimum delta of 5us enforced, but a NMI or SMI hitting ++ * chipsets (ATI, ICH9,10). 
Some newer AMD chipsets have even ++ * longer delays. We worked around that by reading back the ++ * compare register, but that required another workaround for ++ * ICH9,10 chips where the first readout after write can ++ * return the old stale value. We already had a minimum ++ * programming delta of 5us enforced, but a NMI or SMI hitting + * between the counter readout and the comparator write can + * move us behind that point easily. Now instead of reading + * the compare register back several times, we make the ETIME + * decision based on the following: Return ETIME if the +- * counter value after the write is less than 8 HPET cycles ++ * counter value after the write is less than HPET_MIN_CYCLES + * away from the event or if the counter is already ahead of +- * the event. ++ * the event. The minimum programming delta for the generic ++ * clockevents code is set to 1.5 * HPET_MIN_CYCLES. + */ + res = (s32)(cnt - hpet_readl(HPET_COUNTER)); + +- return res < 8 ? -ETIME : 0; ++ return res < HPET_MIN_CYCLES ? -ETIME : 0; + } + + static void hpet_legacy_set_mode(enum clock_event_mode mode, |