diff options
author | Paul Gortmaker <paul.gortmaker@windriver.com> | 2013-01-08 15:05:29 -0500 |
---|---|---|
committer | Paul Gortmaker <paul.gortmaker@windriver.com> | 2013-01-08 15:12:33 -0500 |
commit | c5799d88da74ac0b2f496562e8301831531174e3 (patch) | |
tree | a9da78d065b5b6651d7da639901236b0b09f4b99 | |
parent | 9a83f0046d1c7affd6d80688568cb47de1b5dd81 (diff) | |
download | longterm-queue-2.6.34-c5799d88da74ac0b2f496562e8301831531174e3.tar.gz |
misc patches (perf, sched, net, kvm, crypto)
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
9 files changed, 887 insertions, 0 deletions
diff --git a/queue/KVM-x86-Prevent-starting-PIT-timers-in-the-absence-o.patch b/queue/KVM-x86-Prevent-starting-PIT-timers-in-the-absence-o.patch new file mode 100644 index 0000000..18ad4ba --- /dev/null +++ b/queue/KVM-x86-Prevent-starting-PIT-timers-in-the-absence-o.patch @@ -0,0 +1,72 @@ +From 607e095e4caffaa5dc5519307a6fc3b8dc61fd46 Mon Sep 17 00:00:00 2001 +From: Jan Kiszka <jan.kiszka@siemens.com> +Date: Wed, 14 Dec 2011 19:25:13 +0100 +Subject: [PATCH] KVM: x86: Prevent starting PIT timers in the absence of + irqchip support + +commit 0924ab2cfa98b1ece26c033d696651fd62896c69 upstream. + +User space may create the PIT and forgets about setting up the irqchips. +In that case, firing PIT IRQs will crash the host: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000128 +IP: [<ffffffffa10f6280>] kvm_set_irq+0x30/0x170 [kvm] +... +Call Trace: + [<ffffffffa11228c1>] pit_do_work+0x51/0xd0 [kvm] + [<ffffffff81071431>] process_one_work+0x111/0x4d0 + [<ffffffff81071bb2>] worker_thread+0x152/0x340 + [<ffffffff81075c8e>] kthread+0x7e/0x90 + [<ffffffff815a4474>] kernel_thread_helper+0x4/0x10 + +Prevent this by checking the irqchip mode before starting a timer. We +can't deny creating the PIT if the irqchips aren't set up yet as +current user land expects this order to work. 
+ +Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> +Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + arch/x86/kvm/i8254.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c +index 0150aff..ac06e2a 100644 +--- a/arch/x86/kvm/i8254.c ++++ b/arch/x86/kvm/i8254.c +@@ -280,11 +280,15 @@ static struct kvm_timer_ops kpit_ops = { + .is_periodic = kpit_is_periodic, + }; + +-static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) ++static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) + { ++ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + struct kvm_timer *pt = &ps->pit_timer; + s64 interval; + ++ if (!irqchip_in_kernel(kvm)) ++ return; ++ + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); + + pr_debug("create pit timer, interval is %llu nsec\n", interval); +@@ -336,13 +340,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) + /* FIXME: enhance mode 4 precision */ + case 4: + if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { +- create_pit_timer(ps, val, 0); ++ create_pit_timer(kvm, val, 0); + } + break; + case 2: + case 3: + if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ +- create_pit_timer(ps, val, 1); ++ create_pit_timer(kvm, val, 1); + } + break; + default: +-- +1.7.12.1 + diff --git a/queue/crypto-ghash-Avoid-null-pointer-dereference-if-no-ke.patch b/queue/crypto-ghash-Avoid-null-pointer-dereference-if-no-ke.patch new file mode 100644 index 0000000..a8f3154 --- /dev/null +++ b/queue/crypto-ghash-Avoid-null-pointer-dereference-if-no-ke.patch @@ -0,0 +1,105 @@ +From acf336a38a8312ac07e025d8e11cece6a2ebb708 Mon Sep 17 00:00:00 2001 +From: Nick Bowler <nbowler@elliptictech.com> +Date: Thu, 20 Oct 2011 14:16:55 +0200 +Subject: [PATCH] crypto: ghash - Avoid null pointer dereference if no key is + set + +commit 7ed47b7d142ec99ad6880bbbec51e9f12b3af74c 
upstream. + +The ghash_update function passes a pointer to gf128mul_4k_lle which will +be NULL if ghash_setkey is not called or if the most recent call to +ghash_setkey failed to allocate memory. This causes an oops. Fix this +up by returning an error code in the null case. + +This is trivially triggered from unprivileged userspace through the +AF_ALG interface by simply writing to the socket without setting a key. + +The ghash_final function has a similar issue, but triggering it requires +a memory allocation failure in ghash_setkey _after_ at least one +successful call to ghash_update. + + BUG: unable to handle kernel NULL pointer dereference at 00000670 + IP: [<d88c92d4>] gf128mul_4k_lle+0x23/0x60 [gf128mul] + *pde = 00000000 + Oops: 0000 [#1] PREEMPT SMP + Modules linked in: ghash_generic gf128mul algif_hash af_alg nfs lockd nfs_acl sunrpc bridge ipv6 stp llc + + Pid: 1502, comm: hashatron Tainted: G W 3.1.0-rc9-00085-ge9308cf #32 Bochs Bochs + EIP: 0060:[<d88c92d4>] EFLAGS: 00000202 CPU: 0 + EIP is at gf128mul_4k_lle+0x23/0x60 [gf128mul] + EAX: d69db1f0 EBX: d6b8ddac ECX: 00000004 EDX: 00000000 + ESI: 00000670 EDI: d6b8ddac EBP: d6b8ddc8 ESP: d6b8dda4 + DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 + Process hashatron (pid: 1502, ti=d6b8c000 task=d6810000 task.ti=d6b8c000) + Stack: + 00000000 d69db1f0 00000163 00000000 d6b8ddc8 c101a520 d69db1f0 d52aa000 + 00000ff0 d6b8dde8 d88d310f d6b8a3f8 d52aa000 00001000 d88d502c d6b8ddfc + 00001000 d6b8ddf4 c11676ed d69db1e8 d6b8de24 c11679ad d52aa000 00000000 + Call Trace: + [<c101a520>] ? kmap_atomic_prot+0x37/0xa6 + [<d88d310f>] ghash_update+0x85/0xbe [ghash_generic] + [<c11676ed>] crypto_shash_update+0x18/0x1b + [<c11679ad>] shash_ahash_update+0x22/0x36 + [<c11679cc>] shash_async_update+0xb/0xd + [<d88ce0ba>] hash_sendpage+0xba/0xf2 [algif_hash] + [<c121b24c>] kernel_sendpage+0x39/0x4e + [<d88ce000>] ? 0xd88cdfff + [<c121b298>] sock_sendpage+0x37/0x3e + [<c121b261>] ? 
kernel_sendpage+0x4e/0x4e + [<c10b4dbc>] pipe_to_sendpage+0x56/0x61 + [<c10b4e1f>] splice_from_pipe_feed+0x58/0xcd + [<c10b4d66>] ? splice_from_pipe_begin+0x10/0x10 + [<c10b51f5>] __splice_from_pipe+0x36/0x55 + [<c10b4d66>] ? splice_from_pipe_begin+0x10/0x10 + [<c10b6383>] splice_from_pipe+0x51/0x64 + [<c10b63c2>] ? default_file_splice_write+0x2c/0x2c + [<c10b63d5>] generic_splice_sendpage+0x13/0x15 + [<c10b4d66>] ? splice_from_pipe_begin+0x10/0x10 + [<c10b527f>] do_splice_from+0x5d/0x67 + [<c10b6865>] sys_splice+0x2bf/0x363 + [<c129373b>] ? sysenter_exit+0xf/0x16 + [<c104dc1e>] ? trace_hardirqs_on_caller+0x10e/0x13f + [<c129370c>] sysenter_do_call+0x12/0x32 + Code: 83 c4 0c 5b 5e 5f c9 c3 55 b9 04 00 00 00 89 e5 57 8d 7d e4 56 53 8d 5d e4 83 ec 18 89 45 e0 89 55 dc 0f b6 70 0f c1 e6 04 01 d6 <f3> a5 be 0f 00 00 00 4e 89 d8 e8 48 ff ff ff 8b 45 e0 89 da 0f + EIP: [<d88c92d4>] gf128mul_4k_lle+0x23/0x60 [gf128mul] SS:ESP 0068:d6b8dda4 + CR2: 0000000000000670 + ---[ end trace 4eaa2a86a8e2da24 ]--- + note: hashatron[1502] exited with preempt_count 1 + BUG: scheduling while atomic: hashatron/1502/0x10000002 + INFO: lockdep is turned off. + [...] 
+ +Signed-off-by: Nick Bowler <nbowler@elliptictech.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + crypto/ghash-generic.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c +index be44256..7835b8f 100644 +--- a/crypto/ghash-generic.c ++++ b/crypto/ghash-generic.c +@@ -67,6 +67,9 @@ static int ghash_update(struct shash_desc *desc, + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + ++ if (!ctx->gf128) ++ return -ENOKEY; ++ + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); +@@ -119,6 +122,9 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + ++ if (!ctx->gf128) ++ return -ENOKEY; ++ + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + +-- +1.7.12.1 + diff --git a/queue/mutex-Place-lock-in-contended-state-after-fastpath_l.patch b/queue/mutex-Place-lock-in-contended-state-after-fastpath_l.patch new file mode 100644 index 0000000..01dcab1 --- /dev/null +++ b/queue/mutex-Place-lock-in-contended-state-after-fastpath_l.patch @@ -0,0 +1,86 @@ +From 7f645a2e33119ec42ab204a0b42249c57074f70e Mon Sep 17 00:00:00 2001 +From: Will Deacon <will.deacon@arm.com> +Date: Fri, 10 Aug 2012 15:22:09 +0100 +Subject: [PATCH] mutex: Place lock in contended state after fastpath_lock + failure + +commit 0bce9c46bf3b15f485d82d7e81dabed6ebcc24b1 upstream. + +ARM recently moved to asm-generic/mutex-xchg.h for its mutex +implementation after the previous implementation was found to be missing +some crucial memory barriers. However, this has revealed some problems +running hackbench on SMP platforms due to the way in which the +MUTEX_SPIN_ON_OWNER code operates. 
+ +The symptoms are that a bunch of hackbench tasks are left waiting on an +unlocked mutex and therefore never get woken up to claim it. This boils +down to the following sequence of events: + + Task A Task B Task C Lock value +0 1 +1 lock() 0 +2 lock() 0 +3 spin(A) 0 +4 unlock() 1 +5 lock() 0 +6 cmpxchg(1,0) 0 +7 contended() -1 +8 lock() 0 +9 spin(C) 0 +10 unlock() 1 +11 cmpxchg(1,0) 0 +12 unlock() 1 + +At this point, the lock is unlocked, but Task B is in an uninterruptible +sleep with nobody to wake it up. + +This patch fixes the problem by ensuring we put the lock into the +contended state if we fail to acquire it on the fastpath, ensuring that +any blocked waiters are woken up when the mutex is released. + +Signed-off-by: Will Deacon <will.deacon@arm.com> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Chris Mason <chris.mason@fusionio.com> +Cc: Ingo Molnar <mingo@elte.hu> +Cc: <stable@vger.kernel.org> +Reviewed-by: Nicolas Pitre <nico@linaro.org> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/n/tip-6e9lrw2avczr0617fzl5vqb8@git.kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + include/asm-generic/mutex-xchg.h | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h +index 580a6d3..c04e0db 100644 +--- a/include/asm-generic/mutex-xchg.h ++++ b/include/asm-generic/mutex-xchg.h +@@ -26,7 +26,13 @@ static inline void + __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) + { + if (unlikely(atomic_xchg(count, 0) != 1)) +- fail_fn(count); ++ /* ++ * We failed to acquire the lock, so mark it contended ++ * to ensure that any waiting tasks are woken up by the ++ * unlock slow path. 
++ */ ++ if (likely(atomic_xchg(count, -1) != 1)) ++ fail_fn(count); + } + + /** +@@ -43,7 +49,8 @@ static inline int + __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) + { + if (unlikely(atomic_xchg(count, 0) != 1)) +- return fail_fn(count); ++ if (likely(atomic_xchg(count, -1) != 1)) ++ return fail_fn(count); + return 0; + } + +-- +1.7.12.1 + diff --git a/queue/net-Fix-ip-link-add-netns-oops.patch b/queue/net-Fix-ip-link-add-netns-oops.patch new file mode 100644 index 0000000..76bb0ac --- /dev/null +++ b/queue/net-Fix-ip-link-add-netns-oops.patch @@ -0,0 +1,122 @@ +From b3b1bb9d6e500ee7c98b90b54592c3bf656eb5e7 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" <ebiederm@xmission.com> +Date: Sat, 29 Jan 2011 14:57:22 +0000 +Subject: [PATCH] net: Fix ip link add netns oops + +commit 13ad17745c2cbd437d9e24b2d97393e0be11c439 upstream. + +Ed Swierk <eswierk@bigswitch.com> writes: +> On 2.6.35.7 +> ip link add link eth0 netns 9999 type macvlan +> where 9999 is a nonexistent PID triggers an oops and causes all network functions to hang: +> [10663.821898] BUG: unable to handle kernel NULL pointer dereference at 000000000000006d +> [10663.821917] IP: [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170 +> [10663.821933] PGD 1d3927067 PUD 22f5c5067 PMD 0 +> [10663.821944] Oops: 0000 [#1] SMP +> [10663.821953] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq +> [10663.821959] CPU 3 +> [10663.821963] Modules linked in: macvlan ip6table_filter ip6_tables rfcomm ipt_MASQUERADE binfmt_misc iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack sco ipt_REJECT bnep l2cap xt_tcpudp iptable_filter ip_tables x_tables bridge stp vboxnetadp vboxnetflt vboxdrv kvm_intel kvm parport_pc ppdev snd_hda_codec_intelhdmi snd_hda_codec_conexant arc4 iwlagn iwlcore mac80211 snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi i915 snd_seq_midi_event snd_seq thinkpad_acpi drm_kms_helper btusb tpm_tis nvram uvcvideo 
snd_timer snd_seq_device bluetooth videodev v4l1_compat v4l2_compat_ioctl32 tpm drm tpm_bios snd cfg80211 psmouse serio_raw intel_ips soundcore snd_page_alloc intel_agp i2c_algo_bit video output netconsole configfs lp parport usbhid hid e1000e sdhci_pci ahci libahci sdhci led_class +> [10663.822155] +> [10663.822161] Pid: 6000, comm: ip Not tainted 2.6.35-23-generic #41-Ubuntu 2901CTO/2901CTO +> [10663.822167] RIP: 0010:[<ffffffff8149c2fa>] [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170 +> [10663.822177] RSP: 0018:ffff88014aebf7b8 EFLAGS: 00010286 +> [10663.822182] RAX: 00000000fffffff4 RBX: ffff8801ad900800 RCX: 0000000000000000 +> [10663.822187] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88014ad63000 +> [10663.822191] RBP: ffff88014aebf808 R08: 0000000000000041 R09: 0000000000000041 +> [10663.822196] R10: 0000000000000000 R11: dead000000200200 R12: ffff88014aebf818 +> [10663.822201] R13: fffffffffffffffd R14: ffff88014aebf918 R15: ffff88014ad62000 +> [10663.822207] FS: 00007f00c487f700(0000) GS:ffff880001f80000(0000) knlGS:0000000000000000 +> [10663.822212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +> [10663.822216] CR2: 000000000000006d CR3: 0000000231f19000 CR4: 00000000000026e0 +> [10663.822221] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +> [10663.822226] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +> [10663.822231] Process ip (pid: 6000, threadinfo ffff88014aebe000, task ffff88014afb16e0) +> [10663.822236] Stack: +> [10663.822240] ffff88014aebf808 ffffffff814a2bb5 ffff88014aebf7e8 00000000a00ee8d6 +> [10663.822251] <0> 0000000000000000 ffffffffa00ef940 ffff8801ad900800 ffff88014aebf818 +> [10663.822265] <0> ffff88014aebf918 ffff8801ad900800 ffff88014aebf858 ffffffff8149c413 +> [10663.822281] Call Trace: +> [10663.822290] [<ffffffff814a2bb5>] ? 
dev_addr_init+0x75/0xb0 +> [10663.822298] [<ffffffff8149c413>] dev_alloc_name+0x43/0x90 +> [10663.822307] [<ffffffff814a85ee>] rtnl_create_link+0xbe/0x1b0 +> [10663.822314] [<ffffffff814ab2aa>] rtnl_newlink+0x48a/0x570 +> [10663.822321] [<ffffffff814aafcc>] ? rtnl_newlink+0x1ac/0x570 +> [10663.822332] [<ffffffff81030064>] ? native_x2apic_icr_read+0x4/0x20 +> [10663.822339] [<ffffffff814a8c17>] rtnetlink_rcv_msg+0x177/0x290 +> [10663.822346] [<ffffffff814a8aa0>] ? rtnetlink_rcv_msg+0x0/0x290 +> [10663.822354] [<ffffffff814c25d9>] netlink_rcv_skb+0xa9/0xd0 +> [10663.822360] [<ffffffff814a8a85>] rtnetlink_rcv+0x25/0x40 +> [10663.822367] [<ffffffff814c223e>] netlink_unicast+0x2de/0x2f0 +> [10663.822374] [<ffffffff814c303e>] netlink_sendmsg+0x1fe/0x2e0 +> [10663.822383] [<ffffffff81488533>] sock_sendmsg+0xf3/0x120 +> [10663.822391] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20 +> [10663.822400] [<ffffffff81168656>] ? __d_lookup+0x136/0x150 +> [10663.822406] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20 +> [10663.822414] [<ffffffff812b7a0d>] ? _atomic_dec_and_lock+0x4d/0x80 +> [10663.822422] [<ffffffff8116ea90>] ? mntput_no_expire+0x30/0x110 +> [10663.822429] [<ffffffff81486ff5>] ? move_addr_to_kernel+0x65/0x70 +> [10663.822435] [<ffffffff81493308>] ? verify_iovec+0x88/0xe0 +> [10663.822442] [<ffffffff81489020>] sys_sendmsg+0x240/0x3a0 +> [10663.822450] [<ffffffff8111e2a9>] ? __do_fault+0x479/0x560 +> [10663.822457] [<ffffffff815899fe>] ? _raw_spin_lock+0xe/0x20 +> [10663.822465] [<ffffffff8116cf4a>] ? alloc_fd+0x10a/0x150 +> [10663.822473] [<ffffffff8158d76e>] ? 
do_page_fault+0x15e/0x350 +> [10663.822482] [<ffffffff8100a0f2>] system_call_fastpath+0x16/0x1b +> [10663.822487] Code: 90 48 8d 78 02 be 25 00 00 00 e8 92 1d e2 ff 48 85 c0 75 cf bf 20 00 00 00 e8 c3 b1 c6 ff 49 89 c7 b8 f4 ff ff ff 4d 85 ff 74 bd <4d> 8b 75 70 49 8d 45 70 48 89 45 b8 49 83 ee 58 eb 28 48 8d 55 +> [10663.822618] RIP [<ffffffff8149c2fa>] __dev_alloc_name+0x9a/0x170 +> [10663.822627] RSP <ffff88014aebf7b8> +> [10663.822631] CR2: 000000000000006d +> [10663.822636] ---[ end trace 3dfd6c3ad5327ca7 ]--- + +This bug was introduced in: +commit 81adee47dfb608df3ad0b91d230fb3cef75f0060 +Author: Eric W. Biederman <ebiederm@aristanetworks.com> +Date: Sun Nov 8 00:53:51 2009 -0800 + + net: Support specifying the network namespace upon device creation. + + There is no good reason to not support userspace specifying the + network namespace during device creation, and it makes it easier + to create a network device and pass it to a child network namespace + with a well known name. + + We have to be careful to ensure that the target network namespace + for the new device exists through the life of the call. To keep + that logic clear I have factored out the network namespace grabbing + logic into rtnl_link_get_net. + + In addition we need to continue to pass the source network namespace + to the rtnl_link_ops.newlink method so that we can find the base + device source network namespace. + + Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com> + Acked-by: Eric Dumazet <eric.dumazet@gmail.com> + +Where apparently I forgot to add error handling to the path where we create +a new network device in a new network namespace, and pass in an invalid pid. + +Reported-by: Ed Swierk <eswierk@bigswitch.com> +Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + net/core/rtnetlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 835f38c..e01b484 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1324,6 +1324,9 @@ replay: + snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + + dest_net = rtnl_link_get_net(net, tb); ++ if (IS_ERR(dest_net)) ++ return PTR_ERR(dest_net); ++ + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); + + if (IS_ERR(dev)) +-- +1.7.12.1 + diff --git a/queue/perf-Fix-tear-down-of-inherited-group-events.patch b/queue/perf-Fix-tear-down-of-inherited-group-events.patch new file mode 100644 index 0000000..98f40ce --- /dev/null +++ b/queue/perf-Fix-tear-down-of-inherited-group-events.patch @@ -0,0 +1,53 @@ +From 04d8850f687bd5079bb234119c3b6b0d43bdd40f Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <a.p.zijlstra@chello.nl> +Date: Tue, 15 Mar 2011 14:37:10 +0100 +Subject: [PATCH] perf: Fix tear-down of inherited group events + +commit 38b435b16c36b0d863efcf3f07b34a6fac9873fd upstream. + +When destroying inherited events, we need to destroy groups too, +otherwise the event iteration in perf_event_exit_task_context() will +miss group siblings and we leak events with all the consequences. 
+ +Reported-and-tested-by: Vince Weaver <vweaver1@eecs.utk.edu> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +LKML-Reference: <1300196470.2203.61.camel@twins> +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + kernel/perf_event.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/kernel/perf_event.c b/kernel/perf_event.c +index 0e4499e..30e164e 100644 +--- a/kernel/perf_event.c ++++ b/kernel/perf_event.c +@@ -5196,17 +5196,20 @@ __perf_event_exit_task(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child) + { +- struct perf_event *parent_event; ++ if (child_event->parent) { ++ raw_spin_lock_irq(&child_ctx->lock); ++ perf_group_detach(child_event); ++ raw_spin_unlock_irq(&child_ctx->lock); ++ } + + perf_event_remove_from_context(child_event); + +- parent_event = child_event->parent; + /* +- * It can happen that parent exits first, and has events ++ * It can happen that the parent exits first, and has events + * that are still around due to the child reference. These +- * events need to be zapped - but otherwise linger. ++ * events need to be zapped. + */ +- if (parent_event) { ++ if (child_event->parent) { + sync_child_event(child_event, child); + free_event(child_event); + } +-- +1.7.12.1 + diff --git a/queue/perf_events-Fix-races-in-group-composition.patch b/queue/perf_events-Fix-races-in-group-composition.patch new file mode 100644 index 0000000..7892cb0 --- /dev/null +++ b/queue/perf_events-Fix-races-in-group-composition.patch @@ -0,0 +1,231 @@ +From 59cf37bdf1b0a00b7126f1056e842781edfbb968 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <a.p.zijlstra@chello.nl> +Date: Thu, 27 May 2010 15:47:49 +0200 +Subject: [PATCH] perf_events: Fix races in group composition + +commit 8a49542c0554af7d0073aac0ee73ee65b807ef34 upstream. 
+ +Group siblings don't pin each-other or the parent, so when we destroy +events we must make sure to clean up all cross referencing pointers. + +In particular, for destruction of a group leader we must be able to +find all its siblings and remove their reference to it. + +This means that detaching an event from its context must not detach it +from the group, otherwise we can end up failing to clear all pointers. + +Solve this by clearly separating the attachment to a context and +attachment to a group, and keep the group composed until we destroy +the events. + +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +LKML-Reference: <new-submission> +Signed-off-by: Ingo Molnar <mingo@elte.hu> +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + include/linux/perf_event.h | 4 ++ + kernel/perf_event.c | 91 ++++++++++++++++++++++++++++++++++------------ + 2 files changed, 71 insertions(+), 24 deletions(-) + +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index eea9188..c6e1432 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -571,6 +571,9 @@ enum perf_group_flag { + PERF_GROUP_SOFTWARE = 0x1, + }; + ++#define PERF_ATTACH_CONTEXT 0x01 ++#define PERF_ATTACH_GROUP 0x02 ++ + /** + * struct perf_event - performance event kernel representation: + */ +@@ -585,6 +588,7 @@ struct perf_event { + const struct pmu *pmu; + + enum perf_event_active_state state; ++ unsigned int attach_state; + atomic64_t count; + + /* +diff --git a/kernel/perf_event.c b/kernel/perf_event.c +index 540c26b..0e4499e 100644 +--- a/kernel/perf_event.c ++++ b/kernel/perf_event.c +@@ -291,14 +291,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) + static void + list_add_event(struct perf_event *event, struct perf_event_context *ctx) + { +- struct perf_event *group_leader = event->group_leader; ++ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); ++ event->attach_state |= PERF_ATTACH_CONTEXT; + + /* +- 
* Depending on whether it is a standalone or sibling event, +- * add it straight to the context's event list, or to the group +- * leader's sibling list: ++ * If we're a stand alone event or group leader, we go to the context ++ * list, group events are kept attached to the group so that ++ * perf_group_detach can, at all times, locate all siblings. + */ +- if (group_leader == event) { ++ if (event->group_leader == event) { + struct list_head *list; + + if (is_software_event(event)) +@@ -306,13 +307,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) + + list = ctx_group_list(event, ctx); + list_add_tail(&event->group_entry, list); +- } else { +- if (group_leader->group_flags & PERF_GROUP_SOFTWARE && +- !is_software_event(event)) +- group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; +- +- list_add_tail(&event->group_entry, &group_leader->sibling_list); +- group_leader->nr_siblings++; + } + + list_add_rcu(&event->event_entry, &ctx->event_list); +@@ -321,6 +315,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) + ctx->nr_stat++; + } + ++static void perf_group_attach(struct perf_event *event) ++{ ++ struct perf_event *group_leader = event->group_leader; ++ ++ WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); ++ event->attach_state |= PERF_ATTACH_GROUP; ++ ++ if (group_leader == event) ++ return; ++ ++ if (group_leader->group_flags & PERF_GROUP_SOFTWARE && ++ !is_software_event(event)) ++ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; ++ ++ list_add_tail(&event->group_entry, &group_leader->sibling_list); ++ group_leader->nr_siblings++; ++} ++ + /* + * Remove a event from the lists for its context. + * Must be called with ctx->mutex and ctx->lock held. 
+@@ -328,17 +340,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) + static void + list_del_event(struct perf_event *event, struct perf_event_context *ctx) + { +- if (list_empty(&event->group_entry)) ++ /* ++ * We can have double detach due to exit/hot-unplug + close. ++ */ ++ if (!(event->attach_state & PERF_ATTACH_CONTEXT)) + return; ++ ++ event->attach_state &= ~PERF_ATTACH_CONTEXT; ++ + ctx->nr_events--; + if (event->attr.inherit_stat) + ctx->nr_stat--; + +- list_del_init(&event->group_entry); + list_del_rcu(&event->event_entry); + +- if (event->group_leader != event) +- event->group_leader->nr_siblings--; ++ if (event->group_leader == event) ++ list_del_init(&event->group_entry); + + update_group_times(event); + +@@ -353,21 +370,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) + event->state = PERF_EVENT_STATE_OFF; + } + +-static void +-perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx) ++static void perf_group_detach(struct perf_event *event) + { + struct perf_event *sibling, *tmp; ++ struct list_head *list = NULL; ++ ++ /* ++ * We can have double detach due to exit/hot-unplug + close. ++ */ ++ if (!(event->attach_state & PERF_ATTACH_GROUP)) ++ return; ++ ++ event->attach_state &= ~PERF_ATTACH_GROUP; ++ ++ /* ++ * If this is a sibling, remove it from its group. ++ */ ++ if (event->group_leader != event) { ++ list_del_init(&event->group_entry); ++ event->group_leader->nr_siblings--; ++ return; ++ } ++ ++ if (!list_empty(&event->group_entry)) ++ list = &event->group_entry; + + /* + * If this was a group event with sibling events then + * upgrade the siblings to singleton events by adding them +- * to the context list directly: ++ * to whatever list we are on. 
+ */ + list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { +- struct list_head *list; +- +- list = ctx_group_list(event, ctx); +- list_move_tail(&sibling->group_entry, list); ++ if (list) ++ list_move_tail(&sibling->group_entry, list); + sibling->group_leader = sibling; + + /* Inherit group flags from the previous leader */ +@@ -720,6 +755,7 @@ static void add_event_to_ctx(struct perf_event *event, + struct perf_event_context *ctx) + { + list_add_event(event, ctx); ++ perf_group_attach(event); + event->tstamp_enabled = ctx->time; + event->tstamp_running = ctx->time; + event->tstamp_stopped = ctx->time; +@@ -1874,8 +1910,8 @@ int perf_event_release_kernel(struct perf_event *event) + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + raw_spin_lock_irq(&ctx->lock); ++ perf_group_detach(event); + list_del_event(event, ctx); +- perf_destroy_group(event, ctx); + raw_spin_unlock_irq(&ctx->lock); + mutex_unlock(&ctx->mutex); + +@@ -4946,6 +4982,12 @@ SYSCALL_DEFINE5(perf_event_open, + list_add_tail(&event->owner_entry, &current->perf_event_list); + mutex_unlock(&current->perf_event_mutex); + ++ /* ++ * Drop the reference on the group_event after placing the ++ * new event on the sibling_list. This ensures destruction ++ * of the group leader will find the pointer to itself in ++ * perf_group_detach(). 
++ */ + fput_light(group_file, fput_needed); + fd_install(event_fd, event_file); + return event_fd; +@@ -5267,6 +5309,7 @@ static void perf_free_event(struct perf_event *event, + + fput(parent->filp); + ++ perf_group_detach(event); + list_del_event(event, ctx); + free_event(event); + } +-- +1.7.12.1 + diff --git a/queue/sched-fix-divide-by-zero-at-thread_group-task-_times.patch b/queue/sched-fix-divide-by-zero-at-thread_group-task-_times.patch new file mode 100644 index 0000000..60aae13 --- /dev/null +++ b/queue/sched-fix-divide-by-zero-at-thread_group-task-_times.patch @@ -0,0 +1,115 @@ +From 917911ec79e8ad046b733eb3b3ef65cb923c3779 Mon Sep 17 00:00:00 2001 +From: Stanislaw Gruszka <sgruszka@redhat.com> +Date: Wed, 8 Aug 2012 11:27:15 +0200 +Subject: [PATCH] sched: fix divide by zero at {thread_group,task}_times + +commit bea6832cc8c4a0a9a65dd17da6aaa657fe27bc3e upstream. + +On architectures where cputime_t is a 64 bit type, it is possible to trigger +a divide by zero on the do_div(temp, (__force u32) total) line, if total is a +non zero number but has its lower 32 bits zeroed. Removing the cast is not +a good solution since some do_div() implementations do cast to u32 +internally. 
+ +This problem can be triggered in practice on very long lived processes: + + PID: 2331 TASK: ffff880472814b00 CPU: 2 COMMAND: "oraagent.bin" + #0 [ffff880472a51b70] machine_kexec at ffffffff8103214b + #1 [ffff880472a51bd0] crash_kexec at ffffffff810b91c2 + #2 [ffff880472a51ca0] oops_end at ffffffff814f0b00 + #3 [ffff880472a51cd0] die at ffffffff8100f26b + #4 [ffff880472a51d00] do_trap at ffffffff814f03f4 + #5 [ffff880472a51d60] do_divide_error at ffffffff8100cfff + #6 [ffff880472a51e00] divide_error at ffffffff8100be7b + [exception RIP: thread_group_times+0x56] + RIP: ffffffff81056a16 RSP: ffff880472a51eb8 RFLAGS: 00010046 + RAX: bc3572c9fe12d194 RBX: ffff880874150800 RCX: 0000000110266fad + RDX: 0000000000000000 RSI: ffff880472a51eb8 RDI: 001038ae7d9633dc + RBP: ffff880472a51ef8 R8: 00000000b10a3a64 R9: ffff880874150800 + R10: 00007fcba27ab680 R11: 0000000000000202 R12: ffff880472a51f08 + R13: ffff880472a51f10 R14: 0000000000000000 R15: 0000000000000007 + ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + #7 [ffff880472a51f00] do_sys_times at ffffffff8108845d + #8 [ffff880472a51f40] sys_times at ffffffff81088524 + #9 [ffff880472a51f80] system_call_fastpath at ffffffff8100b0f2 + RIP: 0000003808caac3a RSP: 00007fcba27ab6d8 RFLAGS: 00000202 + RAX: 0000000000000064 RBX: ffffffff8100b0f2 RCX: 0000000000000000 + RDX: 00007fcba27ab6e0 RSI: 000000000076d58e RDI: 00007fcba27ab6e0 + RBP: 00007fcba27ab700 R8: 0000000000000020 R9: 000000000000091b + R10: 00007fcba27ab680 R11: 0000000000000202 R12: 00007fff9ca41940 + R13: 0000000000000000 R14: 00007fcba27ac9c0 R15: 00007fff9ca41940 + ORIG_RAX: 0000000000000064 CS: 0033 SS: 002b + +Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Link: http://lkml.kernel.org/r/20120808092714.GA3580@redhat.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +[PG: sched/core.c is just sched.c in 2.6.34; also the do_div() on + __force u32 isn't explicitly seen since that is in 
v3.3-rc1~191^2~11] +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + kernel/sched.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +diff --git a/kernel/sched.c b/kernel/sched.c +index 245458e..e24d139 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -3410,6 +3410,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) + #endif + ++static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) ++{ ++ u64 temp = (__force u64) rtime; ++ ++ temp *= (__force u64) utime; ++ ++ if (sizeof(cputime_t) == 4) ++ temp = div_u64(temp, (__force u32) total); ++ else ++ temp = div64_u64(temp, (__force u64) total); ++ ++ return (__force cputime_t) temp; ++} ++ + void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + { + cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); +@@ -3419,13 +3433,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + */ + rtime = nsecs_to_cputime(p->se.sum_exec_runtime); + +- if (total) { +- u64 temp = rtime; +- +- temp *= utime; +- do_div(temp, total); +- utime = (cputime_t)temp; +- } else ++ if (total) ++ utime = scale_utime(utime, rtime, total); ++ else + utime = rtime; + + /* +@@ -3452,13 +3462,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + total = cputime_add(cputime.utime, cputime.stime); + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + +- if (total) { +- u64 temp = rtime; +- +- temp *= cputime.utime; +- do_div(temp, total); +- utime = (cputime_t)temp; +- } else ++ if (total) ++ utime = scale_utime(cputime.utime, rtime, total); ++ else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); +-- +1.7.12.1 + diff --git a/queue/sched-rt-Fix-task-stack-corruption-under-__ARCH_WANT.patch b/queue/sched-rt-Fix-task-stack-corruption-under-__ARCH_WANT.patch new file mode 
100644 index 0000000..b95192a --- /dev/null +++ b/queue/sched-rt-Fix-task-stack-corruption-under-__ARCH_WANT.patch @@ -0,0 +1,95 @@ +From e0c60dc5038395b66cf92016843c715a5e56b48c Mon Sep 17 00:00:00 2001 +From: Chanho Min <chanho0207@gmail.com> +Date: Thu, 5 Jan 2012 20:00:19 +0900 +Subject: [PATCH] sched/rt: Fix task stack corruption under + __ARCH_WANT_INTERRUPTS_ON_CTXSW + +commit cb297a3e433dbdcf7ad81e0564e7b804c941ff0d upstream. + +This issue happens under the following conditions: + + 1. preemption is off + 2. __ARCH_WANT_INTERRUPTS_ON_CTXSW is defined + 3. RT scheduling class + 4. SMP system + +Sequence is as follows: + + 1.suppose current task is A. start schedule() + 2.task A is enqueued pushable task at the entry of schedule() + __schedule + prev = rq->curr; + ... + put_prev_task + put_prev_task_rt + enqueue_pushable_task + 4.pick the task B as next task. + next = pick_next_task(rq); + 3.rq->curr set to task B and context_switch is started. + rq->curr = next; + 4.At the entry of context_switch, release this cpu's rq->lock. + context_switch + prepare_task_switch + prepare_lock_switch + raw_spin_unlock_irq(&rq->lock); + 5.Shortly after rq->lock is released, an interrupt occurs and IRQ context starts + 6.try_to_wake_up() which is called by ISR acquires rq->lock + try_to_wake_up + ttwu_remote + rq = __task_rq_lock(p) + ttwu_do_wakeup(rq, p, wake_flags); + task_woken_rt + 7.push_rt_task picks the task A which is enqueued before. + task_woken_rt + push_rt_tasks(rq) + next_task = pick_next_pushable_task(rq) + 8.At find_lock_lowest_rq(), If double_lock_balance() returns 0, + lowest_rq can be the remote rq. + (But, if preemption is on, double_lock_balance always returns 1 and it + doesn't happen.) + push_rt_task + find_lock_lowest_rq + if (double_lock_balance(rq, lowest_rq)).. + 9.find_lock_lowest_rq returns the available rq. task A is migrated to + the remote cpu/rq. + push_rt_task + ...
+ deactivate_task(rq, next_task, 0); + set_task_cpu(next_task, lowest_rq->cpu); + activate_task(lowest_rq, next_task, 0); + 10. But, task A is on irq context at this cpu. + So, task A is scheduled by two cpus at the same time until restore from IRQ. + Task A's stack is corrupted. + +To fix it, don't migrate an RT task if it's still running. + +Signed-off-by: Chanho Min <chanho.min@lge.com> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Acked-by: Steven Rostedt <rostedt@goodmis.org> +Link: http://lkml.kernel.org/r/CAOAMb1BHA=5fm7KTewYyke6u-8DP0iUuJMpgQw54vNeXFsGpoQ@mail.gmail.com +Signed-off-by: Ingo Molnar <mingo@elte.hu> +[PG: in 2.6.34, sched/rt.c is just sched_rt.c] +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +--- + kernel/sched_rt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c +index fd8c1a3..abd5cba 100644 +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -1315,6 +1315,11 @@ static int push_rt_task(struct rq *rq) + if (!next_task) + return 0; + ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ if (unlikely(task_running(rq, next_task))) ++ return 0; ++#endif ++ + retry: + if (unlikely(next_task == rq->curr)) { + WARN_ON(1); +-- +1.7.12.1 + diff --git a/queue/series b/queue/series index 141139f..15bc304 100644 --- a/queue/series +++ b/queue/series @@ -69,3 +69,11 @@ sctp-Fix-list-corruption-resulting-from-freeing-an-a.patch sctp-ABORT-if-receive-reassmbly-or-reodering-queue-i.patch sctp-Enforce-retransmission-limit-during-shutdown.patch SCTP-fix-race-between-sctp_bind_addr_free-and-sctp_b.patch +KVM-x86-Prevent-starting-PIT-timers-in-the-absence-o.patch +perf_events-Fix-races-in-group-composition.patch +perf-Fix-tear-down-of-inherited-group-events.patch +sched-fix-divide-by-zero-at-thread_group-task-_times.patch +mutex-Place-lock-in-contended-state-after-fastpath_l.patch +crypto-ghash-Avoid-null-pointer-dereference-if-no-ke.patch +net-Fix-ip-link-add-netns-oops.patch 
+sched-rt-Fix-task-stack-corruption-under-__ARCH_WANT.patch |