diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-05-30 09:15:23 +0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-05-30 09:15:23 +0800 |
commit | 3cd4ea5bf7cea44ac6caf435796f8139296194b3 (patch) | |
tree | fde6174acc14a3ed648c513f3409932ed79f30f0 | |
parent | 51b55d5fb5e6866c3dab3ecafd84b7dacc61f6b5 (diff) | |
download | stable-queue-3cd4ea5bf7cea44ac6caf435796f8139296194b3.tar.gz |
.38 patches
22 files changed, 1301 insertions, 0 deletions
diff --git a/queue-2.6.38/bonding-802.3ad-fix-agg_device_up.patch b/queue-2.6.38/bonding-802.3ad-fix-agg_device_up.patch new file mode 100644 index 0000000000..894923d330 --- /dev/null +++ b/queue-2.6.38/bonding-802.3ad-fix-agg_device_up.patch @@ -0,0 +1,45 @@ +From 367b42c3aa769ff570950553a0d38ca1557df453 Mon Sep 17 00:00:00 2001 +From: Jiri Bohac <jbohac@suse.cz> +Date: Tue, 19 Apr 2011 02:09:55 +0000 +Subject: bonding: 802.3ad - fix agg_device_up + + +From: Jiri Bohac <jbohac@suse.cz> + +[ Upstream commit 2430af8b7fa37ac0be102c77f9dc6ee669d24ba9 ] + +The slave member of struct aggregator does not necessarily point +to a slave which is part of the aggregator. It points to the +slave structure containing the aggregator structure, while +completely different slaves (or no slaves at all) may be part of +the aggregator. + +The agg_device_up() function wrongly uses agg->slave to find the state +of the aggregator. Use agg->lag_ports->slave instead. The bug has +been introduced by commit 4cd6fe1c6483cde93e2ec91f58b7af9c9eea51ad +("bonding: fix link down handling in 802.3ad mode"). + +Signed-off-by: Jiri Bohac <jbohac@suse.cz> +Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + drivers/net/bonding/bond_3ad.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -1482,8 +1482,11 @@ static struct aggregator *ad_agg_selecti + + static int agg_device_up(const struct aggregator *agg) + { +- return (netif_running(agg->slave->dev) && +- netif_carrier_ok(agg->slave->dev)); ++ struct port *port = agg->lag_ports; ++ if (!port) ++ return 0; ++ return (netif_running(port->slave->dev) && ++ netif_carrier_ok(port->slave->dev)); + } + + /** diff --git a/queue-2.6.38/bonding-prevent-deadlock-on-slave-store-with-alb-mode-v3.patch b/queue-2.6.38/bonding-prevent-deadlock-on-slave-store-with-alb-mode-v3.patch new file mode 100644 index 0000000000..23c7357a3e --- /dev/null +++ b/queue-2.6.38/bonding-prevent-deadlock-on-slave-store-with-alb-mode-v3.patch @@ -0,0 +1,159 @@ +From 987d2d3de1269aa6d91a0c157c549e6420b4de6c Mon Sep 17 00:00:00 2001 +From: Neil Horman <nhorman@tuxdriver.com> +Date: Wed, 25 May 2011 08:13:01 +0000 +Subject: bonding: prevent deadlock on slave store with alb mode (v3) + + +From: Neil Horman <nhorman@tuxdriver.com> + +[ Upstream commit 9fe0617d9b6d21f700ee9e658e1c9fe3be2fb402 ] + +This soft lockup was recently reported: + +[root@dell-per715-01 ~]# echo +bond5 > /sys/class/net/bonding_masters +[root@dell-per715-01 ~]# echo +eth1 > /sys/class/net/bond5/bonding/slaves +bonding: bond5: doing slave updates when interface is down. +bonding bond5: master_dev is not up in bond_enslave +[root@dell-per715-01 ~]# echo -eth1 > /sys/class/net/bond5/bonding/slaves +bonding: bond5: doing slave updates when interface is down. + +BUG: soft lockup - CPU#12 stuck for 60s! 
[bash:6444] +CPU 12: +Modules linked in: bonding autofs4 hidp rfcomm l2cap bluetooth lockd sunrpc +be2d +Pid: 6444, comm: bash Not tainted 2.6.18-262.el5 #1 +RIP: 0010:[<ffffffff80064bf0>] [<ffffffff80064bf0>] +.text.lock.spinlock+0x26/00 +RSP: 0018:ffff810113167da8 EFLAGS: 00000286 +RAX: ffff810113167fd8 RBX: ffff810123a47800 RCX: 0000000000ff1025 +RDX: 0000000000000000 RSI: ffff810123a47800 RDI: ffff81021b57f6f8 +RBP: ffff81021b57f500 R08: 0000000000000000 R09: 000000000000000c +R10: 00000000ffffffff R11: ffff81011d41c000 R12: ffff81021b57f000 +R13: 0000000000000000 R14: 0000000000000282 R15: 0000000000000282 +FS: 00002b3b41ef3f50(0000) GS:ffff810123b27940(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +CR2: 00002b3b456dd000 CR3: 000000031fc60000 CR4: 00000000000006e0 + +Call Trace: + [<ffffffff80064af9>] _spin_lock_bh+0x9/0x14 + [<ffffffff886937d7>] :bonding:tlb_clear_slave+0x22/0xa1 + [<ffffffff8869423c>] :bonding:bond_alb_deinit_slave+0xba/0xf0 + [<ffffffff8868dda6>] :bonding:bond_release+0x1b4/0x450 + [<ffffffff8006457b>] __down_write_nested+0x12/0x92 + [<ffffffff88696ae4>] :bonding:bonding_store_slaves+0x25c/0x2f7 + [<ffffffff801106f7>] sysfs_write_file+0xb9/0xe8 + [<ffffffff80016b87>] vfs_write+0xce/0x174 + [<ffffffff80017450>] sys_write+0x45/0x6e + [<ffffffff8005d28d>] tracesys+0xd5/0xe0 + +It occurs because we are able to change the slave configuarion of a bond while +the bond interface is down. The bonding driver initializes some data structures +only after its ndo_open routine is called. Among them is the initalization of +the alb tx and rx hash locks. So if we add or remove a slave without first +opening the bond master device, we run the risk of trying to lock/unlock a +spinlock that has garbage for data in it, which results in our above softlock. 
+ +Note that sometimes this works, because in many cases an unlocked spinlock has +the raw_lock parameter initialized to zero (meaning that the kzalloc of the +net_device private data is equivalent to calling spin_lock_init), but that's not +true in all cases, and we aren't guaranteed that condition, so we need to pass +the relevant spinlocks through the spin_lock_init function. + +Fix it by moving the spin_lock_init calls for the tx and rx hashtable locks to +the ndo_init path, so they are ready for use by the bond_store_slaves path. + +Change notes: +v2) Based on conversation with Jay and Nicolas it seems that the ability to +enslave devices while the bond master is down should be safe to do. As such +this is an outlier bug, and so instead we'll just initialize the errant spinlocks +in the init path rather than the open path, solving the problem. We'll also +remove the warnings about the bond being down during enslave operations, since +it should be safe + +v3) Fix spelling error + +Signed-off-by: Neil Horman <nhorman@tuxdriver.com> +Reported-by: jtluka@redhat.com +CC: Jay Vosburgh <fubar@us.ibm.com> +CC: Andy Gospodarek <andy@greyhouse.net> +CC: nicolas.2p.debian@gmail.com +CC: "David S. Miller" <davem@davemloft.net> +Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + drivers/net/bonding/bond_alb.c | 4 ---- + drivers/net/bonding/bond_main.c | 16 ++++++++++------ + drivers/net/bonding/bond_sysfs.c | 6 ------ + 3 files changed, 10 insertions(+), 16 deletions(-) + +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -163,8 +163,6 @@ static int tlb_initialize(struct bonding + struct tlb_client_info *new_hashtbl; + int i; + +- spin_lock_init(&(bond_info->tx_hashtbl_lock)); +- + new_hashtbl = kzalloc(size, GFP_KERNEL); + if (!new_hashtbl) { + pr_err("%s: Error: Failed to allocate TLB hash table\n", +@@ -764,8 +762,6 @@ static int rlb_initialize(struct bonding + int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); + int i; + +- spin_lock_init(&(bond_info->rx_hashtbl_lock)); +- + new_hashtbl = kmalloc(size, GFP_KERNEL); + if (!new_hashtbl) { + pr_err("%s: Error: Failed to allocate RLB hash table\n", +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1441,12 +1441,6 @@ int bond_enslave(struct net_device *bond + bond_dev->name, slave_dev->name); + } + +- /* bond must be initialized by bond_open() before enslaving */ +- if (!(bond_dev->flags & IFF_UP)) { +- pr_warning("%s: master_dev is not up in bond_enslave\n", +- bond_dev->name); +- } +- + /* already enslaved */ + if (slave_dev->flags & IFF_SLAVE) { + pr_debug("Error, Device was already enslaved\n"); +@@ -5157,9 +5151,19 @@ static int bond_init(struct net_device * + { + struct bonding *bond = netdev_priv(bond_dev); + struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); ++ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + pr_debug("Begin bond_init for %s\n", bond_dev->name); + ++ /* ++ * Initialize locks that may be required during ++ * en/deslave operations. 
All of the bond_open work ++ * (of which this is part) should really be moved to ++ * a phase prior to dev_open ++ */ ++ spin_lock_init(&(bond_info->tx_hashtbl_lock)); ++ spin_lock_init(&(bond_info->rx_hashtbl_lock)); ++ + bond->wq = create_singlethread_workqueue(bond_dev->name); + if (!bond->wq) + return -ENOMEM; +--- a/drivers/net/bonding/bond_sysfs.c ++++ b/drivers/net/bonding/bond_sysfs.c +@@ -224,12 +224,6 @@ static ssize_t bonding_store_slaves(stru + struct net_device *dev; + struct bonding *bond = to_bond(d); + +- /* Quick sanity check -- is the bond interface up? */ +- if (!(bond->dev->flags & IFF_UP)) { +- pr_warning("%s: doing slave updates when interface is down.\n", +- bond->dev->name); +- } +- + if (!rtnl_trylock()) + return restart_syscall(); + diff --git a/queue-2.6.38/bridge-fix-forwarding-of-ipv6.patch b/queue-2.6.38/bridge-fix-forwarding-of-ipv6.patch new file mode 100644 index 0000000000..2a63af80f5 --- /dev/null +++ b/queue-2.6.38/bridge-fix-forwarding-of-ipv6.patch @@ -0,0 +1,35 @@ +From fc9fdc008b37acd8e306b187736b723d1e8787a7 Mon Sep 17 00:00:00 2001 +From: Stephen Hemminger <shemminger@vyatta.com> +Date: Fri, 13 May 2011 16:03:24 -0400 +Subject: bridge: fix forwarding of IPv6 + + +From: Stephen Hemminger <shemminger@vyatta.com> + +[ Upstream commit cb68552858c64db302771469b1202ea09e696329 ] + +The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e + bridge: Reset IPCB when entering IP stack on NF_FORWARD +broke forwarding of IPV6 packets in bridge because it would +call bp_parse_ip_options with an IPV6 packet. + +Reported-by: Noah Meyerhans <noahm@debian.org> +Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> +Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/bridge/br_netfilter.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_netfilter.c ++++ b/net/bridge/br_netfilter.c +@@ -739,7 +739,7 @@ static unsigned int br_nf_forward_ip(uns + nf_bridge->mask |= BRNF_PKT_TYPE; + } + +- if (br_parse_ip_options(skb)) ++ if (pf == PF_INET && br_parse_ip_options(skb)) + return NF_DROP; + + /* The physdev module checks on this */ diff --git a/queue-2.6.38/ieee802154-remove-hacked-cflags-in-net-ieee802154-makefile.patch b/queue-2.6.38/ieee802154-remove-hacked-cflags-in-net-ieee802154-makefile.patch new file mode 100644 index 0000000000..1ebb426b5b --- /dev/null +++ b/queue-2.6.38/ieee802154-remove-hacked-cflags-in-net-ieee802154-makefile.patch @@ -0,0 +1,31 @@ +From fe6c16688854a5d9799c65ee7adc9b0e1d86df93 Mon Sep 17 00:00:00 2001 +From: David S. Miller <davem@davemloft.net> +Date: Tue, 12 Apr 2011 15:33:23 -0700 +Subject: ieee802154: Remove hacked CFLAGS in net/ieee802154/Makefile + + +From: David S. Miller <davem@davemloft.net> + +[ Upstream commit bfac3693c426d280b026f6a1b77dc2294ea43fea ] + +It adds -Wall (which the kernel carefully controls already) and of all +things -DDEBUG (which should be set by other means if desired, please +we have dynamic-debug these days). + +Kill this noise. + +Reported-by: Dave Jones <davej@redhat.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ieee802154/Makefile | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/ieee802154/Makefile ++++ b/net/ieee802154/Makefile +@@ -1,5 +1,3 @@ + obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o + ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o + af_802154-y := af_ieee802154.o raw.o dgram.o +- +-ccflags-y += -Wall -DDEBUG diff --git a/queue-2.6.38/igmp-call-ip_mc_clear_src-only-when-we-have-no-users-of-ip_mc_list.patch b/queue-2.6.38/igmp-call-ip_mc_clear_src-only-when-we-have-no-users-of-ip_mc_list.patch new file mode 100644 index 0000000000..739471c440 --- /dev/null +++ b/queue-2.6.38/igmp-call-ip_mc_clear_src-only-when-we-have-no-users-of-ip_mc_list.patch @@ -0,0 +1,71 @@ +From 802d2fd9b43a5cf7cafca72683c7fb0458b6cab2 Mon Sep 17 00:00:00 2001 +From: Veaceslav Falico <vfalico@redhat.com> +Date: Mon, 23 May 2011 23:15:05 +0000 +Subject: igmp: call ip_mc_clear_src() only when we have no users of ip_mc_list + + +From: Veaceslav Falico <vfalico@redhat.com> + +[ Upstream commit 24cf3af3fed5edcf90bc2a0ed181e6ce1513d2dc ] + +In igmp_group_dropped() we call ip_mc_clear_src(), which resets the number +of source filters per mulitcast. However, igmp_group_dropped() is also +called on NETDEV_DOWN, NETDEV_PRE_TYPE_CHANGE and NETDEV_UNREGISTER, which +means that the group might get added back on NETDEV_UP, NETDEV_REGISTER and +NETDEV_POST_TYPE_CHANGE respectively, leaving us with broken source +filters. + +To fix that, we must clear the source filters only when there are no users +in the ip_mc_list, i.e. in ip_mc_dec_group() and on device destroy. + +Acked-by: David L Stevens <dlstevens@us.ibm.com> +Signed-off-by: Veaceslav Falico <vfalico@redhat.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv4/igmp.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -1172,20 +1172,18 @@ static void igmp_group_dropped(struct ip + + if (!in_dev->dead) { + if (IGMP_V1_SEEN(in_dev)) +- goto done; ++ return; + if (IGMP_V2_SEEN(in_dev)) { + if (reporter) + igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE); +- goto done; ++ return; + } + /* IGMPv3 */ + igmpv3_add_delrec(in_dev, im); + + igmp_ifc_event(in_dev); + } +-done: + #endif +- ip_mc_clear_src(im); + } + + static void igmp_group_added(struct ip_mc_list *im) +@@ -1322,6 +1320,7 @@ void ip_mc_dec_group(struct in_device *i + *ip = i->next_rcu; + in_dev->mc_count--; + igmp_group_dropped(i); ++ ip_mc_clear_src(i); + + if (!in_dev->dead) + ip_rt_multicast_event(in_dev); +@@ -1431,7 +1430,8 @@ void ip_mc_destroy_dev(struct in_device + in_dev->mc_list = i->next_rcu; + in_dev->mc_count--; + +- igmp_group_dropped(i); ++ /* We've dropped the groups in ip_mc_down already */ ++ ip_mc_clear_src(i); + ip_ma_put(i); + } + } diff --git a/queue-2.6.38/inetpeer-reduce-stack-usage.patch b/queue-2.6.38/inetpeer-reduce-stack-usage.patch new file mode 100644 index 0000000000..417efd627d --- /dev/null +++ b/queue-2.6.38/inetpeer-reduce-stack-usage.patch @@ -0,0 +1,115 @@ +From b55f6119c24685770e32649092fe25f06d40f573 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Mon, 11 Apr 2011 22:39:40 +0000 +Subject: inetpeer: reduce stack usage + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit 66944e1c5797562cebe2d1857d46dff60bf9a69e ] + +On 64bit arches, we use 752 bytes of stack when cleanup_once() is called +from inet_getpeer(). + +Lets share the avl stack to save ~376 bytes. 
+ +Before patch : + +# objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl + +0x000006c3 unlink_from_pool [inetpeer.o]: 376 +0x00000721 unlink_from_pool [inetpeer.o]: 376 +0x00000cb1 inet_getpeer [inetpeer.o]: 376 +0x00000e6d inet_getpeer [inetpeer.o]: 376 +0x0004 inet_initpeers [inetpeer.o]: 112 +# size net/ipv4/inetpeer.o + text data bss dec hex filename + 5320 432 21 5773 168d net/ipv4/inetpeer.o + +After patch : + +objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl +0x00000c11 inet_getpeer [inetpeer.o]: 376 +0x00000dcd inet_getpeer [inetpeer.o]: 376 +0x00000ab9 peer_check_expire [inetpeer.o]: 328 +0x00000b7f peer_check_expire [inetpeer.o]: 328 +0x0004 inet_initpeers [inetpeer.o]: 112 +# size net/ipv4/inetpeer.o + text data bss dec hex filename + 5163 432 21 5616 15f0 net/ipv4/inetpeer.o + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +Cc: Scot Doyle <lkml@scotdoyle.com> +Cc: Stephen Hemminger <shemminger@vyatta.com> +Cc: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com> +Reviewed-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv4/inetpeer.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/ipv4/inetpeer.c ++++ b/net/ipv4/inetpeer.c +@@ -366,7 +366,8 @@ static void inetpeer_free_rcu(struct rcu + } + + /* May be called with local BH enabled. */ +-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) ++static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, ++ struct inet_peer __rcu **stack[PEER_MAXDEPTH]) + { + int do_free; + +@@ -380,7 +381,6 @@ static void unlink_from_pool(struct inet + * We use refcnt=-1 to alert lockless readers this entry is deleted. 
+ */ + if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { +- struct inet_peer __rcu **stack[PEER_MAXDEPTH]; + struct inet_peer __rcu ***stackptr, ***delp; + if (lookup(&p->daddr, stack, base) != p) + BUG(); +@@ -435,7 +435,7 @@ static struct inet_peer_base *peer_to_ba + } + + /* May be called with local BH enabled. */ +-static int cleanup_once(unsigned long ttl) ++static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) + { + struct inet_peer *p = NULL; + +@@ -467,7 +467,7 @@ static int cleanup_once(unsigned long tt + * happen because of entry limits in route cache. */ + return -1; + +- unlink_from_pool(p, peer_to_base(p)); ++ unlink_from_pool(p, peer_to_base(p), stack); + return 0; + } + +@@ -523,7 +523,7 @@ struct inet_peer *inet_getpeer(struct in + + if (base->total >= inet_peer_threshold) + /* Remove one less-recently-used entry. */ +- cleanup_once(0); ++ cleanup_once(0, stack); + + return p; + } +@@ -539,6 +539,7 @@ static void peer_check_expire(unsigned l + { + unsigned long now = jiffies; + int ttl, total; ++ struct inet_peer __rcu **stack[PEER_MAXDEPTH]; + + total = compute_total(); + if (total >= inet_peer_threshold) +@@ -547,7 +548,7 @@ static void peer_check_expire(unsigned l + ttl = inet_peer_maxttl + - (inet_peer_maxttl - inet_peer_minttl) / HZ * + total / inet_peer_threshold * HZ; +- while (!cleanup_once(ttl)) { ++ while (!cleanup_once(ttl, stack)) { + if (jiffies != now) + break; + } diff --git a/queue-2.6.38/ipv6-remove-hoplimit-initialization-to-1.patch b/queue-2.6.38/ipv6-remove-hoplimit-initialization-to-1.patch new file mode 100644 index 0000000000..4ff5807322 --- /dev/null +++ b/queue-2.6.38/ipv6-remove-hoplimit-initialization-to-1.patch @@ -0,0 +1,34 @@ +From c27f8c1fe27db3f30f9c9313158007e5444c802a Mon Sep 17 00:00:00 2001 +From: Thomas Egerer <thomas.egerer@secunet.com> +Date: Wed, 20 Apr 2011 22:56:02 +0000 +Subject: ipv6: Remove hoplimit initialization to -1 + + +From: Thomas Egerer <thomas.egerer@secunet.com> + 
+[ Upstream commit e965c05dabdabb85af0187952ccd75e43995c4b3 ] + +The changes introduced with git-commit a02e4b7d ("ipv6: Demark default +hoplimit as zero.") missed to remove the hoplimit initialization. As a +result, ipv6_get_mtu interprets the return value of dst_metric_raw +(-1) as 255 and answers ping6 with this hoplimit. This patch removes +the line such that ping6 is answered with the hoplimit value +configured via sysctl. + +Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv6/route.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1970,7 +1970,6 @@ struct rt6_info *addrconf_dst_alloc(stru + rt->dst.output = ip6_output; + rt->rt6i_dev = net->loopback_dev; + rt->rt6i_idev = idev; +- dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); + rt->dst.obsolete = -1; + + rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; diff --git a/queue-2.6.38/ipv6-udp-fix-the-wrong-headroom-check.patch b/queue-2.6.38/ipv6-udp-fix-the-wrong-headroom-check.patch new file mode 100644 index 0000000000..d0699e79bc --- /dev/null +++ b/queue-2.6.38/ipv6-udp-fix-the-wrong-headroom-check.patch @@ -0,0 +1,39 @@ +From f3e703221bb1ee6ae68879bea3942e7a77d9dd10 Mon Sep 17 00:00:00 2001 +From: Shan Wei <shanwei@cn.fujitsu.com> +Date: Tue, 19 Apr 2011 22:52:49 +0000 +Subject: ipv6: udp: fix the wrong headroom check + + +From: Shan Wei <shanwei@cn.fujitsu.com> + +[ Upstream commit a9cf73ea7ff78f52662c8658d93c226effbbedde ] + +At this point, skb->data points to skb_transport_header. +So, headroom check is wrong. + +For some case:bridge(UFO is on) + eth device(UFO is off), +there is not enough headroom for IPv6 frag head. +But headroom check is always false. + +This will bring about data be moved to there prior to skb->head, +when adding IPv6 frag header to skb. 
+ +Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com> +Acked-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv6/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -1339,7 +1339,7 @@ static struct sk_buff *udp6_ufo_fragment + skb->ip_summed = CHECKSUM_NONE; + + /* Check if there is enough headroom to insert fragment header. */ +- if ((skb_headroom(skb) < frag_hdr_sz) && ++ if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && + pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) + goto out; + diff --git a/queue-2.6.38/irda-fix-locking-unbalance-in-irda_sendmsg.patch b/queue-2.6.38/irda-fix-locking-unbalance-in-irda_sendmsg.patch new file mode 100644 index 0000000000..8da7e951e4 --- /dev/null +++ b/queue-2.6.38/irda-fix-locking-unbalance-in-irda_sendmsg.patch @@ -0,0 +1,76 @@ +From 32db36b060e08f3eb3ee9bb0bc2457b5f480286a Mon Sep 17 00:00:00 2001 +From: Dave Jones <davej@redhat.com> +Date: Tue, 12 Apr 2011 15:29:54 -0700 +Subject: irda: fix locking unbalance in irda_sendmsg + + +From: Dave Jones <davej@redhat.com> + +[ Upstream commit 020318d0d2af51e0fd59ba654ede9b2171558720 ] + +5b40964eadea40509d353318d2c82e8b7bf5e8a5 ("irda: Remove BKL instances +from af_irda.c") introduced a path where we have a locking unbalance. +If we pass invalid flags, we unlock a socket we never locked, +resulting in this... + +===================================== +[ BUG: bad unlock balance detected! ] +------------------------------------- +trinity/20101 is trying to release lock (sk_lock-AF_IRDA) at: +[<ffffffffa057f001>] irda_sendmsg+0x207/0x21d [irda] +but there are no more locks to release! + +other info that might help us debug this: +no locks held by trinity/20101. + +stack backtrace: +Pid: 20101, comm: trinity Not tainted 2.6.39-rc3+ #3 +Call Trace: + [<ffffffffa057f001>] ? 
irda_sendmsg+0x207/0x21d [irda] + [<ffffffff81085041>] print_unlock_inbalance_bug+0xc7/0xd2 + [<ffffffffa057f001>] ? irda_sendmsg+0x207/0x21d [irda] + [<ffffffff81086aca>] lock_release+0xcf/0x18e + [<ffffffff813ed190>] release_sock+0x2d/0x155 + [<ffffffffa057f001>] irda_sendmsg+0x207/0x21d [irda] + [<ffffffff813e9f8c>] __sock_sendmsg+0x69/0x75 + [<ffffffff813ea105>] sock_sendmsg+0xa1/0xb6 + [<ffffffff81100ca3>] ? might_fault+0x5c/0xac + [<ffffffff81086b7c>] ? lock_release+0x181/0x18e + [<ffffffff81100cec>] ? might_fault+0xa5/0xac + [<ffffffff81100ca3>] ? might_fault+0x5c/0xac + [<ffffffff81133b94>] ? fcheck_files+0xb9/0xf0 + [<ffffffff813f387a>] ? copy_from_user+0x2f/0x31 + [<ffffffff813f3b70>] ? verify_iovec+0x52/0xa6 + [<ffffffff813eb4e3>] sys_sendmsg+0x23a/0x2b8 + [<ffffffff81086b7c>] ? lock_release+0x181/0x18e + [<ffffffff810773c6>] ? up_read+0x28/0x2c + [<ffffffff814bec3d>] ? do_page_fault+0x360/0x3b4 + [<ffffffff81087043>] ? trace_hardirqs_on_caller+0x10b/0x12f + [<ffffffff810458aa>] ? finish_task_switch+0xb2/0xe3 + [<ffffffff8104583e>] ? finish_task_switch+0x46/0xe3 + [<ffffffff8108364a>] ? trace_hardirqs_off_caller+0x33/0x90 + [<ffffffff814bbaf9>] ? retint_swapgs+0x13/0x1b + [<ffffffff81087043>] ? trace_hardirqs_on_caller+0x10b/0x12f + [<ffffffff810a9dd3>] ? audit_syscall_entry+0x11c/0x148 + [<ffffffff8125609e>] ? trace_hardirqs_on_thunk+0x3a/0x3f + [<ffffffff814c22c2>] system_call_fastpath+0x16/0x1b + +Signed-off-by: Dave Jones <davej@redhat.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/irda/af_irda.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1297,8 +1297,7 @@ static int irda_sendmsg(struct kiocb *io + /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | + MSG_NOSIGNAL)) { +- err = -EINVAL; +- goto out; ++ return -EINVAL; + } + + lock_sock(sk); diff --git a/queue-2.6.38/macvlan-fix-panic-if-lowerdev-in-a-bond.patch b/queue-2.6.38/macvlan-fix-panic-if-lowerdev-in-a-bond.patch new file mode 100644 index 0000000000..91aa948aed --- /dev/null +++ b/queue-2.6.38/macvlan-fix-panic-if-lowerdev-in-a-bond.patch @@ -0,0 +1,45 @@ +From db5a55536b23676675149df8c521d7cb90d9ea36 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Fri, 20 May 2011 14:59:23 -0400 +Subject: macvlan: fix panic if lowerdev in a bond + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit d93515611bbc70c2fe4db232e5feb448ed8e4cc9 ] + +commit a35e2c1b6d905 (macvlan: use rx_handler_data pointer to store +macvlan_port pointer V2) added a bug in macvlan_port_create() + +Steps to reproduce the bug: + +# ifenslave bond0 eth0 eth1 + +# ip link add link eth0 up name eth0#1 type macvlan +->error EBUSY + +# ip link add link eth0 up name eth0#1 type macvlan +->panic + +Fix: Dont set IFF_MACVLAN_PORT in error case. + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + drivers/net/macvlan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -586,8 +586,8 @@ static int macvlan_port_create(struct ne + err = netdev_rx_handler_register(dev, macvlan_handle_frame, port); + if (err) + kfree(port); +- +- dev->priv_flags |= IFF_MACVLAN_PORT; ++ else ++ dev->priv_flags |= IFF_MACVLAN_PORT; + return err; + } + diff --git a/queue-2.6.38/net-add-skb_dst_force-in-sock_queue_err_skb.patch b/queue-2.6.38/net-add-skb_dst_force-in-sock_queue_err_skb.patch new file mode 100644 index 0000000000..10a5aa8224 --- /dev/null +++ b/queue-2.6.38/net-add-skb_dst_force-in-sock_queue_err_skb.patch @@ -0,0 +1,73 @@ +From af693fa5652343376cbe07dea2181f188e8adeeb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Wed, 18 May 2011 02:21:31 -0400 +Subject: net: add skb_dst_force() in sock_queue_err_skb() + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit abb57ea48fd9431fa320a5c55f73e6b5a44c2efb ] + +Commit 7fee226ad239 (add a noref bit on skb dst) forgot to use +skb_dst_force() on packets queued in sk_error_queue + +This triggers following warning, for applications using IP_CMSG_PKTINFO +receiving one error status + +------------[ cut here ]------------ +WARNING: at include/linux/skbuff.h:457 ip_cmsg_recv_pktinfo+0xa6/0xb0() +Hardware name: 2669UYD +Modules linked in: isofs vboxnetadp vboxnetflt nfsd ebtable_nat ebtables +lib80211_crypt_ccmp uinput xcbc hdaps tp_smapi thinkpad_ec radeonfb fb_ddc +radeon ttm drm_kms_helper drm ipw2200 intel_agp intel_gtt libipw i2c_algo_bit +i2c_i801 agpgart rng_core cfbfillrect cfbcopyarea cfbimgblt video raid10 raid1 +raid0 linear md_mod vboxdrv +Pid: 4697, comm: miredo Not tainted 2.6.39-rc6-00569-g5895198-dirty #22 +Call Trace: + [<c17746b6>] ? printk+0x1d/0x1f + [<c1058302>] warn_slowpath_common+0x72/0xa0 + [<c15bbca6>] ? 
ip_cmsg_recv_pktinfo+0xa6/0xb0 + [<c15bbca6>] ? ip_cmsg_recv_pktinfo+0xa6/0xb0 + [<c1058350>] warn_slowpath_null+0x20/0x30 + [<c15bbca6>] ip_cmsg_recv_pktinfo+0xa6/0xb0 + [<c15bbdd7>] ip_cmsg_recv+0x127/0x260 + [<c154f82d>] ? skb_dequeue+0x4d/0x70 + [<c1555523>] ? skb_copy_datagram_iovec+0x53/0x300 + [<c178e834>] ? sub_preempt_count+0x24/0x50 + [<c15bdd2d>] ip_recv_error+0x23d/0x270 + [<c15de554>] udp_recvmsg+0x264/0x2b0 + [<c15ea659>] inet_recvmsg+0xd9/0x130 + [<c1547752>] sock_recvmsg+0xf2/0x120 + [<c11179cb>] ? might_fault+0x4b/0xa0 + [<c15546bc>] ? verify_iovec+0x4c/0xc0 + [<c1547660>] ? sock_recvmsg_nosec+0x100/0x100 + [<c1548294>] __sys_recvmsg+0x114/0x1e0 + [<c1093895>] ? __lock_acquire+0x365/0x780 + [<c1148b66>] ? fget_light+0xa6/0x3e0 + [<c1148b7f>] ? fget_light+0xbf/0x3e0 + [<c1148aee>] ? fget_light+0x2e/0x3e0 + [<c1549f29>] sys_recvmsg+0x39/0x60 + +Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34622 + +Reported-by: Witold Baryluk <baryluk@smp.if.uj.edu.pl> +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +CC: Stephen Hemminger <shemminger@vyatta.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/core/skbuff.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2997,6 +2997,9 @@ int sock_queue_err_skb(struct sock *sk, + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + ++ /* before exiting rcu section, make sure dst is refcounted */ ++ skb_dst_force(skb); ++ + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); diff --git a/queue-2.6.38/net-do-not-wrap-sysctl-igmp_max_memberships-in-ip_multicast.patch b/queue-2.6.38/net-do-not-wrap-sysctl-igmp_max_memberships-in-ip_multicast.patch new file mode 100644 index 0000000000..f41542b645 --- /dev/null +++ b/queue-2.6.38/net-do-not-wrap-sysctl-igmp_max_memberships-in-ip_multicast.patch @@ -0,0 +1,44 @@ +From ad19102cd6809dab398b200b66312e4dad72ae2a Mon Sep 17 00:00:00 2001 +From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se> +Date: Tue, 12 Apr 2011 13:59:33 -0700 +Subject: net: Do not wrap sysctl igmp_max_memberships in IP_MULTICAST + + +From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se> + +[ Upstream commit 192910a6cca5e50e5bd6cbd1da0e7376c7adfe62 ] + +controlling igmp_max_membership is useful even when IP_MULTICAST +is off. +Quagga(an OSPF deamon) uses multicast addresses for all interfaces +using a single socket and hits igmp_max_membership limit when +there are 20 interfaces or more. +Always export sysctl igmp_max_memberships in proc, just like +igmp_max_msf + +Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv4/sysctl_net_ipv4.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -311,7 +311,6 @@ static struct ctl_table ipv4_table[] = { + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, +-#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_max_memberships", + .data = &sysctl_igmp_max_memberships, +@@ -319,8 +318,6 @@ static struct ctl_table ipv4_table[] = { + .mode = 0644, + .proc_handler = proc_dointvec + }, +- +-#endif + { + .procname = "igmp_max_msf", + .data = &sysctl_igmp_max_msf, diff --git a/queue-2.6.38/net-use-hlist_del_rcu-in-dev_change_name.patch b/queue-2.6.38/net-use-hlist_del_rcu-in-dev_change_name.patch new file mode 100644 index 0000000000..f4fc206c1f --- /dev/null +++ b/queue-2.6.38/net-use-hlist_del_rcu-in-dev_change_name.patch @@ -0,0 +1,34 @@ +From be39612bb5da45d19a5429f63d05b9c247316262 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Tue, 17 May 2011 13:56:59 -0400 +Subject: net: use hlist_del_rcu() in dev_change_name() + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit 372b2312010bece1e36f577d6c99a6193ec54cbd ] + +Using plain hlist_del() in dev_change_name() is wrong since a +concurrent reader can crash trying to dereference LIST_POISON1. + +Bug introduced in commit 72c9528bab94 (net: Introduce +dev_get_by_name_rcu()) + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1006,7 +1006,7 @@ rollback: + } + + write_lock_bh(&dev_base_lock); +- hlist_del(&dev->name_hlist); ++ hlist_del_rcu(&dev->name_hlist); + write_unlock_bh(&dev_base_lock); + + synchronize_rcu(); diff --git a/queue-2.6.38/revert-bridge-forward-reserved-group-addresses-if-stp.patch b/queue-2.6.38/revert-bridge-forward-reserved-group-addresses-if-stp.patch new file mode 100644 index 0000000000..ee2380e5fa --- /dev/null +++ b/queue-2.6.38/revert-bridge-forward-reserved-group-addresses-if-stp.patch @@ -0,0 +1,37 @@ +From 6f0c8ad3193361f71ed4430a41afda2f7d641c07 Mon Sep 17 00:00:00 2001 +From: David S. Miller <davem@davemloft.net> +Date: Thu, 21 Apr 2011 21:17:25 -0700 +Subject: Revert "bridge: Forward reserved group addresses if !STP" + + +From: David S. Miller <davem@davemloft.net> + +[ Upstream commit f01cb5fbea1c1613621f9f32f385e12c1a29dde0 ] + +This reverts commit 1e253c3b8a1aeed51eef6fc366812f219b97de65. + +It breaks 802.3ad bonding inside of a bridge. + +The commit was meant to support transport bridging, and specifically +virtual machines bridged to an ethernet interface connected to a +switch port with 802.1x enabled. + +But this isn't the way to do it, it breaks too many other things. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/bridge/br_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -163,7 +163,7 @@ struct sk_buff *br_handle_frame(struct s + goto drop; + + /* If STP is turned off, then forward */ +- if (p->br->stp_enabled == BR_NO_STP) ++ if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, diff --git a/queue-2.6.38/revert-tcp-disallow-bind-to-reuse-addr-port.patch b/queue-2.6.38/revert-tcp-disallow-bind-to-reuse-addr-port.patch new file mode 100644 index 0000000000..c608fa1661 --- /dev/null +++ b/queue-2.6.38/revert-tcp-disallow-bind-to-reuse-addr-port.patch @@ -0,0 +1,59 @@ +From 7f6291368cf25321b3377c9555572e37f3e7cd03 Mon Sep 17 00:00:00 2001 +From: David S. Miller <davem@davemloft.net> +Date: Wed, 13 Apr 2011 12:01:14 -0700 +Subject: Revert "tcp: disallow bind() to reuse addr/port" + + +From: David S. Miller <davem@davemloft.net> + +[ Upstream commit 3e8c806a08c7beecd972e7ce15c570b9aba64baa ] + +This reverts commit c191a836a908d1dd6b40c503741f91b914de3348. + +It causes known regressions for programs that expect to be able to use +SO_REUSEADDR to shutdown a socket, then successfully rebind another +socket to the same ID. + +Programs such as haproxy and amavisd expect this to work. + +This should fix kernel bugzilla 32832. + +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv4/inet_connection_sock.c | 5 ++--- + net/ipv6/inet6_connection_sock.c | 2 +- + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if (!reuse || !sk2->sk_reuse || +- ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { ++ sk2->sk_state == TCP_LISTEN) { + const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || + sk2_rcv_saddr == sk_rcv_saddr(sk)) +@@ -122,8 +122,7 @@ again: + (tb->num_owners < smallest_size || smallest_size == -1)) { + smallest_size = tb->num_owners; + smallest_rover = rover; +- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && +- !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { ++ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; +--- a/net/ipv6/inet6_connection_sock.c ++++ b/net/ipv6/inet6_connection_sock.c +@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || +- ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) && ++ sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } diff --git a/queue-2.6.38/sch_sfq-avoid-giving-spurious-net_xmit_cn-signals.patch b/queue-2.6.38/sch_sfq-avoid-giving-spurious-net_xmit_cn-signals.patch new file mode 100644 index 0000000000..833d8669d9 --- /dev/null +++ b/queue-2.6.38/sch_sfq-avoid-giving-spurious-net_xmit_cn-signals.patch @@ -0,0 +1,59 @@ +From 979ae42cf9f1cb7e48358a9301c91802653dc6ec Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Mon, 23 May 2011 11:02:42 +0000 +Subject: sch_sfq: avoid giving spurious 
NET_XMIT_CN signals + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit 8efa885406359af300d46910642b50ca82c0fe47 ] + +While chasing a possible net_sched bug, I found that IP fragments have +little chance to pass a congested SFQ qdisc : + +- Say SFQ qdisc is full because one flow is non responsive. +- ip_fragment() wants to send two fragments belonging to an idle flow. +- sfq_enqueue() queues first packet, but sees queue limit reached : +- sfq_enqueue() drops one packet from 'big consumer', and returns +NET_XMIT_CN. +- ip_fragment() cancels remaining fragments. + +This patch restores fairness, making sure we return NET_XMIT_CN only if +we dropped a packet from the same flow. + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +CC: Patrick McHardy <kaber@trash.net> +CC: Jarek Poplawski <jarkao2@gmail.com> +CC: Jamal Hadi Salim <hadi@cyberus.ca> +CC: Stephen Hemminger <shemminger@vyatta.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/sched/sch_sfq.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -361,7 +361,7 @@ sfq_enqueue(struct sk_buff *skb, struct + { + struct sfq_sched_data *q = qdisc_priv(sch); + unsigned int hash; +- sfq_index x; ++ sfq_index x, qlen; + struct sfq_slot *slot; + int uninitialized_var(ret); + +@@ -405,8 +405,12 @@ sfq_enqueue(struct sk_buff *skb, struct + if (++sch->q.qlen <= q->limit) + return NET_XMIT_SUCCESS; + ++ qlen = slot->qlen; + sfq_drop(sch); +- return NET_XMIT_CN; ++ /* Return Congestion Notification only if we dropped a packet ++ * from this flow. ++ */ ++ return (qlen != slot->qlen) ? 
NET_XMIT_CN : NET_XMIT_SUCCESS; + } + + static struct sk_buff * diff --git a/queue-2.6.38/sch_sfq-fix-peek-implementation.patch b/queue-2.6.38/sch_sfq-fix-peek-implementation.patch new file mode 100644 index 0000000000..6f6b152cb1 --- /dev/null +++ b/queue-2.6.38/sch_sfq-fix-peek-implementation.patch @@ -0,0 +1,57 @@ +From e43b1e770b5742a84481cb1ceb922ea287c95d5d Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Wed, 25 May 2011 04:40:11 +0000 +Subject: sch_sfq: fix peek() implementation + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit 07bd8df5df4369487812bf85a237322ff3569b77 ] + +Since commit eeaeb068f139 (sch_sfq: allow big packets and be fair), +sfq_peek() can return a different skb that would be normally dequeued by +sfq_dequeue() [ if current slot->allot is negative ] + +Use generic qdisc_peek_dequeued() instead of custom implementation, to +get consistent result. + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +CC: Jarek Poplawski <jarkao2@gmail.com> +CC: Patrick McHardy <kaber@trash.net> +CC: Jesper Dangaard Brouer <hawk@diku.dk> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/sched/sch_sfq.c | 14 +------------- + 1 file changed, 1 insertion(+), 13 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -414,18 +414,6 @@ sfq_enqueue(struct sk_buff *skb, struct + } + + static struct sk_buff * +-sfq_peek(struct Qdisc *sch) +-{ +- struct sfq_sched_data *q = qdisc_priv(sch); +- +- /* No active slots */ +- if (q->tail == NULL) +- return NULL; +- +- return q->slots[q->tail->next].skblist_next; +-} +- +-static struct sk_buff * + sfq_dequeue(struct Qdisc *sch) + { + struct sfq_sched_data *q = qdisc_priv(sch); +@@ -683,7 +671,7 @@ static struct Qdisc_ops sfq_qdisc_ops __ + .priv_size = sizeof(struct sfq_sched_data), + .enqueue = sfq_enqueue, + .dequeue = sfq_dequeue, +- .peek = sfq_peek, ++ .peek = qdisc_peek_dequeued, + .drop = sfq_drop, + .init = sfq_init, + .reset = sfq_reset, diff --git a/queue-2.6.38/sctp-fix-memory-leak-of-the-asconf-queue-when-free-asoc.patch b/queue-2.6.38/sctp-fix-memory-leak-of-the-asconf-queue-when-free-asoc.patch new file mode 100644 index 0000000000..87331a5392 --- /dev/null +++ b/queue-2.6.38/sctp-fix-memory-leak-of-the-asconf-queue-when-free-asoc.patch @@ -0,0 +1,61 @@ +From 31f8d1e17a6da26d1fe73ba048de887b691e32ef Mon Sep 17 00:00:00 2001 +From: Wei Yongjun <yjwei@cn.fujitsu.com> +Date: Tue, 24 May 2011 21:48:02 +0000 +Subject: sctp: fix memory leak of the ASCONF queue when free asoc + + +From: Wei Yongjun <yjwei@cn.fujitsu.com> + +[ Upstream commit 8b4472cc13136d04727e399c6fdadf58d2218b0a ] + +If an ASCONF chunk is outstanding, then the following ASCONF +chunk will be queued for later transmission. But when we free +the asoc, we forget to free the ASCONF queue at the same time, +this will cause memory leak. + +Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/sctp/associola.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -64,6 +64,7 @@ + /* Forward declarations for internal functions. */ + static void sctp_assoc_bh_rcv(struct work_struct *work); + static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); ++static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); + + /* Keep track of the new idr low so that we don't re-use association id + * numbers too fast. It is protected by they idr spin lock is in the +@@ -446,6 +447,9 @@ void sctp_association_free(struct sctp_a + /* Free any cached ASCONF_ACK chunk. */ + sctp_assoc_free_asconf_acks(asoc); + ++ /* Free the ASCONF queue. */ ++ sctp_assoc_free_asconf_queue(asoc); ++ + /* Free any cached ASCONF chunk. */ + if (asoc->addip_last_asconf) + sctp_chunk_free(asoc->addip_last_asconf); +@@ -1576,6 +1580,18 @@ retry: + return error; + } + ++/* Free the ASCONF queue */ ++static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc) ++{ ++ struct sctp_chunk *asconf; ++ struct sctp_chunk *tmp; ++ ++ list_for_each_entry_safe(asconf, tmp, &asoc->addip_chunk_list, list) { ++ list_del_init(&asconf->list); ++ sctp_chunk_free(asconf); ++ } ++} ++ + /* Free asconf_ack cache */ + static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc) + { diff --git a/queue-2.6.38/sctp-fix-race-between-sctp_bind_addr_free-and-sctp_bind_addr_conflict.patch b/queue-2.6.38/sctp-fix-race-between-sctp_bind_addr_free-and-sctp_bind_addr_conflict.patch new file mode 100644 index 0000000000..52d0af1642 --- /dev/null +++ b/queue-2.6.38/sctp-fix-race-between-sctp_bind_addr_free-and-sctp_bind_addr_conflict.patch @@ -0,0 +1,75 @@ +From 55bcd96df3d754e1240266827dcbfcb46a678bda Mon Sep 17 00:00:00 2001 +From: Jacek Luczak <difrost.kernel@gmail.com> +Date: Thu, 19 May 2011 09:55:13 +0000 +Subject: SCTP: 
fix race between sctp_bind_addr_free() and sctp_bind_addr_conflict() + + +From: Jacek Luczak <difrost.kernel@gmail.com> + +[ Upstream commit c182f90bc1f22ce5039b8722e45621d5f96862c2 ] + +During the sctp_close() call, we do not use rcu primitives to +destroy the address list attached to the endpoint. At the same +time, we do the removal of addresses from this list before +attempting to remove the socket from the port hash + +As a result, it is possible for another process to find the socket +in the port hash that is in the process of being closed. It then +proceeds to traverse the address list to find the conflict, only +to have that address list suddenly disappear without rcu() critical +section. + +Fix issue by closing address list removal inside RCU critical +section. + +Race can result in a kernel crash with general protection fault or +kernel NULL pointer dereference: + +kernel: general protection fault: 0000 [#1] SMP +kernel: RIP: 0010:[<ffffffffa02f3dde>] [<ffffffffa02f3dde>] sctp_bind_addr_conflict+0x64/0x82 [sctp] +kernel: Call Trace: +kernel: [<ffffffffa02f415f>] ? sctp_get_port_local+0x17b/0x2a3 [sctp] +kernel: [<ffffffffa02f3d45>] ? sctp_bind_addr_match+0x33/0x68 [sctp] +kernel: [<ffffffffa02f4416>] ? sctp_do_bind+0xd3/0x141 [sctp] +kernel: [<ffffffffa02f5030>] ? sctp_bindx_add+0x4d/0x8e [sctp] +kernel: [<ffffffffa02f5183>] ? sctp_setsockopt_bindx+0x112/0x4a4 [sctp] +kernel: [<ffffffff81089e82>] ? generic_file_aio_write+0x7f/0x9b +kernel: [<ffffffffa02f763e>] ? sctp_setsockopt+0x14f/0xfee [sctp] +kernel: [<ffffffff810c11fb>] ? do_sync_write+0xab/0xeb +kernel: [<ffffffff810e82ab>] ? fsnotify+0x239/0x282 +kernel: [<ffffffff810c2462>] ? alloc_file+0x18/0xb1 +kernel: [<ffffffff8134a0b1>] ? compat_sys_setsockopt+0x1a5/0x1d9 +kernel: [<ffffffff8134aaf1>] ? compat_sys_socketcall+0x143/0x1a4 +kernel: [<ffffffff810467dc>] ? 
sysenter_dispatch+0x7/0x32 + +Signed-off-by: Jacek Luczak <luczak.jacek@gmail.com> +Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com> +CC: Eric Dumazet <eric.dumazet@gmail.com> +Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/sctp/bind_addr.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/net/sctp/bind_addr.c ++++ b/net/sctp/bind_addr.c +@@ -140,14 +140,12 @@ void sctp_bind_addr_init(struct sctp_bin + /* Dispose of the address list. */ + static void sctp_bind_addr_clean(struct sctp_bind_addr *bp) + { +- struct sctp_sockaddr_entry *addr; +- struct list_head *pos, *temp; ++ struct sctp_sockaddr_entry *addr, *temp; + + /* Empty the bind address list. */ +- list_for_each_safe(pos, temp, &bp->address_list) { +- addr = list_entry(pos, struct sctp_sockaddr_entry, list); +- list_del(pos); +- kfree(addr); ++ list_for_each_entry_safe(addr, temp, &bp->address_list, list) { ++ list_del_rcu(&addr->list); ++ call_rcu(&addr->rcu, sctp_local_addr_free); + SCTP_DBG_OBJCNT_DEC(addr); + } + } diff --git a/queue-2.6.38/series b/queue-2.6.38/series index 31ae6c70a8..4281821e28 100644 --- a/queue-2.6.38/series +++ b/queue-2.6.38/series @@ -44,3 +44,24 @@ ext3-fix-fs-corruption-when-make_indexed_dir-fails.patch jbd-fix-forever-sleeping-process-in-do_get_write_access.patch jbd-fix-fsync-tid-wraparound-bug.patch ext4-release-page-cache-in-ext4_mb_load_buddy-error-path.patch +bonding-802.3ad-fix-agg_device_up.patch +bridge-fix-forwarding-of-ipv6.patch +ieee802154-remove-hacked-cflags-in-net-ieee802154-makefile.patch +irda-fix-locking-unbalance-in-irda_sendmsg.patch +inetpeer-reduce-stack-usage.patch +ipv6-remove-hoplimit-initialization-to-1.patch +ipv6-udp-fix-the-wrong-headroom-check.patch +macvlan-fix-panic-if-lowerdev-in-a-bond.patch +net-do-not-wrap-sysctl-igmp_max_memberships-in-ip_multicast.patch 
+net-use-hlist_del_rcu-in-dev_change_name.patch +revert-bridge-forward-reserved-group-addresses-if-stp.patch +revert-tcp-disallow-bind-to-reuse-addr-port.patch +sctp-fix-race-between-sctp_bind_addr_free-and-sctp_bind_addr_conflict.patch +tcp-len-check-is-unnecessarily-devastating-change-to-warn_on.patch +vlan-fix-gvrp-at-dismantle-time-mime-version-1.0.patch +igmp-call-ip_mc_clear_src-only-when-we-have-no-users-of-ip_mc_list.patch +net-add-skb_dst_force-in-sock_queue_err_skb.patch +sch_sfq-avoid-giving-spurious-net_xmit_cn-signals.patch +sctp-fix-memory-leak-of-the-asconf-queue-when-free-asoc.patch +sch_sfq-fix-peek-implementation.patch +bonding-prevent-deadlock-on-slave-store-with-alb-mode-v3.patch diff --git a/queue-2.6.38/tcp-len-check-is-unnecessarily-devastating-change-to-warn_on.patch b/queue-2.6.38/tcp-len-check-is-unnecessarily-devastating-change-to-warn_on.patch new file mode 100644 index 0000000000..8ed6ad935e --- /dev/null +++ b/queue-2.6.38/tcp-len-check-is-unnecessarily-devastating-change-to-warn_on.patch @@ -0,0 +1,35 @@ +From fe5727fa719f4b3fe8b8cba9dcedf7e044b5a9b6 Mon Sep 17 00:00:00 2001 +From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> +Date: Fri, 1 Apr 2011 21:47:41 -0700 +Subject: tcp: len check is unnecessarily devastating, change to WARN_ON + + +From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi> + +[ Upstream commit 2fceec13375e5d98ef033c6b0ee03943fc460950 ] + +All callers are prepared for alloc failures anyway, so this error +can safely be boomeranged to the callers domain without super +bad consequences. ...At worst the connection might go into a state +where each RTO tries to (unsuccessfully) re-fragment with such +a mis-sized value and eventually dies. + +Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/ipv4/tcp_output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1003,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct + int nlen; + u8 flags; + +- BUG_ON(len > skb->len); ++ if (WARN_ON(len > skb->len)) ++ return -EINVAL; + + nsize = skb_headlen(skb) - len; + if (nsize < 0) diff --git a/queue-2.6.38/vlan-fix-gvrp-at-dismantle-time-mime-version-1.0.patch b/queue-2.6.38/vlan-fix-gvrp-at-dismantle-time-mime-version-1.0.patch new file mode 100644 index 0000000000..586974d11f --- /dev/null +++ b/queue-2.6.38/vlan-fix-gvrp-at-dismantle-time-mime-version-1.0.patch @@ -0,0 +1,96 @@ +From 38520bd1076bd0b886ec4b7824b85c057c27bd1c Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <eric.dumazet@gmail.com> +Date: Tue, 10 May 2011 12:22:54 -0700 +Subject: vlan: fix GVRP at dismantle time MIME-Version: 1.0 + + +From: Eric Dumazet <eric.dumazet@gmail.com> + +[ Upstream commit 0442277740ec56109c5b5f7bcfded299cf9e72bd ] + +ip link add link eth2 eth2.103 type vlan id 103 gvrp on loose_binding on +ip link set eth2.103 up +rmmod tg3 # driver providing eth2 + + BUG: unable to handle kernel NULL pointer dereference at (null) + IP: [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp] + PGD 11d251067 PUD 11b9e0067 PMD 0 + Oops: 0000 [#1] SMP + last sysfs file: /sys/devices/virtual/net/eth2.104/ifindex + CPU 0 + Modules linked in: tg3(-) 8021q garp nfsd lockd auth_rpcgss sunrpc libphy sg [last unloaded: x_tables] + + Pid: 11494, comm: rmmod Tainted: G W 2.6.39-rc6-00261-gfd71257-dirty #580 HP ProLiant BL460c G6 + RIP: 0010:[<ffffffffa0030c9e>] [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp] + RSP: 0018:ffff88007a19bae8 EFLAGS: 00010286 + RAX: 0000000000000000 RBX: ffff88011b5e2000 RCX: 0000000000000002 + RDX: 0000000000000000 RSI: 0000000000000175 RDI: ffffffffa0030d5b + RBP: ffff88007a19bb18 R08: 
0000000000000001 R09: ffff88011bd64a00 + R10: ffff88011d34ec00 R11: 0000000000000000 R12: 0000000000000002 + R13: ffff88007a19bc48 R14: ffff88007a19bb88 R15: 0000000000000001 + FS: 0000000000000000(0000) GS:ffff88011fc00000(0063) knlGS:00000000f77d76c0 + CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b + CR2: 0000000000000000 CR3: 000000011a675000 CR4: 00000000000006f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process rmmod (pid: 11494, threadinfo ffff88007a19a000, task ffff8800798595c0) + Stack: + ffff88007a19bb36 ffff88011c84b800 ffff88011b5e2000 ffff88007a19bc48 + ffff88007a19bb88 0000000000000006 ffff88007a19bb38 ffffffffa003a5f6 + ffff88007a19bb38 670088007a19bba8 ffff88007a19bb58 ffffffffa00397e7 + Call Trace: + [<ffffffffa003a5f6>] vlan_gvrp_request_leave+0x46/0x50 [8021q] + [<ffffffffa00397e7>] vlan_dev_stop+0xb7/0xc0 [8021q] + [<ffffffff8137e427>] __dev_close_many+0x87/0xe0 + [<ffffffff8137e507>] dev_close_many+0x87/0x110 + [<ffffffff8137e630>] rollback_registered_many+0xa0/0x240 + [<ffffffff8137e7e9>] unregister_netdevice_many+0x19/0x60 + [<ffffffffa00389eb>] vlan_device_event+0x53b/0x550 [8021q] + [<ffffffff8143f448>] ? ip6mr_device_event+0xa8/0xd0 + [<ffffffff81479d03>] notifier_call_chain+0x53/0x80 + [<ffffffff81062539>] __raw_notifier_call_chain+0x9/0x10 + [<ffffffff81062551>] raw_notifier_call_chain+0x11/0x20 + [<ffffffff8137df82>] call_netdevice_notifiers+0x32/0x60 + [<ffffffff8137e69f>] rollback_registered_many+0x10f/0x240 + [<ffffffff8137e85f>] rollback_registered+0x2f/0x40 + [<ffffffff8137e8c8>] unregister_netdevice_queue+0x58/0x90 + [<ffffffff8137e9eb>] unregister_netdev+0x1b/0x30 + [<ffffffffa005d73f>] tg3_remove_one+0x6f/0x10b [tg3] + +We should call vlan_gvrp_request_leave() from unregister_vlan_dev(), +not from vlan_dev_stop(), because vlan_gvrp_uninit_applicant() +is called right after unregister_netdevice_queue(). 
In batch mode, +unregister_netdevice_queue() doesn't immediately call vlan_dev_stop(). + +Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> +--- + net/8021q/vlan.c | 3 +++ + net/8021q/vlan_dev.c | 3 --- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/net/8021q/vlan.c ++++ b/net/8021q/vlan.c +@@ -124,6 +124,9 @@ void unregister_vlan_dev(struct net_devi + + grp->nr_vlans--; + ++ if (vlan->flags & VLAN_FLAG_GVRP) ++ vlan_gvrp_request_leave(dev); ++ + vlan_group_set_device(grp, vlan_id, NULL); + if (!grp->killall) + synchronize_net(); +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -487,9 +487,6 @@ static int vlan_dev_stop(struct net_devi + struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + +- if (vlan->flags & VLAN_FLAG_GVRP) +- vlan_gvrp_request_leave(dev); +- + dev_mc_unsync(real_dev, dev); + dev_uc_unsync(real_dev, dev); + if (dev->flags & IFF_ALLMULTI) |