aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/af_inet6.c90
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/esp6.c1
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/inet6_connection_sock.c199
-rw-r--r--net/ipv6/inet6_hashtables.c183
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipcomp6.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c24
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c315
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c1
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c3
-rw-r--r--net/ipv6/netfilter/ip6t_dst.c4
-rw-r--r--net/ipv6/netfilter/ip6t_esp.c3
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c2
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c4
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c47
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c77
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/ipv6/tcp_ipv6.c639
-rw-r--r--net/ipv6/udp.c16
28 files changed, 864 insertions, 786 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834b7b9..9601fd7f9d66f2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
- ip6_flowlabel.o ipv6_syms.o netfilter.o
+ ip6_flowlabel.o ipv6_syms.o netfilter.o \
+ inet6_connection_sock.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
xfrm6_output.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a60585fd85ad02..704fb73e6c5ff3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
- const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d9546380fa048a..68afc53be6628a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,6 +167,7 @@ lookup_protocol:
sk->sk_reuse = 1;
inet = inet_sk(sk);
+ inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
if (SOCK_RAW == sock->type) {
inet->num = protocol;
@@ -389,6 +390,8 @@ int inet6_destroy_sock(struct sock *sk)
return 0;
}
+EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+
/*
* This does both peername and sockname.
*/
@@ -431,7 +434,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
- int err = -EINVAL;
switch(cmd)
{
@@ -450,16 +452,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr((void __user *) arg);
default:
- if (!sk->sk_prot->ioctl ||
- (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD)
- return(dev_ioctl(cmd,(void __user *) arg));
- return err;
+ if (!sk->sk_prot->ioctl)
+ return -ENOIOCTLCMD;
+ return sk->sk_prot->ioctl(sk, cmd, arg);
}
/*NOTREACHED*/
return(0);
}
-struct proto_ops inet6_stream_ops = {
+const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = inet6_release,
@@ -480,7 +481,7 @@ struct proto_ops inet6_stream_ops = {
.sendpage = tcp_sendpage
};
-struct proto_ops inet6_dgram_ops = {
+const struct proto_ops inet6_dgram_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = inet6_release,
@@ -508,7 +509,7 @@ static struct net_proto_family inet6_family_ops = {
};
/* Same as inet6_dgram_ops, sans udp_poll. */
-static struct proto_ops inet6_sockraw_ops = {
+static const struct proto_ops inet6_sockraw_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = inet6_release,
@@ -609,6 +610,79 @@ inet6_unregister_protosw(struct inet_protosw *p)
}
}
+int inet6_sk_rebuild_header(struct sock *sk)
+{
+ int err;
+ struct dst_entry *dst;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst == NULL) {
+ struct inet_sock *inet = inet_sk(sk);
+ struct in6_addr *final_p = NULL, final;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = sk->sk_protocol;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ fl.fl6_flowlabel = np->flow_label;
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = inet->dport;
+ fl.fl_ip_sport = inet->sport;
+
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err) {
+ sk->sk_route_caps = 0;
+ return err;
+ }
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+ if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+ sk->sk_err_soft = -err;
+ return err;
+ }
+
+ ip6_dst_store(sk, dst, NULL);
+ sk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
+
+int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ if (np->rxopt.all) {
+ if ((opt->hop && (np->rxopt.bits.hopopts ||
+ np->rxopt.bits.ohopopts)) ||
+ ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
+ np->rxopt.bits.rxflow) ||
+ (opt->srcrt && (np->rxopt.bits.srcrt ||
+ np->rxopt.bits.osrcrt)) ||
+ ((opt->dst1 || opt->dst0) &&
+ (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
+ return 1;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
+
int
snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
{
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb1579..13cc7f89558373 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <net/icmp.h>
#include <net/ipv6.h>
+#include <net/protocol.h>
#include <net/xfrm.h>
#include <asm/scatterlist.h>
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bfbe9970793cb..6de8ee1a5ad9ec 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
#include <linux/random.h>
#include <net/icmp.h>
#include <net/ipv6.h>
+#include <net/protocol.h>
#include <linux/icmpv6.h>
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index be6faf311387ce..113374dc342c1f 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
return opt;
}
+EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
+
/**********************************
Hop-by-hop options.
**********************************/
@@ -579,6 +581,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
return opt2;
}
+EXPORT_SYMBOL_GPL(ipv6_dup_options);
+
static int ipv6_renew_option(void *ohdr,
struct ipv6_opt_hdr __user *newopt, int newoptlen,
int inherit,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 00000000000000..792f90f0f9ece2
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,199 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Support for INET6 connection oriented protocols.
+ *
+ * Authors: See the TCPv6 sources
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or(at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
+#include <net/addrconf.h>
+#include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
+#include <net/sock.h>
+
+int inet6_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb)
+{
+ const struct sock *sk2;
+ const struct hlist_node *node;
+
+ /* We must walk the whole port owner list in this case. -DaveM */
+ sk_for_each_bound(sk2, node, &tb->owners) {
+ if (sk != sk2 &&
+ (!sk->sk_bound_dev_if ||
+ !sk2->sk_bound_dev_if ||
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+ (!sk->sk_reuse || !sk2->sk_reuse ||
+ sk2->sk_state == TCP_LISTEN) &&
+ ipv6_rcv_saddr_equal(sk, sk2))
+ break;
+ }
+
+ return node != NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
+/*
+ * request_sock (formerly open request) hash tables.
+ */
+static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
+ const u32 rnd, const u16 synq_hsize)
+{
+ u32 a = raddr->s6_addr32[0];
+ u32 b = raddr->s6_addr32[1];
+ u32 c = raddr->s6_addr32[2];
+
+ a += JHASH_GOLDEN_RATIO;
+ b += JHASH_GOLDEN_RATIO;
+ c += rnd;
+ __jhash_mix(a, b, c);
+
+ a += raddr->s6_addr32[3];
+ b += (u32)rport;
+ __jhash_mix(a, b, c);
+
+ return c & (synq_hsize - 1);
+}
+
+struct request_sock *inet6_csk_search_req(const struct sock *sk,
+ struct request_sock ***prevp,
+ const __u16 rport,
+ const struct in6_addr *raddr,
+ const struct in6_addr *laddr,
+ const int iif)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+ struct request_sock *req, **prev;
+
+ for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
+ lopt->hash_rnd,
+ lopt->nr_table_entries)];
+ (req = *prev) != NULL;
+ prev = &req->dl_next) {
+ const struct inet6_request_sock *treq = inet6_rsk(req);
+
+ if (inet_rsk(req)->rmt_port == rport &&
+ req->rsk_ops->family == AF_INET6 &&
+ ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+ ipv6_addr_equal(&treq->loc_addr, laddr) &&
+ (!treq->iif || treq->iif == iif)) {
+ BUG_TRAP(req->sk == NULL);
+ *prevp = prev;
+ return req;
+ }
+ }
+
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_search_req);
+
+void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
+ struct request_sock *req,
+ const unsigned long timeout)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
+ const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
+ inet_rsk(req)->rmt_port,
+ lopt->hash_rnd, lopt->nr_table_entries);
+
+ reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
+ inet_csk_reqsk_queue_added(sk, timeout);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+ sin6->sin6_family = AF_INET6;
+ ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+ sin6->sin6_port = inet_sk(sk)->dport;
+ /* We do not store received flowlabel for TCP */
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+ if (sk->sk_bound_dev_if &&
+ ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6->sin6_scope_id = sk->sk_bound_dev_if;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+{
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi fl;
+ struct dst_entry *dst;
+ struct in6_addr *final_p = NULL, final;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = sk->sk_protocol;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ fl.fl6_flowlabel = np->flow_label;
+ IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_sport = inet->sport;
+ fl.fl_ip_dport = inet->dport;
+
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+
+ dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst == NULL) {
+ int err = ip6_dst_lookup(sk, &dst, &fl);
+
+ if (err) {
+ sk->sk_err_soft = -err;
+ return err;
+ }
+
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+ if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+ sk->sk_route_caps = 0;
+ return err;
+ }
+
+ ip6_dst_store(sk, dst, NULL);
+ sk->sk_route_caps = dst->dev->features &
+ ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+ }
+
+ skb->dst = dst_clone(dst);
+
+ /* Restore final destination back after routing done */
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+
+ return ip6_xmit(sk, skb, &fl, np->opt, 0);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e4005..4154f3a8b6cf3e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
*
* Generic INET6 transport hashtables
*
- * Authors: Lotsa people, from code originally in tcp
+ * Authors: Lotsa people, from code originally in tcp, generalised here
+ * by Arnaldo Carvalho de Melo <acme@mandriva.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
*/
#include <linux/config.h>
-
#include <linux/module.h>
+#include <linux/random.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
+#include <net/ip.h>
struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
}
EXPORT_SYMBOL_GPL(inet6_lookup);
+
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+ struct sock *sk, const __u16 lport,
+ struct inet_timewait_sock **twp)
+{
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct in6_addr *daddr = &np->rcv_saddr;
+ const struct in6_addr *saddr = &np->daddr;
+ const int dif = sk->sk_bound_dev_if;
+ const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+ const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
+ inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ struct sock *sk2;
+ const struct hlist_node *node;
+ struct inet_timewait_sock *tw;
+
+ prefetch(head->chain.first);
+ write_lock(&head->lock);
+
+ /* Check TIME-WAIT sockets first. */
+ sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+ const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
+
+ tw = inet_twsk(sk2);
+
+ if(*((__u32 *)&(tw->tw_dport)) == ports &&
+ sk2->sk_family == PF_INET6 &&
+ ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
+ ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+ goto not_unique;
+ }
+ }
+ tw = NULL;
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+ if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+ goto not_unique;
+ }
+
+unique:
+ BUG_TRAP(sk_unhashed(sk));
+ __sk_add_node(sk, &head->chain);
+ sk->sk_hash = hash;
+ sock_prot_inc_use(sk->sk_prot);
+ write_unlock(&head->lock);
+
+ if (twp != NULL) {
+ *twp = tw;
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ } else if (tw != NULL) {
+ /* Silly. Should hash-dance instead... */
+ inet_twsk_deschedule(tw, death_row);
+ NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+ inet_twsk_put(tw);
+ }
+ return 0;
+
+not_unique:
+ write_unlock(&head->lock);
+ return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+ np->daddr.s6_addr32,
+ inet->dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+{
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+ struct inet_bind_bucket *tb;
+ int ret;
+
+ if (snum == 0) {
+ const int low = sysctl_local_port_range[0];
+ const int high = sysctl_local_port_range[1];
+ const int range = high - low;
+ int i, port;
+ static u32 hint;
+ const u32 offset = hint + inet6_sk_port_offset(sk);
+ struct hlist_node *node;
+ struct inet_timewait_sock *tw = NULL;
+
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+ head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+ * because the established check is already
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+ if (tb->port == port) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+ if (!__inet6_check_established(death_row,
+ sk, port,
+ &tw))
+ goto ok;
+ goto next_port;
+ }
+ }
+
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ head, port);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+ }
+ tb->fastreuse = -1;
+ goto ok;
+
+ next_port:
+ spin_unlock(&head->lock);
+ }
+ local_bh_enable();
+
+ return -EADDRNOTAVAIL;
+
+ok:
+ hint += i;
+
+ /* Head lock still held and bh's disabled */
+ inet_bind_hash(sk, tb, port);
+ if (sk_unhashed(sk)) {
+ inet_sk(sk)->sport = htons(port);
+ __inet6_hash(hinfo, sk);
+ }
+ spin_unlock(&head->lock);
+
+ if (tw) {
+ inet_twsk_deschedule(tw, death_row);
+ inet_twsk_put(tw);
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+
+ if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+ __inet6_hash(hinfo, sk);
+ spin_unlock_bh(&head->lock);
+ return 0;
+ } else {
+ spin_unlock(&head->lock);
+ /* No definite answer... Walk to established hash table */
+ ret = __inet6_check_established(death_row, sk, snum, NULL);
+out:
+ local_bh_enable();
+ return ret;
+ }
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1cf02765fb5cae..89d12b4817a9fd 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -200,6 +200,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
return NULL;
}
+EXPORT_SYMBOL_GPL(fl6_sock_lookup);
+
void fl6_free_socklist(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8523c76ebf768b..b4c4beba0ede94 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -775,6 +775,8 @@ out_err_release:
return err;
}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup);
+
static inline int ip6_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb170949c..626dd39685f2c4 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
#include <linux/rtnetlink.h>
#include <net/icmp.h>
#include <net/ipv6.h>
+#include <net/protocol.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718defe619..c63868dd2ca29c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
sk_refcnt_debug_dec(sk);
if (sk->sk_protocol == IPPROTO_TCP) {
- struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
local_bh_disable();
sock_prot_dec_use(sk->sk_prot);
sock_prot_inc_use(&tcp_prot);
local_bh_enable();
sk->sk_prot = &tcp_prot;
- tp->af_specific = &ipv4_specific;
+ icsk->icsk_af_ops = &ipv4_specific;
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
local_bh_disable();
sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
}
retv = 0;
- if (sk->sk_type == SOCK_STREAM) {
+ if (inet_sk(sk)->is_icsk) {
if (opt) {
- struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
if (!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE))
&& inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
- tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ icsk->icsk_ext_hdr_len =
+ opt->opt_flen + opt->opt_nflen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
opt = xchg(&np->opt, opt);
@@ -380,14 +381,15 @@ sticky_done:
goto done;
update:
retv = 0;
- if (sk->sk_type == SOCK_STREAM) {
+ if (inet_sk(sk)->is_icsk) {
if (opt) {
- struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
if (!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE))
&& inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
- tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
- tcp_sync_mss(sk, tp->pmtu_cookie);
+ icsk->icsk_ext_hdr_len =
+ opt->opt_flen + opt->opt_nflen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
opt = xchg(&np->opt, opt);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f829a4ad3ccca1..1cf305a9f8ddf4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
mc_lst->ifindex = dev->ifindex;
mc_lst->sfmode = MCAST_EXCLUDE;
- mc_lst->sflock = RW_LOCK_UNLOCKED;
+ rwlock_init(&mc_lst->sflock);
mc_lst->sflist = NULL;
/*
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c4d18..925b42d48347c9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -15,6 +15,7 @@
* - new extension header parser code
*/
#include <linux/config.h>
+#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
@@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex);
context stops packets coming through and allows user context to read
the counters or update the rules.
- To be cache friendly on SMP, we arrange them like so:
- [ n-entries ]
- ... cache-align padding ...
- [ n-entries ]
-
Hence the start of any table is given by get_table() below. */
/* The table itself */
@@ -108,33 +104,29 @@ struct ip6t_table_info
unsigned int underflow[NF_IP6_NUMHOOKS];
/* ip6t_entry tables: one per CPU */
- char entries[0] ____cacheline_aligned;
+ void *entries[NR_CPUS];
};
static LIST_HEAD(ip6t_target);
static LIST_HEAD(ip6t_match);
static LIST_HEAD(ip6t_tables);
+#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
-#ifdef CONFIG_SMP
-#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
-#else
-#define TABLE_OFFSET(t,p) 0
-#endif
-
#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
-static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
- struct in6_addr addr2)
+int
+ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask,
+ const struct in6_addr *addr2)
{
int i;
for( i = 0; i < 16; i++){
- if((addr1.s6_addr[i] & mask.s6_addr[i]) !=
- (addr2.s6_addr[i] & mask.s6_addr[i]))
+ if((addr1->s6_addr[i] & mask->s6_addr[i]) !=
+ (addr2->s6_addr[i] & mask->s6_addr[i]))
return 1;
}
return 0;
@@ -168,10 +160,10 @@ ip6_packet_match(const struct sk_buff *skb,
#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
- if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
- IP6T_INV_SRCIP)
- || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
- IP6T_INV_DSTIP)) {
+ if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk,
+ &ip6info->src), IP6T_INV_SRCIP)
+ || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk,
+ &ip6info->dst), IP6T_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
/*
dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -214,69 +206,21 @@ ip6_packet_match(const struct sk_buff *skb,
/* look for the desired protocol header */
if((ip6info->flags & IP6T_F_PROTO)) {
- u_int8_t currenthdr = ipv6->nexthdr;
- struct ipv6_opt_hdr _hdr, *hp;
- u_int16_t ptr; /* Header offset in skb */
- u_int16_t hdrlen; /* Header */
- u_int16_t _fragoff = 0, *fp = NULL;
-
- ptr = IPV6_HDR_LEN;
-
- while (ip6t_ext_hdr(currenthdr)) {
- /* Is there enough space for the next ext header? */
- if (skb->len - ptr < IPV6_OPTHDR_LEN)
- return 0;
-
- /* NONE or ESP: there isn't protocol part */
- /* If we want to count these packets in '-p all',
- * we will change the return 0 to 1*/
- if ((currenthdr == IPPROTO_NONE) ||
- (currenthdr == IPPROTO_ESP))
- break;
+ int protohdr;
+ unsigned short _frag_off;
- hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- BUG_ON(hp == NULL);
-
- /* Size calculation */
- if (currenthdr == IPPROTO_FRAGMENT) {
- fp = skb_header_pointer(skb,
- ptr+offsetof(struct frag_hdr,
- frag_off),
- sizeof(_fragoff),
- &_fragoff);
- if (fp == NULL)
- return 0;
-
- _fragoff = ntohs(*fp) & ~0x7;
- hdrlen = 8;
- } else if (currenthdr == IPPROTO_AH)
- hdrlen = (hp->hdrlen+2)<<2;
- else
- hdrlen = ipv6_optlen(hp);
-
- currenthdr = hp->nexthdr;
- ptr += hdrlen;
- /* ptr is too large */
- if ( ptr > skb->len )
- return 0;
- if (_fragoff) {
- if (ip6t_ext_hdr(currenthdr))
- return 0;
- break;
- }
- }
-
- *protoff = ptr;
- *fragoff = _fragoff;
+ protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+ if (protohdr < 0)
+ return 0;
- /* currenthdr contains the protocol header */
+ *fragoff = _frag_off;
dprintf("Packet protocol %hi ?= %s%hi.\n",
- currenthdr,
+ protohdr,
ip6info->invflags & IP6T_INV_PROTO ? "!":"",
ip6info->proto);
- if (ip6info->proto == currenthdr) {
+ if (ip6info->proto == protohdr) {
if(ip6info->invflags & IP6T_INV_PROTO) {
return 0;
}
@@ -376,8 +320,7 @@ ip6t_do_table(struct sk_buff **pskb,
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- table_base = (void *)table->private->entries
- + TABLE_OFFSET(table->private, smp_processor_id());
+ table_base = (void *)table->private->entries[smp_processor_id()];
e = get_entry(table_base, table->private->hook_entry[hook]);
#ifdef CONFIG_NETFILTER_DEBUG
@@ -649,7 +592,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+mark_source_chains(struct ip6t_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -658,7 +602,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
struct ip6t_entry *e
- = (struct ip6t_entry *)(newinfo->entries + pos);
+ = (struct ip6t_entry *)(entry0 + pos);
if (!(valid_hooks & (1 << hook)))
continue;
@@ -708,13 +652,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
goto next;
e = (struct ip6t_entry *)
- (newinfo->entries + pos);
+ (entry0 + pos);
} while (oldpos == pos + e->next_offset);
/* Move along one */
size = e->next_offset;
e = (struct ip6t_entry *)
- (newinfo->entries + pos + size);
+ (entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
} else {
@@ -731,7 +675,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
newpos = pos + e->next_offset;
}
e = (struct ip6t_entry *)
- (newinfo->entries + newpos);
+ (entry0 + newpos);
e->counters.pcnt = pos;
pos = newpos;
}
@@ -941,6 +885,7 @@ static int
translate_table(const char *name,
unsigned int valid_hooks,
struct ip6t_table_info *newinfo,
+ void *entry0,
unsigned int size,
unsigned int number,
const unsigned int *hook_entries,
@@ -961,11 +906,11 @@ translate_table(const char *name,
duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
- ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
check_entry_size_and_hooks,
newinfo,
- newinfo->entries,
- newinfo->entries + size,
+ entry0,
+ entry0 + size,
hook_entries, underflows, &i);
if (ret != 0)
return ret;
@@ -993,27 +938,24 @@ translate_table(const char *name,
}
}
- if (!mark_source_chains(newinfo, valid_hooks))
+ if (!mark_source_chains(newinfo, valid_hooks, entry0))
return -ELOOP;
/* Finally, each sanity check must pass */
i = 0;
- ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
check_entry, name, size, &i);
if (ret != 0) {
- IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ IP6T_ENTRY_ITERATE(entry0, newinfo->size,
cleanup_entry, &i);
return ret;
}
/* And one copy for every other CPU */
for_each_cpu(i) {
- if (i == 0)
- continue;
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
- newinfo->entries,
- SMP_ALIGN(newinfo->size));
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
}
return ret;
@@ -1029,15 +971,12 @@ replace_table(struct ip6t_table *table,
#ifdef CONFIG_NETFILTER_DEBUG
{
- struct ip6t_entry *table_base;
- unsigned int i;
+ int cpu;
- for_each_cpu(i) {
- table_base =
- (void *)newinfo->entries
- + TABLE_OFFSET(newinfo, i);
-
- table_base->comefrom = 0xdead57ac;
+ for_each_cpu(cpu) {
+ struct ip6t_entry *table_base = newinfo->entries[cpu];
+ if (table_base)
+ table_base->comefrom = 0xdead57ac;
}
}
#endif
@@ -1072,16 +1011,44 @@ add_entry_to_counter(const struct ip6t_entry *e,
return 0;
}
+static inline int
+set_entry_to_counter(const struct ip6t_entry *e,
+ struct ip6t_counters total[],
+ unsigned int *i)
+{
+ SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static void
get_counters(const struct ip6t_table_info *t,
struct ip6t_counters counters[])
{
unsigned int cpu;
unsigned int i;
+ unsigned int curcpu;
+
+ /* Instead of clearing (by a previous call to memset())
+ * the counters and using adds, we set the counters
+ * with data used by 'current' CPU
+ * We dont care about preemption here.
+ */
+ curcpu = raw_smp_processor_id();
+
+ i = 0;
+ IP6T_ENTRY_ITERATE(t->entries[curcpu],
+ t->size,
+ set_entry_to_counter,
+ counters,
+ &i);
for_each_cpu(cpu) {
+ if (cpu == curcpu)
+ continue;
i = 0;
- IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+ IP6T_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
@@ -1098,6 +1065,7 @@ copy_entries_to_user(unsigned int total_size,
struct ip6t_entry *e;
struct ip6t_counters *counters;
int ret = 0;
+ void *loc_cpu_entry;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -1109,13 +1077,13 @@ copy_entries_to_user(unsigned int total_size,
return -ENOMEM;
/* First, sum counters... */
- memset(counters, 0, countersize);
write_lock_bh(&table->lock);
get_counters(table->private, counters);
write_unlock_bh(&table->lock);
- /* ... then copy entire thing from CPU 0... */
- if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ /* choose the copy that is on ourc node/cpu */
+ loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
}
@@ -1127,7 +1095,7 @@ copy_entries_to_user(unsigned int total_size,
struct ip6t_entry_match *m;
struct ip6t_entry_target *t;
- e = (struct ip6t_entry *)(table->private->entries + off);
+ e = (struct ip6t_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
+ offsetof(struct ip6t_entry, counters),
&counters[num],
@@ -1196,6 +1164,46 @@ get_entries(const struct ip6t_get_entries *entries,
return ret;
}
+static void free_table_info(struct ip6t_table_info *info)
+{
+ int cpu;
+ for_each_cpu(cpu) {
+ if (info->size <= PAGE_SIZE)
+ kfree(info->entries[cpu]);
+ else
+ vfree(info->entries[cpu]);
+ }
+ kfree(info);
+}
+
+static struct ip6t_table_info *alloc_table_info(unsigned int size)
+{
+ struct ip6t_table_info *newinfo;
+ int cpu;
+
+ newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL);
+ if (!newinfo)
+ return NULL;
+
+ newinfo->size = size;
+
+ for_each_cpu(cpu) {
+ if (size <= PAGE_SIZE)
+ newinfo->entries[cpu] = kmalloc_node(size,
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ else
+ newinfo->entries[cpu] = vmalloc_node(size,
+ cpu_to_node(cpu));
+ if (newinfo->entries[cpu] == NULL) {
+ free_table_info(newinfo);
+ return NULL;
+ }
+ }
+
+ return newinfo;
+}
+
static int
do_replace(void __user *user, unsigned int len)
{
@@ -1204,6 +1212,7 @@ do_replace(void __user *user, unsigned int len)
struct ip6t_table *t;
struct ip6t_table_info *newinfo, *oldinfo;
struct ip6t_counters *counters;
+ void *loc_cpu_entry, *loc_cpu_old_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1212,13 +1221,13 @@ do_replace(void __user *user, unsigned int len)
if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
return -ENOMEM;
- newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(tmp.size) *
- (highest_possible_processor_id()+1));
+ newinfo = alloc_table_info(tmp.size);
if (!newinfo)
return -ENOMEM;
- if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
@@ -1229,10 +1238,9 @@ do_replace(void __user *user, unsigned int len)
ret = -ENOMEM;
goto free_newinfo;
}
- memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, tmp.size, tmp.num_entries,
+ newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
tmp.hook_entry, tmp.underflow);
if (ret != 0)
goto free_newinfo_counters;
@@ -1271,8 +1279,9 @@ do_replace(void __user *user, unsigned int len)
/* Get the old counters. */
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
- vfree(oldinfo);
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+ free_table_info(oldinfo);
if (copy_to_user(tmp.counters, counters,
sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
ret = -EFAULT;
@@ -1284,11 +1293,11 @@ do_replace(void __user *user, unsigned int len)
module_put(t->me);
up(&ip6t_mutex);
free_newinfo_counters_untrans:
- IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
free_newinfo_counters:
vfree(counters);
free_newinfo:
- vfree(newinfo);
+ free_table_info(newinfo);
return ret;
}
@@ -1321,6 +1330,7 @@ do_add_counters(void __user *user, unsigned int len)
struct ip6t_counters_info tmp, *paddc;
struct ip6t_table *t;
int ret = 0;
+ void *loc_cpu_entry;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1350,7 +1360,9 @@ do_add_counters(void __user *user, unsigned int len)
}
i = 0;
- IP6T_ENTRY_ITERATE(t->private->entries,
+ /* Choose the copy that is on our node */
+ loc_cpu_entry = t->private->entries[smp_processor_id()];
+ IP6T_ENTRY_ITERATE(loc_cpu_entry,
t->private->size,
add_counter_to_entry,
paddc->counters,
@@ -1543,28 +1555,29 @@ int ip6t_register_table(struct ip6t_table *table,
struct ip6t_table_info *newinfo;
static struct ip6t_table_info bootstrap
= { 0, 0, 0, { 0 }, { 0 }, { } };
+ void *loc_cpu_entry;
- newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(repl->size) *
- (highest_possible_processor_id()+1));
+ newinfo = alloc_table_info(repl->size);
if (!newinfo)
return -ENOMEM;
- memcpy(newinfo->entries, repl->entries, repl->size);
+ /* choose the copy on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(table->name, table->valid_hooks,
- newinfo, repl->size,
+ newinfo, loc_cpu_entry, repl->size,
repl->num_entries,
repl->hook_entry,
repl->underflow);
if (ret != 0) {
- vfree(newinfo);
+ free_table_info(newinfo);
return ret;
}
ret = down_interruptible(&ip6t_mutex);
if (ret != 0) {
- vfree(newinfo);
+ free_table_info(newinfo);
return ret;
}
@@ -1593,20 +1606,23 @@ int ip6t_register_table(struct ip6t_table *table,
return ret;
free_unlock:
- vfree(newinfo);
+ free_table_info(newinfo);
goto unlock;
}
void ip6t_unregister_table(struct ip6t_table *table)
{
+ void *loc_cpu_entry;
+
down(&ip6t_mutex);
LIST_DELETE(&ip6t_tables, table);
up(&ip6t_mutex);
/* Decrease module usage counts and free resources */
- IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
+ loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
+ IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
cleanup_entry, NULL);
- vfree(table->private);
+ free_table_info(table->private);
}
/* Returns 1 if the port is matched by the range, 0 otherwise */
@@ -2035,26 +2051,39 @@ static void __exit fini(void)
}
/*
- * find specified header up to transport protocol header.
- * If found target header, the offset to the header is set to *offset
- * and return 0. otherwise, return -1.
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
*
- * Notes: - non-1st Fragment Header isn't skipped.
- * - ESP header isn't skipped.
- * - The target header may be trancated.
*/
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+ int target, unsigned short *fragoff)
{
unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
u8 nexthdr = skb->nh.ipv6h->nexthdr;
unsigned int len = skb->len - start;
+ if (fragoff)
+ *fragoff = 0;
+
while (nexthdr != target) {
struct ipv6_opt_hdr _hdr, *hp;
unsigned int hdrlen;
- if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+ if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+ if (target < 0)
+ break;
return -1;
+ }
+
hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
if (hp == NULL)
return -1;
@@ -2068,8 +2097,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
if (fp == NULL)
return -1;
- if (ntohs(*fp) & ~0x7)
+ _frag_off = ntohs(*fp) & ~0x7;
+ if (_frag_off) {
+ if (target < 0 &&
+ ((!ipv6_ext_hdr(hp->nexthdr)) ||
+ nexthdr == NEXTHDR_NONE)) {
+ if (fragoff)
+ *fragoff = _frag_off;
+ return hp->nexthdr;
+ }
return -1;
+ }
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
hdrlen = (hp->hdrlen + 2) << 2;
@@ -2082,7 +2120,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
}
*offset = start;
- return 0;
+ return nexthdr;
}
EXPORT_SYMBOL(ip6t_register_table);
@@ -2094,6 +2132,7 @@ EXPORT_SYMBOL(ip6t_register_target);
EXPORT_SYMBOL(ip6t_unregister_target);
EXPORT_SYMBOL(ip6t_ext_hdr);
EXPORT_SYMBOL(ipv6_find_hdr);
+EXPORT_SYMBOL(ip6_masked_addrcmp);
module_init(init);
module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd9033ce..ae4653bfd65462 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/skbuff.h>
+#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
#include <linux/icmpv6.h>
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20b00..f5c1a7ff4a1f05 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <net/checksum.h>
@@ -53,7 +54,7 @@ match(const struct sk_buff *skb,
unsigned int ptr;
unsigned int hdrlen = 0;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
return 0;
ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index c450a635e54b92..48cf5f9efc95c5 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
unsigned int optlen;
#if HOPBYHOP
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
#else
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
#endif
return 0;
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43a174..e1828f6d0a4061 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <net/checksum.h>
@@ -55,7 +56,7 @@ match(const struct sk_buff *skb,
/* Make sure this isn't an evil packet */
/*DEBUGP("ipv6_esp entered \n");*/
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0)
return 0;
eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea29ca..d1549b268669b7 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -52,7 +52,7 @@ match(const struct sk_buff *skb,
const struct ip6t_frag *fraginfo = matchinfo;
unsigned int ptr;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
return 0;
fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d011..e3bc8e2700e77e 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
unsigned int optlen;
#if HOPBYHOP
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
#else
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
#endif
return 0;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebbb31..c1e770e45543b3 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -58,7 +58,7 @@ match(const struct sk_buff *skb,
unsigned int ret = 0;
struct in6_addr *ap, _addr;
- if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
+ if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
return 0;
rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 753a3ae8502b7e..704fbbe748746f 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = {
};
#endif
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv6_tuple_to_nfattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+ &tuple->src.u3.ip6);
+ NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+ &tuple->dst.u3.ip6);
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+ [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4,
+ [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4,
+};
+
+static int ipv6_nfattr_to_tuple(struct nfattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1])
+ return -EINVAL;
+
+ if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+ return -EINVAL;
+
+ memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]),
+ sizeof(u_int32_t) * 4);
+ memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]),
+ sizeof(u_int32_t) * 4);
+
+ return 0;
+}
+#endif
+
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
.name = "ipv6",
@@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.print_tuple = ipv6_print_tuple,
.print_conntrack = ipv6_print_conntrack,
.prepare = ipv6_prepare,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nfattr = ipv6_tuple_to_nfattr,
+ .nfattr_to_tuple = ipv6_nfattr_to_tuple,
+#endif
.get_features = ipv6_get_features,
.me = THIS_MODULE,
};
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a7e03cfacd06fc..09945c33305529 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
return 1;
}
+/* Add 1; spaces filled with 0. */
+static u_int8_t invmap[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
+ [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
+ [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
+};
+
static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
- /* Add 1; spaces filled with 0. */
- static u_int8_t invmap[] = {
- [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
- [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
- [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
- [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
- };
-
int type = orig->dst.u.icmp.type - 128;
if (type < 0 || type >= sizeof(invmap) || !invmap[type])
return 0;
@@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
- inproto = nf_ct_find_proto(PF_INET6, inprotonum);
+ inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -255,6 +255,60 @@ skipped:
return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
}
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nfattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
+ &t->src.u.icmp.id);
+ NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
+ &t->dst.u.icmp.type);
+ NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
+ &t->dst.u.icmp.code);
+
+ return 0;
+
+nfattr_failure:
+ return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+ [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t),
+ [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t),
+ [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t)
+};
+
+static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMPV6_TYPE-1]
+ || !tb[CTA_PROTO_ICMPV6_CODE-1]
+ || !tb[CTA_PROTO_ICMPV6_ID-1])
+ return -EINVAL;
+
+ if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]);
+ tuple->dst.u.icmp.code =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
+ tuple->src.u.icmp.id =
+ *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
+
+ if (tuple->dst.u.icmp.type < 128
+ || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
+ || !invmap[tuple->dst.u.icmp.type - 128])
+ return -EINVAL;
+
+ return 0;
+}
+#endif
+
struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
{
.l3proto = PF_INET6,
@@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
.packet = icmpv6_packet,
.new = icmpv6_new,
.error = icmpv6_error,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+ defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nfattr = icmpv6_tuple_to_nfattr,
+ .nfattr_to_tuple = icmpv6_nfattr_to_tuple,
+#endif
};
EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c2c52af9e5602c..f3e5ffbd592f9e 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -98,7 +98,7 @@ struct nf_ct_frag6_queue
#define FRAG6Q_HASHSZ 64
static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(nf_ct_frag6_lock);
static u32 nf_ct_frag6_hash_rnd;
static LIST_HEAD(nf_ct_frag6_lru_list);
int nf_ct_frag6_nqueues = 0;
@@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct
init_timer(&fq->timer);
fq->timer.function = nf_ct_frag6_expire;
fq->timer.data = (long) fq;
- fq->lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&fq->lock);
atomic_set(&fq->refcnt, 1);
return nf_ct_frag6_intern(hash, fq);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2afc7..66f1d12ea57831 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
return err;
csum_copy_err:
- /* Clear queue. */
- if (flags&MSG_PEEK) {
- int clear = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- clear = 1;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- if (clear)
- kfree_skb(skb);
- }
+ skb_kill_datagram(sk, skb, flags);
/* Error for blocking case is chosen to masquerade
as some normal condition.
*/
err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
/* FIXME: increment a raw6 drops counter here */
- goto out_free;
+ goto out;
}
static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c3123c9e1a8e95..577d49732b0fdb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -33,6 +33,7 @@
#include <asm/uaccess.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -720,7 +721,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_SIT;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = 1500 - sizeof(struct iphdr);
+ dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8827389abaf72a..2947bc56d8a025 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -48,6 +48,7 @@
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
+#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
@@ -59,6 +60,7 @@
#include <net/addrconf.h>
#include <net/snmp.h>
#include <net/dsfield.h>
+#include <net/timewait_sock.h>
#include <asm/uaccess.h>
@@ -67,224 +69,33 @@
static void tcp_v6_send_reset(struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
+static void tcp_v6_send_check(struct sock *sk, int len,
struct sk_buff *skb);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
-static struct tcp_func ipv6_mapped;
-static struct tcp_func ipv6_specific;
+static struct inet_connection_sock_af_ops ipv6_mapped;
+static struct inet_connection_sock_af_ops ipv6_specific;
-static inline int tcp_v6_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb)
-{
- const struct sock *sk2;
- const struct hlist_node *node;
-
- /* We must walk the whole port owner list in this case. -DaveM */
- sk_for_each_bound(sk2, node, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
- (!sk->sk_reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
- ipv6_rcv_saddr_equal(sk, sk2))
- break;
- }
-
- return node != NULL;
-}
-
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- * But it doesn't matter, the recalculation is in the rarest path
- * this function ever takes.
- */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- struct hlist_node *node;
- int ret;
-
- local_bh_disable();
- if (snum == 0) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- int rover = net_random() % (high - low) + low;
-
- do {
- head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, node, &head->chain)
- if (tb->port == rover)
- goto next;
- break;
- next:
- spin_unlock(&head->lock);
- if (++rover > high)
- rover = low;
- } while (--remaining > 0);
-
- /* Exhausted local port range during search? It is not
- * possible for us to be holding one of the bind hash
- * locks if this test triggers, because if 'remaining'
- * drops to zero, we broke out of the do/while loop at
- * the top level, not from the 'break;' statement.
- */
- ret = 1;
- if (unlikely(remaining <= 0))
- goto fail;
-
- /* OK, here is the one we will use. */
- snum = rover;
- } else {
- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, node, &head->chain)
- if (tb->port == snum)
- goto tb_found;
- }
- tb = NULL;
- goto tb_not_found;
-tb_found:
- if (tb && !hlist_empty(&tb->owners)) {
- if (tb->fastreuse > 0 && sk->sk_reuse &&
- sk->sk_state != TCP_LISTEN) {
- goto success;
- } else {
- ret = 1;
- if (tcp_v6_bind_conflict(sk, tb))
- goto fail_unlock;
- }
- }
-tb_not_found:
- ret = 1;
- if (tb == NULL) {
- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
- if (tb == NULL)
- goto fail_unlock;
- }
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
- tb->fastreuse = 1;
- else
- tb->fastreuse = 0;
- } else if (tb->fastreuse &&
- (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
- tb->fastreuse = 0;
-
-success:
- if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, snum);
- BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
- ret = 0;
-
-fail_unlock:
- spin_unlock(&head->lock);
-fail:
- local_bh_enable();
- return ret;
-}
-
-static __inline__ void __tcp_v6_hash(struct sock *sk)
-{
- struct hlist_head *list;
- rwlock_t *lock;
-
- BUG_TRAP(sk_unhashed(sk));
-
- if (sk->sk_state == TCP_LISTEN) {
- list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &tcp_hashinfo.lhash_lock;
- inet_listen_wlock(&tcp_hashinfo);
- } else {
- unsigned int hash;
- sk->sk_hash = hash = inet6_sk_ehashfn(sk);
- hash &= (tcp_hashinfo.ehash_size - 1);
- list = &tcp_hashinfo.ehash[hash].chain;
- lock = &tcp_hashinfo.ehash[hash].lock;
- write_lock(lock);
- }
-
- __sk_add_node(sk, list);
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(lock);
+ return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+ inet6_csk_bind_conflict);
}
-
static void tcp_v6_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tp->af_specific == &ipv6_mapped) {
+ if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
tcp_prot.hash(sk);
return;
}
local_bh_disable();
- __tcp_v6_hash(sk);
+ __inet6_hash(&tcp_hashinfo, sk);
local_bh_enable();
}
}
-/*
- * Open request hash tables.
- */
-
-static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
-{
- u32 a, b, c;
-
- a = raddr->s6_addr32[0];
- b = raddr->s6_addr32[1];
- c = raddr->s6_addr32[2];
-
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += rnd;
- __jhash_mix(a, b, c);
-
- a += raddr->s6_addr32[3];
- b += (u32) rport;
- __jhash_mix(a, b, c);
-
- return c & (TCP_SYNQ_HSIZE - 1);
-}
-
-static struct request_sock *tcp_v6_search_req(const struct sock *sk,
- struct request_sock ***prevp,
- __u16 rport,
- struct in6_addr *raddr,
- struct in6_addr *laddr,
- int iif)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- struct request_sock *req, **prev;
-
- for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
- (req = *prev) != NULL;
- prev = &req->dl_next) {
- const struct tcp6_request_sock *treq = tcp6_rsk(req);
-
- if (inet_rsk(req)->rmt_port == rport &&
- req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&treq->rmt_addr, raddr) &&
- ipv6_addr_equal(&treq->loc_addr, laddr) &&
- (!treq->iif || treq->iif == iif)) {
- BUG_TRAP(req->sk == NULL);
- *prevp = prev;
- return req;
- }
- }
-
- return NULL;
-}
-
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
struct in6_addr *saddr,
struct in6_addr *daddr,
@@ -308,195 +119,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
}
}
-static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
- struct inet_timewait_sock **twp)
-{
- struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct in6_addr *daddr = &np->rcv_saddr;
- const struct in6_addr *saddr = &np->daddr;
- const int dif = sk->sk_bound_dev_if;
- const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
- unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
- struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
- struct sock *sk2;
- const struct hlist_node *node;
- struct inet_timewait_sock *tw;
-
- prefetch(head->chain.first);
- write_lock(&head->lock);
-
- /* Check TIME-WAIT sockets first. */
- sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
- const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
-
- tw = inet_twsk(sk2);
-
- if(*((__u32 *)&(tw->tw_dport)) == ports &&
- sk2->sk_family == PF_INET6 &&
- ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
- ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
- const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tcptw->tw_ts_recent_stamp &&
- (!twp ||
- (sysctl_tcp_tw_reuse &&
- xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
- /* See comment in tcp_ipv4.c */
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (!tp->write_seq)
- tp->write_seq = 1;
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
- sock_hold(sk2);
- goto unique;
- } else
- goto not_unique;
- }
- }
- tw = NULL;
-
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
- if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
- goto not_unique;
- }
-
-unique:
- BUG_TRAP(sk_unhashed(sk));
- __sk_add_node(sk, &head->chain);
- sk->sk_hash = hash;
- sock_prot_inc_use(sk->sk_prot);
- write_unlock(&head->lock);
-
- if (twp) {
- *twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
- } else if (tw) {
- /* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, &tcp_death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
- inet_twsk_put(tw);
- }
- return 0;
-
-not_unique:
- write_unlock(&head->lock);
- return -EADDRNOTAVAIL;
-}
-
-static inline u32 tcpv6_port_offset(const struct sock *sk)
-{
- const struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
-
- return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
- np->daddr.s6_addr32,
- inet->dport);
-}
-
-static int tcp_v6_hash_connect(struct sock *sk)
-{
- unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
-
- if (!snum) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int range = high - low;
- int i;
- int port;
- static u32 hint;
- u32 offset = hint + tcpv6_port_offset(sk);
- struct hlist_node *node;
- struct inet_timewait_sock *tw = NULL;
-
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
- head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
- * because the established check is already
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
- if (tb->port == port) {
- BUG_TRAP(!hlist_empty(&tb->owners));
- if (tb->fastreuse >= 0)
- goto next_port;
- if (!__tcp_v6_check_established(sk,
- port,
- &tw))
- goto ok;
- goto next_port;
- }
- }
-
- tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
- }
- tb->fastreuse = -1;
- goto ok;
-
- next_port:
- spin_unlock(&head->lock);
- }
- local_bh_enable();
-
- return -EADDRNOTAVAIL;
-
-ok:
- hint += i;
-
- /* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
- if (sk_unhashed(sk)) {
- inet_sk(sk)->sport = htons(port);
- __tcp_v6_hash(sk);
- }
- spin_unlock(&head->lock);
-
- if (tw) {
- inet_twsk_deschedule(tw, &tcp_death_row);
- inet_twsk_put(tw);
- }
-
- ret = 0;
- goto out;
- }
-
- head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
-
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __tcp_v6_hash(sk);
- spin_unlock_bh(&head->lock);
- return 0;
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
- ret = __tcp_v6_check_established(sk, snum, NULL);
-out:
- local_bh_enable();
- return ret;
- }
-}
-
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
- struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -571,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
*/
if (addr_type == IPV6_ADDR_MAPPED) {
- u32 exthdrlen = tp->ext_header_len;
+ u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -583,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
- tp->af_specific = &ipv6_mapped;
+ icsk->icsk_af_ops = &ipv6_mapped;
sk->sk_backlog_rcv = tcp_v4_do_rcv;
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
if (err) {
- tp->ext_header_len = exthdrlen;
- tp->af_specific = &ipv6_specific;
+ icsk->icsk_ext_hdr_len = exthdrlen;
+ icsk->icsk_af_ops = &ipv6_specific;
sk->sk_backlog_rcv = tcp_v6_do_rcv;
goto failure;
} else {
@@ -643,16 +271,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_route_caps = dst->dev->features &
~(NETIF_F_IP_CSUM | NETIF_F_TSO);
- tp->ext_header_len = 0;
+ icsk->icsk_ext_hdr_len = 0;
if (np->opt)
- tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+ icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+ np->opt->opt_nflen);
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
inet->dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT);
- err = tcp_v6_hash_connect(sk);
+ err = inet6_hash_connect(&tcp_death_row, sk);
if (err)
goto late_failure;
@@ -758,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else
dst_hold(dst);
- if (tp->pmtu_cookie > dst_mtu(dst)) {
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
tcp_sync_mss(sk, dst_mtu(dst));
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
@@ -775,8 +404,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sock_owned_by_user(sk))
goto out;
- req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
- &hdr->saddr, inet6_iif(skb));
+ req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
+ &hdr->saddr, inet6_iif(skb));
if (!req)
goto out;
@@ -822,7 +451,7 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
struct dst_entry *dst)
{
- struct tcp6_request_sock *treq = tcp6_rsk(req);
+ struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
struct ipv6_txoptions *opt = NULL;
@@ -888,8 +517,8 @@ done:
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
- if (tcp6_rsk(req)->pktopts)
- kfree_skb(tcp6_rsk(req)->pktopts);
+ if (inet6_rsk(req)->pktopts)
+ kfree_skb(inet6_rsk(req)->pktopts);
}
static struct request_sock_ops tcp6_request_sock_ops = {
@@ -901,26 +530,15 @@ static struct request_sock_ops tcp6_request_sock_ops = {
.send_reset = tcp_v6_send_reset
};
-static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_skb_parm *opt = IP6CB(skb);
-
- if (np->rxopt.all) {
- if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
- (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
- ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
- return 1;
- }
- return 0;
-}
-
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
+ .twsk_unique = tcp_twsk_unique,
+};
-static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
- struct sk_buff *skb)
+static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcphdr *th = skb->h.th;
if (skb->ip_summed == CHECKSUM_HW) {
th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
@@ -1091,8 +709,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
struct sock *nsk;
/* Find possible connection requests. */
- req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, inet6_iif(skb));
+ req = inet6_csk_search_req(sk, &prev, th->source,
+ &skb->nh.ipv6h->saddr,
+ &skb->nh.ipv6h->daddr, inet6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev);
@@ -1116,23 +735,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
return sk;
}
-static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
-
- reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
- inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
-}
-
-
/* FIXME: this is substantially similar to the ipv4 code.
* Can some kind of merge be done? -- erics
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
- struct tcp6_request_sock *treq;
+ struct inet6_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1157,7 +765,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
if (req == NULL)
goto drop;
@@ -1170,7 +778,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
- treq = tcp6_rsk(req);
+ treq = inet6_rsk(req);
ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
TCP_ECN_create_request(req, skb->h.th);
@@ -1196,8 +804,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (tcp_v6_send_synack(sk, req, NULL))
goto drop;
- tcp_v6_synq_add(sk, req);
-
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
drop:
@@ -1212,7 +819,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
- struct tcp6_request_sock *treq = tcp6_rsk(req);
+ struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
@@ -1247,7 +854,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
- newtp->af_specific = &ipv6_mapped;
+ inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
newnp->pktoptions = NULL;
newnp->opt = NULL;
@@ -1261,10 +868,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
*/
/* It is tricky place. Until this moment IPv4 tcp
- worked with IPv6 af_tcp.af_specific.
+ worked with IPv6 icsk.icsk_af_ops.
Sync it now.
*/
- tcp_sync_mss(newsk, newtp->pmtu_cookie);
+ tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
return newsk;
}
@@ -1371,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
sock_kfree_s(sk, opt, opt->tot_len);
}
- newtp->ext_header_len = 0;
+ inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
- newtp->ext_header_len = newnp->opt->opt_nflen +
- newnp->opt->opt_flen;
+ inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+ newnp->opt->opt_flen);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1382,7 +989,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
- __tcp_v6_hash(newsk);
+ __inet6_hash(&tcp_hashinfo, newsk);
inet_inherit_port(&tcp_hashinfo, sk, newsk);
return newsk;
@@ -1679,139 +1286,16 @@ do_time_wait:
goto discard_it;
}
-static int tcp_v6_rebuild_header(struct sock *sk)
-{
- int err;
- struct dst_entry *dst;
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct in6_addr *final_p = NULL, final;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl6_flowlabel = np->flow_label;
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet->dport;
- fl.fl_ip_sport = inet->sport;
-
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- ipv6_addr_copy(&final, &fl.fl6_dst);
- ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
- final_p = &final;
- }
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err) {
- sk->sk_route_caps = 0;
- return err;
- }
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
- sk->sk_err_soft = -err;
- return err;
- }
-
- ip6_dst_store(sk, dst, NULL);
- sk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
- }
-
- return 0;
-}
-
-static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
-{
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi fl;
- struct dst_entry *dst;
- struct in6_addr *final_p = NULL, final;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.fl6_flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_sport = inet->sport;
- fl.fl_ip_dport = inet->dport;
-
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- ipv6_addr_copy(&final, &fl.fl6_dst);
- ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
- final_p = &final;
- }
-
- dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst == NULL) {
- int err = ip6_dst_lookup(sk, &dst, &fl);
-
- if (err) {
- sk->sk_err_soft = -err;
- return err;
- }
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
- sk->sk_route_caps = 0;
- return err;
- }
-
- ip6_dst_store(sk, dst, NULL);
- sk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
- }
-
- skb->dst = dst_clone(dst);
-
- /* Restore final destination back after routing done */
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-
- return ip6_xmit(sk, skb, &fl, np->opt, 0);
-}
-
-static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
-
- sin6->sin6_family = AF_INET6;
- ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
- sin6->sin6_port = inet_sk(sk)->dport;
- /* We do not store received flowlabel for TCP */
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (sk->sk_bound_dev_if &&
- ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = sk->sk_bound_dev_if;
-}
-
static int tcp_v6_remember_stamp(struct sock *sk)
{
/* Alas, not yet... */
return 0;
}
-static struct tcp_func ipv6_specific = {
- .queue_xmit = tcp_v6_xmit,
+static struct inet_connection_sock_af_ops ipv6_specific = {
+ .queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
- .rebuild_header = tcp_v6_rebuild_header,
+ .rebuild_header = inet6_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.remember_stamp = tcp_v6_remember_stamp,
@@ -1819,7 +1303,7 @@ static struct tcp_func ipv6_specific = {
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
- .addr2sockaddr = v6_addr2sockaddr,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6)
};
@@ -1827,7 +1311,7 @@ static struct tcp_func ipv6_specific = {
* TCP over IPv4 via INET6 API
*/
-static struct tcp_func ipv6_mapped = {
+static struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
@@ -1838,7 +1322,7 @@ static struct tcp_func ipv6_mapped = {
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
- .addr2sockaddr = v6_addr2sockaddr,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6)
};
@@ -1877,8 +1361,9 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_state = TCP_CLOSE;
- tp->af_specific = &ipv6_specific;
+ icsk->icsk_af_ops = &ipv6_specific;
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ icsk->icsk_sync_mss = tcp_sync_mss;
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
@@ -1900,14 +1385,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
static void get_openreq6(struct seq_file *seq,
struct sock *sk, struct request_sock *req, int i, int uid)
{
- struct in6_addr *dest, *src;
int ttd = req->expires - jiffies;
+ struct in6_addr *src = &inet6_rsk(req)->loc_addr;
+ struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
if (ttd < 0)
ttd = 0;
- src = &tcp6_rsk(req)->loc_addr;
- dest = &tcp6_rsk(req)->rmt_addr;
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -1988,14 +1472,14 @@ static void get_timewait6_sock(struct seq_file *seq,
{
struct in6_addr *dest, *src;
__u16 destp, srcp;
- struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+ struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
int ttd = tw->tw_ttd - jiffies;
if (ttd < 0)
ttd = 0;
- dest = &tcp6tw->tw_v6_daddr;
- src = &tcp6tw->tw_v6_rcv_saddr;
+ dest = &tw6->tw_v6_daddr;
+ src = &tw6->tw_v6_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
@@ -2093,7 +1577,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
- .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
+ .twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
};
@@ -2110,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = {
.ops = &inet6_stream_ops,
.capability = -1,
.no_check = 0,
- .flags = INET_PROTOSW_PERMANENT,
+ .flags = INET_PROTOSW_PERMANENT |
+ INET_PROTOSW_ICSK,
};
void __init tcpv6_init(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb55b85..d8538dcea8130b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
+#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <net/sock.h>
@@ -300,20 +301,7 @@ out:
return err;
csum_copy_err:
- /* Clear queue. */
- if (flags&MSG_PEEK) {
- int clear = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- clear = 1;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- if (clear)
- kfree_skb(skb);
- }
-
- skb_free_datagram(sk, skb);
+ skb_kill_datagram(sk, skb, flags);
if (flags & MSG_DONTWAIT) {
UDP6_INC_STATS_USER(UDP_MIB_INERRORS);