diff options
author | David S. Miller <davem@nuts.davemloft.net> | 2004-12-14 20:56:22 -0800 |
---|---|---|
committer | David S. Miller <davem@nuts.davemloft.net> | 2004-12-14 20:56:22 -0800 |
commit | 712bc31381a3327859442ba1ec026d77e728fbbb (patch) | |
tree | c3b2ea56b3004562416f99c6c88b994b9c8eb43d /net | |
parent | 9d53e4dd111c4bcb1f55961fce39823e340016f6 (diff) | |
parent | 3a747d70f8c2940da69449376b6f9d848d1c56ee (diff) | |
download | history-712bc31381a3327859442ba1ec026d77e728fbbb.tar.gz |
Merge nuts.davemloft.net:/disk1/BK/network-2.6.11
into nuts.davemloft.net:/disk1/BK/net-2.6.11
Diffstat (limited to 'net')
30 files changed, 857 insertions, 371 deletions
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index b0e9f66269ee47..68ccef507b4960 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -189,8 +189,22 @@ config BRIDGE_EBT_LOG tristate "ebt: log support" depends on BRIDGE_NF_EBTABLES help - This option adds the log target, that you can use in any rule in - any ebtables table. It records the frame header to the syslog. + This option adds the log watcher, that you can use in any rule + in any ebtables table. It records info about the frame header + to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_ULOG + tristate "ebt: ulog support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the ulog watcher, that you can use in any rule + in any ebtables table. The packet is passed to a userspace + logging daemon using netlink multicast sockets. This differs + from the log watcher in the sense that the complete packet is + sent to userspace instead of a descriptive text and that + netlink multicast sockets are used instead of the syslog. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 6b7d017fcd18c3..8bf6d9f6e9d34c 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o # watchers obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o +obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_ulog.o diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 6c022efaab8095..5071e53272414e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -55,8 +55,9 @@ static void print_MAC(unsigned char *p) } #define myNIPQUAD(a) a[0], a[1], a[2], a[3] -static void ebt_log(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) { struct ebt_log_info *info = (struct ebt_log_info *)data; char level_string[4] = "< >"; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c new file mode 100644 index 00000000000000..ea6c24f6ff26eb --- /dev/null +++ b/net/bridge/netfilter/ebt_ulog.c @@ -0,0 +1,295 @@ +/* + * netfilter module for userspace bridged Ethernet frames logging daemons + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * November, 2004 + * + * Based on ipt_ULOG.c, which is + * (C) 2000-2002 by Harald Welte <laforge@netfilter.org> + * + * This module accepts two parameters: + * + * nlbufsiz: + * The parameter specifies how big the buffer for each netlink multicast + * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will + * get accumulated in the kernel until they are sent to userspace. It is + * NOT possible to allocate more than 128kB, and it is strongly discouraged, + * because atomically allocating 128kB inside the network rx softirq is not + * reliable. Please also keep in mind that this buffer size is allocated for + * each nlgroup you are using, so the total kernel memory usage increases + * by that factor. + * + * flushtimeout: + * Specify, after how many hundredths of a second the queue should be + * flushed even if it is not full yet. + * + */ + +#include <linux/module.h> +#include <linux/config.h> +#include <linux/spinlock.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/module.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ulog.h> +#include <net/sock.h> +#include "../br_private.h" + +#define PRINTR(format, args...) do { if (net_ratelimit()) \ + printk(format , ## args); } while (0) + +static unsigned int nlbufsiz = 4096; +module_param(nlbufsiz, uint, 0600); +MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " + "(defaults to 4096)"); + +static unsigned int flushtimeout = 10; +module_param(flushtimeout, uint, 0600); +MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) " + "(defaults to 10)"); + +typedef struct { + unsigned int qlen; /* number of nlmsgs' in the skb */ + struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ + struct sk_buff *skb; /* the pre-allocated skb */ + struct timer_list timer; /* the timer function */ + spinlock_t lock; /* the per-queue lock */ +} ebt_ulog_buff_t; + +static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; +static struct sock *ebtulognl; + +/* send one ulog_buff_t to userspace */ +static void ulog_send(unsigned int nlgroup) +{ + ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + + if (timer_pending(&ub->timer)) + del_timer(&ub->timer); + + /* last nlmsg needs NLMSG_DONE */ + if (ub->qlen > 1) + ub->lastnlh->nlmsg_type = NLMSG_DONE; + + NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; + netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); + + ub->qlen = 0; + ub->skb = NULL; +} + +/* timer function to flush queue in flushtimeout time */ +static void ulog_timer(unsigned long data) +{ + spin_lock_bh(&ulog_buffers[data].lock); + if (ulog_buffers[data].skb) + ulog_send(data); + spin_unlock_bh(&ulog_buffers[data].lock); +} + +static struct sk_buff *ulog_alloc_skb(unsigned int size) +{ + struct sk_buff *skb; + + skb = alloc_skb(nlbufsiz, GFP_ATOMIC); + if (!skb) { + PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " + "of size %ub!\n", nlbufsiz); + if (size < nlbufsiz) { + /* try to allocate only as much as we need for + * current packet */ + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + PRINTR(KERN_ERR "ebt_ulog: can't even allocate " + "buffer of size %ub\n", size); + } + } + + return skb; +} + +static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + ebt_ulog_packet_msg_t *pm; + size_t size, copy_len; + struct nlmsghdr *nlh; + struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + unsigned int group = uloginfo->nlgroup; + ebt_ulog_buff_t *ub = &ulog_buffers[group]; + spinlock_t *lock = &ub->lock; + + if ((uloginfo->cprange == 0) || + (uloginfo->cprange > skb->len + ETH_HLEN)) + copy_len = skb->len + ETH_HLEN; + else + copy_len = uloginfo->cprange; + + size = NLMSG_SPACE(sizeof(*pm) + copy_len); + if (size > nlbufsiz) { + PRINTR("ebt_ulog: Size %d needed, but nlbufsiz=%d\n", + size, nlbufsiz); + return; + } + + spin_lock_bh(lock); + + if (!ub->skb) { + if (!(ub->skb = ulog_alloc_skb(size))) + goto alloc_failure; + } else if (size > skb_tailroom(ub->skb)) { + ulog_send(group); + + if (!(ub->skb = ulog_alloc_skb(size))) + goto alloc_failure; + } + + nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, 0, + size - NLMSG_ALIGN(sizeof(*nlh))); + ub->qlen++; + + pm = NLMSG_DATA(nlh); + + /* Fill in the ulog data */ + pm->version = EBT_ULOG_VERSION; + do_gettimeofday(&pm->stamp); + if (ub->qlen == 1) + ub->skb->stamp = pm->stamp; + pm->data_len = copy_len; + pm->mark = skb->nfmark; + pm->hook = hooknr; + if (uloginfo->prefix != NULL) + strcpy(pm->prefix, uloginfo->prefix); + else + *(pm->prefix) = '\0'; + + if (in) { + strcpy(pm->physindev, in->name); + /* If in isn't a bridge, then physindev==indev */ + if (in->br_port) + strcpy(pm->indev, in->br_port->br->dev->name); + else + strcpy(pm->indev, in->name); + } else + pm->indev[0] = pm->physindev[0] = '\0'; + + if (out) { + /* If out exists, then out is a bridge port */ + strcpy(pm->physoutdev, out->name); + strcpy(pm->outdev, out->br_port->br->dev->name); + } else + pm->outdev[0] = pm->physoutdev[0] = '\0'; + + if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) + BUG(); + + if (ub->qlen > 1) + ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; + + ub->lastnlh = nlh; + + if (ub->qlen >= uloginfo->qthreshold) + ulog_send(group); + else if (!timer_pending(&ub->timer)) { + ub->timer.expires = jiffies + flushtimeout * HZ / 100; + add_timer(&ub->timer); + } + +unlock: + spin_unlock_bh(lock); + + return; + +nlmsg_failure: + printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " + "not happen, please report to author.\n"); + goto unlock; +alloc_failure: + goto unlock; +} + +static int ebt_ulog_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || + uloginfo->nlgroup > 31) + return -EINVAL; + + uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; + + if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) + uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; + + return 0; +} + +static struct ebt_watcher ulog = { + .name = EBT_ULOG_WATCHER, + .watcher = ebt_ulog, + .check = ebt_ulog_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + int i, ret = 0; + + if (nlbufsiz >= 128*1024) { + printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," + " please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + /* initialize ulog_buffers */ + for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { + init_timer(&ulog_buffers[i].timer); + ulog_buffers[i].timer.function = ulog_timer; + ulog_buffers[i].timer.data = i; + ulog_buffers[i].lock = SPIN_LOCK_UNLOCKED; + } + + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + if (!ebtulognl) + ret = -ENOMEM; + else if ((ret = ebt_register_watcher(&ulog))) + sock_release(ebtulognl->sk_socket); + + return ret; +} + +static void __exit fini(void) +{ + ebt_ulog_buff_t *ub; + int i; + + ebt_unregister_watcher(&ulog); + for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { + ub = &ulog_buffers[i]; + if (timer_pending(&ub->timer)) + del_timer(&ub->timer); + spin_lock_bh(&ub->lock); + if (ub->skb) { + kfree_skb(ub->skb); + ub->skb = NULL; + } + spin_unlock_bh(&ub->lock); + } + sock_release(ebtulognl->sk_socket); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); +MODULE_DESCRIPTION("ebtables userspace logging module for bridged Ethernet" + " frames"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 00926f0bb3a96a..33dde2be31ba3e 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -90,10 +90,10 @@ static struct ebt_target ebt_standard_target = { {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; static inline int ebt_do_watcher (struct ebt_entry_watcher *w, - const struct sk_buff *skb, const struct net_device *in, + const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out) { - w->u.watcher->watcher(skb, in, out, w->data, + w->u.watcher->watcher(skb, hooknr, in, out, w->data, w->watcher_size); /* watchers don't give a verdict */ return 0; @@ -208,7 +208,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, in, + EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, out); t = (struct ebt_entry_target *) diff --git a/net/core/dev.c b/net/core/dev.c index 8453a1afb0361f..051353be45c889 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1486,7 +1486,7 @@ int netif_rx_ni(struct sk_buff *skb) preempt_disable(); err = netif_rx(skb); - if (softirq_pending(smp_processor_id())) + if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -2693,8 +2693,7 @@ static inline void net_set_todo(struct net_device *dev) * chain. 0 is returned on success. A negative errno code is returned * on a failure to set up the device, or if the name is a duplicate. * - * Callers must hold the rtnl semaphore. See the comment at the - * end of Space.c for details about the locking. You may want + * Callers must hold the rtnl semaphore. You may want * register_netdev() instead of this. * * BUGS: @@ -2815,6 +2814,51 @@ out_err: goto out; } +/** + * register_netdev - register a network device + * @dev: device to register + * + * Take a completed network device structure and add it to the kernel + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier + * chain. 0 is returned on success. A negative errno code is returned + * on a failure to set up the device, or if the name is a duplicate. + * + * This is a wrapper around register_netdev that takes the rtnl semaphore + * and expands the device name if you passed a format string to + * alloc_netdev. + */ +int register_netdev(struct net_device *dev) +{ + int err; + + rtnl_lock(); + + /* + * If the name is a format string the caller wants us to do a + * name allocation. + */ + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto out; + } + + /* + * Back compatibility hook. Kill this one in 2.5 + */ + if (dev->name[0] == 0 || dev->name[0] == ' ') { + err = dev_alloc_name(dev, "eth%d"); + if (err < 0) + goto out; + } + + err = register_netdevice(dev); +out: + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(register_netdev); + /* * netdev_wait_allrefs - wait until all references are gone. * @@ -2958,6 +3002,46 @@ out: } /** + * alloc_netdev - allocate network device + * @sizeof_priv: size of private data to allocate space for + * @name: device name format string + * @setup: callback to initialize device + * + * Allocates a struct net_device with private data area for driver use + * and performs basic initialization. + */ +struct net_device *alloc_netdev(int sizeof_priv, const char *name, + void (*setup)(struct net_device *)) +{ + void *p; + struct net_device *dev; + int alloc_size; + + /* ensure 32-byte alignment of both the device and private area */ + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; + + p = kmalloc(alloc_size, GFP_KERNEL); + if (!p) { + printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + return NULL; + } + memset(p, 0, alloc_size); + + dev = (struct net_device *) + (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + dev->padded = (char *)dev - (char *)p; + + if (sizeof_priv) + dev->priv = netdev_priv(dev); + + setup(dev); + strcpy(dev->name, name); + return dev; +} +EXPORT_SYMBOL(alloc_netdev); + +/** * free_netdev - free network device * @dev: device * @@ -2999,8 +3083,7 @@ void synchronize_net(void) * from the kernel tables. On success 0 is returned, on a failure * a negative errno code is returned. * - * Callers must hold the rtnl semaphore. See the comment at the - * end of Space.c for details about the locking. You may want + * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ @@ -3078,6 +3161,27 @@ int unregister_netdevice(struct net_device *dev) return 0; } +/** + * unregister_netdev - remove device from the kernel + * @dev: device + * + * This function shuts down a device interface and removes it + * from the kernel tables. On success 0 is returned, on a failure + * a negative errno code is returned. + * + * This is just a wrapper for unregister_netdevice that takes + * the rtnl semaphore. In general you want to use this and not + * unregister_netdevice. + */ +void unregister_netdev(struct net_device *dev) +{ + rtnl_lock(); + unregister_netdevice(dev); + rtnl_unlock(); +} + +EXPORT_SYMBOL(unregister_netdev); + #ifdef CONFIG_HOTPLUG_CPU static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 52641b0a5de8ce..fe2dc396cd4da7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -267,7 +267,22 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *dev; int err, send_addr_notify = 0; - dev = dev_get_by_index(ifm->ifi_index); + if (ifm->ifi_index >= 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (ida[IFLA_IFNAME - 1]) { + char ifname[IFNAMSIZ]; + + if (RTA_PAYLOAD(ida[IFLA_IFNAME - 1]) > RTA_ALIGN(sizeof(ifname))) + return -EINVAL; + + memset(ifname, 0, sizeof(ifname)); + memcpy(ifname, RTA_DATA(ida[IFLA_IFNAME - 1]), + RTA_PAYLOAD(ida[IFLA_IFNAME - 1])); + ifname[IFNAMSIZ - 1] = '\0'; + dev = dev_get_by_name(ifname); + } else + return -EINVAL; + if (!dev) return -ENODEV; @@ -358,10 +373,10 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); } - if (ida[IFLA_IFNAME - 1]) { + if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { char ifname[IFNAMSIZ]; - if (ida[IFLA_IFNAME - 1]->rta_len > RTA_LENGTH(sizeof(ifname))) + if (RTA_PAYLOAD(ida[IFLA_IFNAME - 1]) > RTA_ALIGN(sizeof(ifname))) goto out; memset(ifname, 0, sizeof(ifname)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3e991428fb3d1c..c83dd4c9b05730 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -163,6 +163,59 @@ nodata: goto out; } +/** + * alloc_skb_from_cache - allocate a network buffer + * @cp: kmem_cache from which to allocate the data area + * (object size must be big enough for @size bytes + skb overheads) + * @size: size to allocate + * @gfp_mask: allocation mask + * + * Allocate a new &sk_buff. The returned buffer has no headroom and + * tail room of size bytes. The object has a reference count of one. + * The return is the buffer. On a failure the return is %NULL. + * + * Buffers may only be allocated from interrupts using a @gfp_mask of + * %GFP_ATOMIC. + */ +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, + unsigned int size, int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + /* Get the HEAD */ + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); + if (!skb) + goto out; + + /* Get the DATA. */ + size = SKB_DATA_ALIGN(size); + data = kmem_cache_alloc(cp, gfp_mask); + if (!data) + goto nodata; + + memset(skb, 0, offsetof(struct sk_buff, truesize)); + skb->truesize = size + sizeof(struct sk_buff); + atomic_set(&skb->users, 1); + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->frag_list = NULL; +out: + return skb; +nodata: + kmem_cache_free(skbuff_head_cache, skb); + skb = NULL; + goto out; +} + static void skb_drop_fraglist(struct sk_buff *skb) { diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 21e11b49d74232..f13ba990f41942 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -274,29 +274,6 @@ out: static int fn_hash_last_dflt=-1; -static int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx) -{ - struct neighbour *n; - int state = NUD_NONE; - - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); - if (n) { - state = n->nud_state; - neigh_release(n); - } - if (state==NUD_REACHABLE) - return 0; - if ((state&NUD_VALID) && order != fn_hash_last_dflt) - return 0; - if ((state&NUD_VALID) || - (*last_idx<0 && order > fn_hash_last_dflt)) { - *last_resort = fi; - *last_idx = order; - } - return 1; -} - static void fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { @@ -337,7 +314,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx)) { + &last_idx, &fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; @@ -355,7 +332,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx)) { + if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; @@ -376,11 +353,6 @@ out: read_unlock(&fib_hash_lock); } -static void rtmsg_fib(int, struct fib_node *, struct fib_alias *, - int, int, - struct nlmsghdr *n, - struct netlink_skb_parms *); - /* Insert node F to FZ. */ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) { @@ -404,26 +376,6 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. - */ -static struct fib_alias *fib_find_alias(struct fib_node *fn, u8 tos, u32 prio) -{ - if (fn) { - struct list_head *head = &fn->fn_alias; - struct fib_alias *fa; - - list_for_each_entry(fa, head, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) @@ -463,7 +415,11 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fn_rehash_zone(fz); f = fib_find_node(fz, key); - fa = fib_find_alias(f, tos, fi->fib_priority); + + if (!f) + fa = NULL; + else + fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); /* Now fa, if non-NULL, points to the first fib alias * with the same keys [prefix,tos,priority], if such key already @@ -565,7 +521,7 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, f, new_fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); return 0; out_free_new_fa: @@ -603,7 +559,11 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, } f = fib_find_node(fz, key); - fa = fib_find_alias(f, tos, 0); + + if (!f) + fa = NULL; + else + fa = fib_find_alias(&f->fn_alias, tos, 0); if (!fa) return -ESRCH; @@ -631,7 +591,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, f, fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); kill_fn = 0; write_lock_bh(&fib_hash_lock); @@ -796,33 +756,6 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin return skb->len; } -static void rtmsg_fib(int event, struct fib_node *f, struct fib_alias *fa, - int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req) -{ - struct sk_buff *skb; - u32 pid = req ? req->pid : 0; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; - - if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &f->fn_key, z, - fa->fa_tos, - fa->fa_info) < 0) { - kfree_skb(skb); - return; - } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; - if (n->nlmsg_flags&NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); - if (n->nlmsg_flags&NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); -} - #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table * fib_hash_init(int id) #else diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index d504a28ce78c03..56debc86eab024 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -30,5 +30,13 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi); +extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, + int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req); +extern struct fib_alias *fib_find_alias(struct list_head *fah, + u8 tos, u32 prio); +extern int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, + int *last_idx, int *dflt); #endif /* _FIB_LOOKUP_H */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a1349862a8ce71..68d48c21511e81 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -270,6 +270,74 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) return -1; } +void rtmsg_fib(int event, u32 key, struct fib_alias *fa, + int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct sk_buff *skb; + u32 pid = req ? req->pid : 0; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + fa->fa_type, fa->fa_scope, &key, z, + fa->fa_tos, + fa->fa_info) < 0) { + kfree_skb(skb); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + if (n->nlmsg_flags&NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + if (n->nlmsg_flags&NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); +} + +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + if (fah) { + struct fib_alias *fa; + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || + fa->fa_tos < tos) + return fa; + } + } + return NULL; +} + +int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx, int *dflt) +{ + struct neighbour *n; + int state = NUD_NONE; + + n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); + if (n) { + state = n->nud_state; + neigh_release(n); + } + if (state==NUD_REACHABLE) + return 0; + if ((state&NUD_VALID) && order != *dflt) + return 0; + if ((state&NUD_VALID) || + (*last_idx<0 && order > *dflt)) { + *last_resort = fi; + *last_idx = order; + } + return 1; +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index eb8cf140ef51b0..2d59090cbf6f1d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -504,20 +504,6 @@ config IP_NF_TARGET_SAME To compile it as a module, choose M here. If unsure, say N. -config IP_NF_NAT_LOCAL - bool "NAT of local connections (READ HELP)" - depends on IP_NF_NAT - help - This option enables support for NAT of locally originated connections. - Enable this if you need to use destination NAT on connections - originating from local processes on the nat box itself. - - Please note that you will need a recent version (>= 1.2.6a) - of the iptables userspace program in order to use this feature. - See <http://www.iptables.org/> for download instructions. - - If unsure, say 'N'. - config IP_NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support (EXPERIMENTAL)" depends on EXPERIMENTAL && IP_NF_NAT diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 083f0327cf2789..cc709c436bc941 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -883,6 +883,7 @@ void need_ip_conntrack(void) EXPORT_SYMBOL(ip_conntrack_protocol_register); EXPORT_SYMBOL(ip_conntrack_protocol_unregister); +EXPORT_SYMBOL(ip_ct_get_tuple); EXPORT_SYMBOL(invert_tuplepr); EXPORT_SYMBOL(ip_conntrack_alter_reply); EXPORT_SYMBOL(ip_conntrack_destroyed); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 4ff337eb1e8172..9b533caddc5d69 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -182,7 +182,6 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, return 0; } -#ifdef CONFIG_IP_NF_NAT_LOCAL /* If it's really a local destination manip, it may need to do a source manip too. */ static int @@ -202,7 +201,6 @@ do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp) ip_rt_put(rt); return 1; } -#endif /* Simple way to iterate through all. */ static inline int fake_cmp(const struct ip_conntrack *ct, @@ -301,7 +299,6 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * do_extra_mangle last time. */ *other_ipp = saved_ip; -#ifdef CONFIG_IP_NF_NAT_LOCAL if (hooknum == NF_IP_LOCAL_OUT && *var_ipp != orig_dstip && !do_extra_mangle(*var_ipp, other_ipp)) { @@ -312,7 +309,6 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * anyway. */ continue; } -#endif /* Count how many others map onto this. */ score = count_maps(tuple->src.ip, tuple->dst.ip, @@ -356,13 +352,11 @@ find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple, else { /* Only do extra mangle when required (breaks socket binding) */ -#ifdef CONFIG_IP_NF_NAT_LOCAL if (tuple->dst.ip != mr->range[0].min_ip && hooknum == NF_IP_LOCAL_OUT && !do_extra_mangle(mr->range[0].min_ip, &tuple->src.ip)) return NULL; -#endif tuple->dst.ip = mr->range[0].min_ip; } } @@ -473,10 +467,8 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, static unsigned int opposite_hook[NF_IP_NUMHOOKS] = { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, -#ifdef CONFIG_IP_NF_NAT_LOCAL [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN, [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT, -#endif }; unsigned int @@ -821,6 +813,23 @@ do_bindings(struct ip_conntrack *ct, /* not reached */ } +static inline int tuple_src_equal_dst(const struct ip_conntrack_tuple *t1, + const struct ip_conntrack_tuple *t2) +{ + if (t1->dst.protonum != t2->dst.protonum || t1->src.ip != t2->dst.ip) + return 0; + if (t1->dst.protonum != IPPROTO_ICMP) + return t1->src.u.all == t2->dst.u.all; + else { + struct ip_conntrack_tuple inv; + + /* ICMP tuples are asymetric */ + invert_tuplepr(&inv, t1); + return inv.src.u.all == t2->src.u.all && + inv.dst.u.all == t2->dst.u.all; + } +} + int icmp_reply_translation(struct sk_buff **pskb, struct ip_conntrack *conntrack, @@ -833,6 +842,7 @@ icmp_reply_translation(struct sk_buff **pskb, } *inside; unsigned int i; struct ip_nat_info *info = &conntrack->nat.info; + struct ip_conntrack_tuple *cttuple, innertuple; int hdrlen; if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside))) @@ -876,6 +886,13 @@ icmp_reply_translation(struct sk_buff **pskb, such addresses are not too uncommon, as Alan Cox points out) */ + if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + + sizeof(struct icmphdr) + inside->ip.ihl*4, + &innertuple, + ip_ct_find_proto(inside->ip.protocol))) + return 0; + cttuple = &conntrack->tuplehash[dir].tuple; + READ_LOCK(&ip_nat_lock); for (i = 0; i < info->num_manips; i++) { DEBUGP("icmp_reply: manip %u dir %s hook %u\n", @@ -885,37 +902,52 @@ icmp_reply_translation(struct sk_buff **pskb, if (info->manips[i].direction != dir) continue; - /* Mapping the inner packet is just like a normal - packet, except it was never src/dst reversed, so - where we would normally apply a dst manip, we apply - a src, and vice versa. */ - if (info->manips[i].hooknum == hooknum) { - DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", - info->manips[i].maniptype == IP_NAT_MANIP_SRC - ? "DST" : "SRC", - NIPQUAD(info->manips[i].manip.ip), - ntohs(info->manips[i].manip.u.udp.port)); - if (!manip_pkt(inside->ip.protocol, pskb, - (*pskb)->nh.iph->ihl*4 - + sizeof(inside->icmp), - &info->manips[i].manip, - !info->manips[i].maniptype)) - goto unlock_fail; + /* Mapping the inner packet is just like a normal packet, except + * it was never src/dst reversed, so where we would normally + * apply a dst manip, we apply a src, and vice versa. */ - /* Outer packet needs to have IP header NATed like - it's a reply. */ + /* Only true for forwarded packets, locally generated packets + * never hit PRE_ROUTING, we need to apply their PRE_ROUTING + * manips in LOCAL_OUT. */ + if (hooknum == NF_IP_LOCAL_OUT && + info->manips[i].hooknum == NF_IP_PRE_ROUTING) + hooknum = info->manips[i].hooknum; - /* Use mapping to map outer packet: 0 give no - per-proto mapping */ - DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", - info->manips[i].maniptype == IP_NAT_MANIP_SRC - ? "SRC" : "DST", - NIPQUAD(info->manips[i].manip.ip)); - if (!manip_pkt(0, pskb, 0, - &info->manips[i].manip, - info->manips[i].maniptype)) - goto unlock_fail; + if (info->manips[i].hooknum != hooknum) + continue; + + /* ICMP errors may be generated locally for packets that + * don't have all NAT manips applied yet. Verify manips + * have been applied before reversing them */ + if (info->manips[i].maniptype == IP_NAT_MANIP_SRC) { + if (!tuple_src_equal_dst(cttuple, &innertuple)) + continue; + } else { + if (!tuple_src_equal_dst(&innertuple, cttuple)) + continue; } + + DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "DST" : "SRC", NIPQUAD(info->manips[i].manip.ip), + ntohs(info->manips[i].manip.u.udp.port)); + if (!manip_pkt(inside->ip.protocol, pskb, + (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp), + &info->manips[i].manip, + !info->manips[i].maniptype)) + goto unlock_fail; + + /* Outer packet needs to have IP header NATed like + it's a reply. */ + + /* Use mapping to map outer packet: 0 give no + per-proto mapping */ + DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", NIPQUAD(info->manips[i].manip.ip)); + if (!manip_pkt(0, pskb, 0, &info->manips[i].manip, + info->manips[i].maniptype)) + goto unlock_fail; } READ_UNLOCK(&ip_nat_lock); diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 9f727b6309238d..db0bf47bacff14 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -149,12 +149,8 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; -#ifdef CONFIG_IP_NF_NAT_LOCAL IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); -#else - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING); -#endif ct = ip_conntrack_get(*pskb, &ctinfo); @@ -232,13 +228,6 @@ static int ipt_dnat_checkentry(const char *tablename, return 0; } -#ifndef CONFIG_IP_NF_NAT_LOCAL - if (hook_mask & (1 << NF_IP_LOCAL_OUT)) { - DEBUGP("DNAT: CONFIG_IP_NF_NAT_LOCAL not enabled\n"); - return 0; - } -#endif - return 1; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 71bd2e05b0b221..0ef8efffb91bd5 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -128,16 +128,7 @@ ip_nat_fn(unsigned int hooknum, WRITE_LOCK(&ip_nat_lock); /* Seen it before? This can happen for loopback, retrans, or local packets.. */ - if (!(info->initialized & (1 << maniptype)) -#ifndef CONFIG_IP_NF_NAT_LOCAL - /* If this session has already been confirmed we must not - * touch it again even if there is no mapping set up. - * Can only happen on local->local traffic with - * CONFIG_IP_NF_NAT_LOCAL disabled. - */ - && !(ct->status & IPS_CONFIRMED) -#endif - ) { + if (!(info->initialized & (1 << maniptype))) { unsigned int ret; if (ct->master @@ -146,15 +137,14 @@ ip_nat_fn(unsigned int hooknum, ret = call_expect(master_ct(ct), pskb, hooknum, ct, info); } else { -#ifdef CONFIG_IP_NF_NAT_LOCAL /* LOCAL_IN hook doesn't have a chain! */ if (hooknum == NF_IP_LOCAL_IN) ret = alloc_null_binding(ct, info, hooknum); else -#endif - ret = ip_nat_rule_find(pskb, hooknum, in, out, - ct, info); + ret = ip_nat_rule_find(pskb, hooknum, + in, out, ct, + info); } if (ret != NF_ACCEPT) { @@ -180,6 +170,29 @@ ip_nat_fn(unsigned int hooknum, } static unsigned int +ip_nat_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + u_int32_t saddr, daddr; + unsigned int ret; + + saddr = (*pskb)->nh.iph->saddr; + daddr = (*pskb)->nh.iph->daddr; + + ret = ip_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN + && ((*pskb)->nh.iph->saddr != saddr + || (*pskb)->nh.iph->daddr != daddr)) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; + } + return ret; +} + +static unsigned int ip_nat_out(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -211,7 +224,6 @@ ip_nat_out(unsigned int hooknum, return ip_nat_fn(hooknum, pskb, in, out, okfn); } -#ifdef CONFIG_IP_NF_NAT_LOCAL static unsigned int ip_nat_local_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -237,13 +249,12 @@ ip_nat_local_fn(unsigned int hooknum, return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; return ret; } -#endif /* We must be after connection tracking and before packet filtering. */ /* Before packet filtering, change destination */ static struct nf_hook_ops ip_nat_in_ops = { - .hook = ip_nat_fn, + .hook = ip_nat_in, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_PRE_ROUTING, @@ -259,7 +270,6 @@ static struct nf_hook_ops ip_nat_out_ops = { .priority = NF_IP_PRI_NAT_SRC, }; -#ifdef CONFIG_IP_NF_NAT_LOCAL /* Before packet filtering, change destination */ static struct nf_hook_ops ip_nat_local_out_ops = { .hook = ip_nat_local_fn, @@ -277,7 +287,6 @@ static struct nf_hook_ops ip_nat_local_in_ops = { .hooknum = NF_IP_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }; -#endif /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) @@ -334,7 +343,6 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't register out hook.\n"); goto cleanup_inops; } -#ifdef CONFIG_IP_NF_NAT_LOCAL ret = nf_register_hook(&ip_nat_local_out_ops); if (ret < 0) { printk("ip_nat_init: can't register local out hook.\n"); @@ -345,16 +353,13 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't register local in hook.\n"); goto cleanup_localoutops; } -#endif return ret; cleanup: -#ifdef CONFIG_IP_NF_NAT_LOCAL nf_unregister_hook(&ip_nat_local_in_ops); cleanup_localoutops: nf_unregister_hook(&ip_nat_local_out_ops); cleanup_outops: -#endif nf_unregister_hook(&ip_nat_out_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2fe4fd4e34cf65..b04297f5ddf861 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -334,6 +334,18 @@ config NET_CLS_IND Requires a new iproute2 You MUST NOT turn this on if you dont have an update iproute2. +config CLS_U32_MARK + bool "Use nfmark as a key in U32 classifier" + depends on NET_CLS_U32 && NETFILTER + help + This allows you to match mark in a u32 filter. + Example: + tc filter add dev eth0 protocol ip parent 1:0 prio 5 u32 \ + match mark 0x0090 0xffff \ + match ip dst 4.4.4.4 \ + flowid 1:90 + You must use a new iproute2 to use this feature. + config NET_CLS_RSVP tristate "Special RSVP classifier" depends on NET_CLS && NET_QOS diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 79b2a400114954..94a05533fa074a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -294,14 +294,16 @@ rtattr_failure: } -int tcf_action_init_1(struct rtattr *rta, struct rtattr *est, struct tc_action *a, char *name, int ovr, int bind ) +struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, + char *name, int ovr, int bind, int *err) { + struct tc_action *a; struct tc_action_ops *a_o; char act_name[4 + IFNAMSIZ + 1]; struct rtattr *tb[TCA_ACT_MAX+1]; struct rtattr *kind = NULL; - int err = -EINVAL; + *err = -EINVAL; if (NULL == name) { if (rtattr_parse(tb, TCA_ACT_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta))<0) @@ -337,22 +339,25 @@ int tcf_action_init_1(struct rtattr *rta, struct rtattr *est, struct tc_action * goto err_out; } - if (NULL == a) { + a = kmalloc(sizeof(*a), GFP_KERNEL); + if (a == NULL) { + *err = -ENOMEM; goto err_mod; } + memset(a, 0, sizeof(*a)); /* backward compatibility for policer */ if (NULL == name) { - err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind); - if (0 > err ) { - err = -EINVAL; - goto err_mod; + *err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind); + if (*err < 0) { + *err = -EINVAL; + goto err_free; } } else { - err = a_o->init(rta, est, a, ovr, bind); - if (0 > err ) { - err = -EINVAL; - goto err_mod; + *err = a_o->init(rta, est, a, ovr, bind); + if (*err < 0) { + *err = -EINVAL; + goto err_free; } } @@ -360,60 +365,58 @@ int tcf_action_init_1(struct rtattr *rta, struct rtattr *est, struct tc_action * if it exists and is only bound to in a_o->init() then ACT_P_CREATED is not returned (a zero is). */ - if (ACT_P_CREATED != err) { + if (*err != ACT_P_CREATED) module_put(a_o->owner); - } a->ops = a_o; DPRINTK("tcf_action_init_1: successfull %s \n",act_name); - return 0; + *err = 0; + return a; + +err_free: + kfree(a); err_mod: module_put(a_o->owner); err_out: - return err; + return NULL; } -int tcf_action_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, char *name, int ovr , int bind) +struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est, + char *name, int ovr, int bind, int *err) { struct rtattr *tb[TCA_ACT_MAX_PRIO+1]; + struct tc_action *a = NULL, *act, *act_prev = NULL; int i; - struct tc_action *act = a, *a_s = a; - - int err = -EINVAL; - if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(rta), RTA_PAYLOAD(rta))<0) - return err; + if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(rta), + RTA_PAYLOAD(rta)) < 0) { + *err = -EINVAL; + return a; + } - for (i=0; i < TCA_ACT_MAX_PRIO ; i++) { + for (i=0; i < TCA_ACT_MAX_PRIO; i++) { if (tb[i]) { - if (NULL == act) { - act = kmalloc(sizeof(*act),GFP_KERNEL); - if (NULL == act) { - err = -ENOMEM; - goto bad_ret; - } - memset(act, 0,sizeof(*act)); - } - act->next = NULL; - if (0 > tcf_action_init_1(tb[i],est,act,name,ovr,bind)) { - printk("Error processing action order %d\n",i); - return err; + act = tcf_action_init_1(tb[i], est, name, ovr, bind, err); + if (act == NULL) { + printk("Error processing action order %d\n", i); + goto bad_ret; } act->order = i+1; - if (a_s != act) { - a_s->next = act; - a_s = act; - } - act = NULL; + if (a == NULL) + a = act; + else + act_prev->next = act; + act_prev = act; } } + return a; - return 0; bad_ret: - tcf_action_destroy(a, bind); - return err; + if (a != NULL) + tcf_action_destroy(a, bind); + return NULL; } int tcf_action_copy_stats (struct sk_buff *skb,struct tc_action *a) @@ -849,21 +852,9 @@ static int tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int o struct tc_action *a = NULL; u32 seq = n->nlmsg_seq; - act = kmalloc(sizeof(*act),GFP_KERNEL); - if (NULL == act) - return -ENOMEM; - - memset(act, 0, sizeof(*act)); - - ret = tcf_action_init(rta, NULL,act,NULL,ovr,0); - /* NOTE: We have an all-or-none model - * This means that of any of the actions fail - * to update then all are undone. - * */ - if (0 > ret) { - tcf_action_destroy(act, 0); + act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret); + if (act == NULL) goto done; - } /* dump then free all the actions after update; inserted policy * stays intact @@ -880,7 +871,6 @@ static int tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int o } } done: - return ret; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 0e8505bbbb38a1..984e1f27b1a69c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -27,6 +27,7 @@ * JHS: We should remove the CONFIG_NET_CLS_IND from here * eventually when the meta match extension is made available * + * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> */ #include <asm/uaccess.h> @@ -58,6 +59,13 @@ #include <net/pkt_cls.h> +struct tc_u32_mark +{ + __u32 val; + __u32 mask; + __u32 success; +}; + struct tc_u_knode { struct tc_u_knode *next; @@ -79,6 +87,9 @@ struct tc_u_knode #ifdef CONFIG_CLS_U32_PERF struct tc_u32_pcnt *pf; #endif +#ifdef CONFIG_CLS_U32_MARK + struct tc_u32_mark mark; +#endif struct tc_u32_sel sel; }; @@ -139,6 +150,16 @@ next_knode: n->pf->rcnt +=1; j = 0; #endif + +#ifdef CONFIG_CLS_U32_MARK + if ((skb->nfmark & n->mark.mask) != n->mark.val) { + n = n->next; + goto next_knode; + } else { + n->mark.success++; + } +#endif + for (i = n->sel.nkeys; i>0; i--, key++) { if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { @@ -554,6 +575,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, struct tc_u_hnode *ht; struct tc_u_knode *n; struct tc_u32_sel *s; + struct tc_u32_mark *mark; struct rtattr *opt = tca[TCA_OPTIONS-1]; struct rtattr *tb[TCA_U32_MAX]; u32 htid; @@ -657,6 +679,17 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, } n->fshift = i; } + +#ifdef CONFIG_CLS_U32_MARK + if (tb[TCA_U32_MARK-1]) { + if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) + return -EINVAL; + mark = RTA_DATA(tb[TCA_U32_MARK-1]); + memcpy(&n->mark, mark, sizeof(struct tc_u32_mark)); + n->mark.success = 0; + } +#endif + err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]); if (err == 0) { struct tc_u_knode **ins; @@ -744,6 +777,12 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid); if (n->ht_down) RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle); + +#ifdef CONFIG_CLS_U32_MARK + if (n->mark.val || n->mark.mask) + RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark); +#endif + #ifdef CONFIG_NET_CLS_ACT if (tcf_dump_act(skb, n->action, TCA_U32_ACT, TCA_U32_POLICE) < 0) goto rtattr_failure; diff --git a/net/socket.c b/net/socket.c index 102d2737c269ec..45aada9314a1f1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1073,7 +1073,6 @@ int sock_wake_async(struct socket *sock, int how, int band) static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) { - int i; int err; struct socket *sock; @@ -1118,7 +1117,7 @@ static int __sock_create(int family, int type, int protocol, struct socket **res net_family_read_lock(); if (net_families[family] == NULL) { - i = -EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto out; } @@ -1128,10 +1127,9 @@ static int __sock_create(int family, int type, int protocol, struct socket **res * default. */ - if (!(sock = sock_alloc())) - { + if (!(sock = sock_alloc())) { printk(KERN_WARNING "socket: no more sockets\n"); - i = -ENFILE; /* Not exactly a match, but its the + err = -ENFILE; /* Not exactly a match, but its the closest posix thing */ goto out; } @@ -1142,11 +1140,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res * We will call the ->create function, that possibly is in a loadable * module, so we have to bump that loadable module refcnt first. */ - i = -EAFNOSUPPORT; + err = -EAFNOSUPPORT; if (!try_module_get(net_families[family]->owner)) goto out_release; - if ((i = net_families[family]->create(sock, protocol)) < 0) + if ((err = net_families[family]->create(sock, protocol)) < 0) goto out_module_put; /* * Now to bump the refcnt of the [loadable] module that owns this @@ -1166,7 +1164,7 @@ static int __sock_create(int family, int type, int protocol, struct socket **res out: net_family_read_unlock(); - return i; + return err; out_module_put: module_put(net_families[family]->owner); out_release: diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 2d8ee7ce0d8f9b..02d8235d5a555b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -34,28 +34,19 @@ #include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/string.h> -#include <linux/sockios.h> #include <linux/net.h> -#include <linux/stat.h> -#include <linux/inet.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/init.h> #include <net/x25.h> @@ -223,14 +214,19 @@ static void x25_insert_socket(struct sock *sk) /* * Find a socket that wants to accept the Call Request we just - * received. + * received. Check the full list for an address/cud match. + * If no cuds match return the next_best thing, an address match. + * Note: if a listening socket has cud set it must only get calls + * with matching cud. */ -static struct sock *x25_find_listener(struct x25_address *addr) +static struct sock *x25_find_listener(struct x25_address *addr, struct x25_calluserdata *calluserdata) { struct sock *s; + struct sock *next_best; struct hlist_node *node; read_lock_bh(&x25_list_lock); + next_best = NULL; sk_for_each(s, node, &x25_list) if ((!strcmp(addr->x25_addr, @@ -238,9 +234,24 @@ static struct sock *x25_find_listener(struct x25_address *addr) !strcmp(addr->x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { - sock_hold(s); - goto found; + + /* + * Found a listening socket, now check the incoming + * call user data vs this sockets call user data + */ + if (x25_check_calluserdata(&x25_sk(s)->calluserdata, calluserdata)) { + sock_hold(s); + goto found; + } + if (x25_sk(s)->calluserdata.cudlength == 0) { + next_best = s; + } } + if (next_best) { + s = next_best; + sock_hold(s); + goto found; + } s = NULL; found: read_unlock_bh(&x25_list_lock); @@ -814,6 +825,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, struct x25_opt *makex25; struct x25_address source_addr, dest_addr; struct x25_facilities facilities; + struct x25_calluserdata calluserdata; int len, rc; /* @@ -828,9 +840,27 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); /* - * Find a listener for the particular address. + * Get the length of the facilities, skip past them for the moment + * get the call user data because this is needed to determine + * the correct listener + */ + len = skb->data[0] + 1; + skb_pull(skb,len); + + /* + * Incoming Call User Data. + */ + if (skb->len >= 0) { + memcpy(calluserdata.cuddata, skb->data, skb->len); + calluserdata.cudlength = skb->len; + } + + skb_push(skb,len); + + /* + * Find a listener for the particular address/cud pair. */ - sk = x25_find_listener(&source_addr); + sk = x25_find_listener(&source_addr,&calluserdata); /* * We can't accept the Call Request. @@ -859,7 +889,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, goto out_sock_put; /* - * Remove the facilities, leaving any Call User Data. + * Remove the facilities */ skb_pull(skb, len); @@ -873,17 +903,10 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->neighbour = nb; makex25->facilities = facilities; makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask; + makex25->calluserdata = calluserdata; x25_write_internal(make, X25_CALL_ACCEPTED); - /* - * Incoming Call User Data. - */ - if (skb->len >= 0) { - memcpy(makex25->calluserdata.cuddata, skb->data, skb->len); - makex25->calluserdata.cudlength = skb->len; - } - makex25->state = X25_STATE_3; sk->sk_ack_backlog++; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index c288dd8f5f4810..aabda59c824e79 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -5,7 +5,6 @@ * Added /proc/sys/net/x25 directory entry (empty =) ). [MS] */ -#include <linux/mm.h> #include <linux/sysctl.h> #include <linux/skbuff.h> #include <linux/socket.h> diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 99af8425fbfce3..88e704e70043d7 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -18,29 +18,10 @@ */ #include <linux/config.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/stat.h> -#include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/fcntl.h> -#include <linux/termios.h> /* For TIOCINQ/OUTQ */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/proc_fs.h> #include <linux/if_arp.h> #include <net/x25.h> diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 4463bb0d738af9..1a25f40e93d2c4 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -19,24 +19,10 @@ * negotiation. */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> #include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <net/x25.h> /* diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 802bc1ae252bc1..7b7697c4b0ef20 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -24,25 +24,11 @@ */ #include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> #include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <net/ip.h> /* For ip_rcv */ #include <net/tcp.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <net/x25.h> static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 118146629c3a7e..a2b9b71d46599b 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -21,25 +21,12 @@ * 2000-09-04 Henner Eisen dev_hold() / dev_put() for x25_neigh. */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <net/sock.h> -#include <asm/system.h> #include <asm/uaccess.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <linux/init.h> #include <net/x25.h> diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index ed6b6d84b3d0d6..1a0d3e46974d6a 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -22,24 +22,11 @@ * needed cleaned seq-number fields. */ -#include <linux/errno.h> -#include <linux/types.h> #include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> #include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <net/x25.h> static int x25_pacsize_to_bytes(unsigned int pacsize) diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index c40d4c5d46a1c8..fd9b13029642ae 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -21,25 +21,11 @@ * jun/24/01 Arnaldo C. Melo use skb_queue_purge, cleanups */ -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/timer.h> #include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <net/x25.h> /* @@ -367,3 +353,22 @@ void x25_check_rbuf(struct sock *sk) x25_stop_timer(sk); } } + +/* + * Compare 2 calluserdata structures, used to find correct listening sockets + * when call user data is used. + */ +int x25_check_calluserdata(struct x25_calluserdata *ours, struct x25_calluserdata *theirs) +{ + int i; + if (ours->cudlength != theirs->cudlength) + return 0; + + for (i=0;i<ours->cudlength;i++) { + if (ours->cuddata[i] != theirs->cuddata[i]) { + return 0; + } + } + return 1; +} + diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index 32e3f0a03b8962..949faa7f6b92f8 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -20,24 +20,10 @@ */ #include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp.h> -#include <asm/system.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/interrupt.h> #include <net/x25.h> static void x25_heartbeat_expiry(unsigned long); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0bc92ef6f85115..70ba1fc61d9fad 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -549,6 +549,8 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir) } } +EXPORT_SYMBOL(xfrm_policy_delete); + int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; |