GIT d6e17ba387733d7caaaac742f640edc1a5f232d0 master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.14.git --- diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -135,3 +135,15 @@ Why: With the 16-bit PCMCIA subsystem no pcmciautils package available at http://kernel.org/pub/linux/utils/kernel/pcmcia/ Who: Dominik Brodowski + +--------------------------- + +What: ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue) +When: December 2005 +Why: This interface has been obsoleted by the new layer3-independent + "nfnetlink_queue". The Kernel interface is compatible, so the old + ip[6]tables "QUEUE" targets still work and will transparently handle + all packets into nfnetlink queue number 0. Userspace users will have + to link against API-compatible library on top of libnfnetlink_queue + instead of the current 'libipq'. +Who: Harald Welte diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -214,8 +214,7 @@ static int __devinit ns_init_card(int i, static void __devinit ns_init_card_error(ns_dev *card, int error); static scq_info *get_scq(int size, u32 scd); static void free_scq(scq_info *scq, struct atm_vcc *vcc); -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2); +static void push_rxbufs(ns_dev *, struct sk_buff *); static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs); static int ns_open(struct atm_vcc *vcc); static void ns_close(struct atm_vcc *vcc); @@ -766,6 +765,7 @@ static int __devinit ns_init_card(int i, ns_init_card_error(card, error); return error; } + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; } @@ -786,9 +786,10 @@ static int __devinit ns_init_card(int i, ns_init_card_error(card, error); return error; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); /* Due to the implementation of push_rxbufs() this is 1, not 0 */ if (j == 1) { @@ -822,9 +823,10 @@ static int __devinit ns_init_card(int i, ns_init_card_error(card, error); return error; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } /* Test for strange behaviour which leads to crashes */ if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min) @@ -852,6 +854,7 @@ static int __devinit ns_init_card(int i, ns_init_card_error(card, error); return error; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; } @@ -1078,12 +1081,18 @@ static void free_scq(scq_info *scq, stru /* The handles passed must be pointers to the sk_buff containing the small or large buffer(s) cast to u32. */ -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2) +static void push_rxbufs(ns_dev *card, struct sk_buff *skb) { + struct ns_skb_cb *cb = NS_SKB_CB(skb); + u32 handle1, addr1; + u32 handle2, addr2; u32 stat; unsigned long flags; + /* *BARF* */ + handle2 = addr2 = 0; + handle1 = (u32)skb; + addr1 = (u32)virt_to_bus(skb->data); #ifdef GENERAL_DEBUG if (!addr1) @@ -1093,7 +1102,7 @@ static void push_rxbufs(ns_dev *card, u3 stat = readl(card->membase + STAT); card->sbfqc = ns_stat_sfbqc_get(stat); card->lbfqc = ns_stat_lfbqc_get(stat); - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (!addr2) { @@ -1111,7 +1120,7 @@ static void push_rxbufs(ns_dev *card, u3 } } } - else /* type == BUF_LG */ + else /* buf_type == BUF_LG */ { if (!addr2) { @@ -1132,26 +1141,26 @@ static void push_rxbufs(ns_dev *card, u3 if (addr2) { - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (card->sbfqc >= card->sbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } else card->sbfqc += 2; } - else /* (type == BUF_LG) */ + else /* (buf_type == BUF_LG) */ { if (card->lbfqc >= card->lbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } @@ -1166,12 +1175,12 @@ static void push_rxbufs(ns_dev *card, u3 writel(handle2, card->membase + DR2); writel(addr1, card->membase + DR1); writel(handle1, card->membase + DR0); - writel(NS_CMD_WRITE_FREEBUFQ | (u32) type, card->membase + CMD); + writel(NS_CMD_WRITE_FREEBUFQ | cb->buf_type, card->membase + CMD); spin_unlock_irqrestore(&card->res_lock, flags); XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index, - (type == BUF_SM ? "small" : "large"), addr1, addr2); + (cb->buf_type == BUF_SM ? "small" : "large"), addr1, addr2); } if (!card->efbie && card->sbfqc >= card->sbnr.min && @@ -1322,9 +1331,10 @@ static irqreturn_t ns_irq_handler(int ir card->efbie = 0; break; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } card->sbfqc = i; process_rsq(card); @@ -1348,9 +1358,10 @@ static irqreturn_t ns_irq_handler(int ir card->efbie = 0; break; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } card->lbfqc = i; process_rsq(card); @@ -2227,6 +2238,7 @@ static void dequeue_rx(ns_dev *card, ns_ recycle_rx_buf(card, skb); return; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; } else if (--card->iovpool.count < card->iovnr.min) @@ -2234,6 +2246,7 @@ static void dequeue_rx(ns_dev *card, ns_ struct sk_buff *new_iovb; if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL) { + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, new_iovb); card->iovpool.count++; } @@ -2264,7 +2277,7 @@ static void dequeue_rx(ns_dev *card, ns_ if (NS_SKB(iovb)->iovcnt == 1) { - if (skb->list != &card->sbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_SM) { printk("nicstar%d: Expected a small buffer, and this is not one.\n", card->index); @@ -2278,7 +2291,7 @@ static void dequeue_rx(ns_dev *card, ns_ } else /* NS_SKB(iovb)->iovcnt >= 2 */ { - if (skb->list != &card->lbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_LG) { printk("nicstar%d: Expected a large buffer, and this is not one.\n", card->index); @@ -2322,8 +2335,7 @@ static void dequeue_rx(ns_dev *card, ns_ /* skb points to a small buffer */ if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2350,8 +2362,7 @@ static void dequeue_rx(ns_dev *card, ns_ { if (!atm_charge(vcc, sb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2367,16 +2378,14 @@ static void dequeue_rx(ns_dev *card, ns_ atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); } else /* len > NS_SMBUFSIZE, the usual case */ { if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2394,8 +2403,7 @@ static void dequeue_rx(ns_dev *card, ns_ atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); } @@ -2430,6 +2438,7 @@ static void dequeue_rx(ns_dev *card, ns_ card->hbpool.count++; } } + NS_SKB_CB(hb)->buf_type = BUF_NONE; } else if (--card->hbpool.count < card->hbnr.min) @@ -2437,6 +2446,7 @@ static void dequeue_rx(ns_dev *card, ns_ struct sk_buff *new_hb; if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2444,6 +2454,7 @@ static void dequeue_rx(ns_dev *card, ns_ { if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2473,8 +2484,7 @@ static void dequeue_rx(ns_dev *card, ns_ remaining = len - iov->iov_len; iov++; /* Free the small buffer */ - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); /* Copy all large buffers to the huge buffer and free them */ for (j = 1; j < NS_SKB(iovb)->iovcnt; j++) @@ -2485,8 +2495,7 @@ static void dequeue_rx(ns_dev *card, ns_ skb_put(hb, tocopy); iov++; remaining -= tocopy; - push_rxbufs(card, BUF_LG, (u32) lb, - (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } #ifdef EXTRA_DEBUG if (remaining != 0 || hb->len != len) @@ -2527,9 +2536,10 @@ static void ns_sb_destructor(struct sk_b sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) break; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } while (card->sbfqc < card->sbnr.min); } @@ -2550,9 +2560,10 @@ static void ns_lb_destructor(struct sk_b lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) break; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } while (card->lbfqc < card->lbnr.min); } @@ -2569,6 +2580,7 @@ static void ns_hb_destructor(struct sk_b hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) break; + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; } @@ -2577,45 +2589,25 @@ static void ns_hb_destructor(struct sk_b #endif /* NS_USE_DESTRUCTORS */ - static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb) { - if (skb->list == &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } -} + struct ns_skb_cb *cb = NS_SKB_CB(skb); + if (unlikely(cb->buf_type == BUF_NONE)) { + printk("nicstar%d: What kind of rx buffer is this?\n", card->index); + dev_kfree_skb_any(skb); + } else + push_rxbufs(card, skb); +} static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count) { - struct sk_buff *skb; - - for (; count > 0; count--) - { - skb = (struct sk_buff *) (iov++)->iov_base; - if (skb->list == &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } - } + while (count-- > 0) + recycle_rx_buf(card, (struct sk_buff *) (iov++)->iov_base); } - static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) { if (card->iovpool.count < card->iovnr.max) @@ -2631,7 +2623,7 @@ static void recycle_iov_buf(ns_dev *card static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) { - skb_unlink(sb); + skb_unlink(sb, &card->sbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->sbfqc < card->sbnr.min) #else @@ -2640,10 +2632,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } if (card->sbfqc < card->sbnr.init) @@ -2652,10 +2644,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } } @@ -2664,7 +2656,7 @@ static void dequeue_sm_buf(ns_dev *card, static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) { - skb_unlink(lb); + skb_unlink(lb, &card->lbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->lbfqc < card->lbnr.min) #else @@ -2673,10 +2665,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } if (card->lbfqc < card->lbnr.init) @@ -2685,10 +2677,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } } @@ -2880,9 +2872,10 @@ static int ns_ioctl(struct atm_dev *dev, sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) return -ENOMEM; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } break; @@ -2894,9 +2887,10 @@ static int ns_ioctl(struct atm_dev *dev, lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) return -ENOMEM; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } break; @@ -2923,6 +2917,7 @@ static int ns_ioctl(struct atm_dev *dev, hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) return -ENOMEM; + NS_SKB_CB(hb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; @@ -2953,6 +2948,7 @@ static int ns_ioctl(struct atm_dev *dev, iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL); if (iovb == NULL) return -ENOMEM; + NS_SKB_CB(iovb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; @@ -2979,17 +2975,12 @@ static int ns_ioctl(struct atm_dev *dev, } - static void which_list(ns_dev *card, struct sk_buff *skb) { - printk("It's a %s buffer.\n", skb->list == &card->sbpool.queue ? - "small" : skb->list == &card->lbpool.queue ? "large" : - skb->list == &card->hbpool.queue ? "huge" : - skb->list == &card->iovpool.queue ? "iovec" : "unknown"); + printk("skb buf_type: 0x%08x\n", NS_SKB_CB(skb)->buf_type); } - static void ns_poll(unsigned long arg) { int i; diff --git a/drivers/atm/nicstar.h b/drivers/atm/nicstar.h --- a/drivers/atm/nicstar.h +++ b/drivers/atm/nicstar.h @@ -103,8 +103,14 @@ #define NS_IOREMAP_SIZE 4096 -#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ -#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +/* + * BUF_XX distinguish the Rx buffers depending on their (small/large) size. + * BUG_SM and BUG_LG are both used by the driver and the device. + * BUF_NONE is only used by the driver. + */ +#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ +#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +#define BUF_NONE 0xffffffff /* Software only: */ #define NS_HBUFSIZE 65568 /* Size of max. AAL5 PDU */ #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \ @@ -684,6 +690,12 @@ enum ns_regs /* Device driver structures ***************************************************/ +struct ns_skb_cb { + u32 buf_type; /* BUF_SM/BUF_LG/BUF_NONE */ +}; + +#define NS_SKB_CB(skb) ((struct ns_skb_cb *)((skb)->cb)) + typedef struct tsq_info { void *org; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -417,10 +417,12 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy chan = (here[3] & uPD98401_AAL5_CHAN) >> uPD98401_AAL5_CHAN_SHIFT; if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { + int pos = ZATM_VCC(vcc)->pool; + vcc = zatm_dev->rx_map[chan]; - if (skb == zatm_dev->last_free[ZATM_VCC(vcc)->pool]) - zatm_dev->last_free[ZATM_VCC(vcc)->pool] = NULL; - skb_unlink(skb); + if (skb == zatm_dev->last_free[pos]) + zatm_dev->last_free[pos] = NULL; + skb_unlink(skb, zatm_dev->pool + pos); } else { printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl) * (1) len doesn't include the header by default. I want this. */ static int -aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) +aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) { struct aoe_hdr *h; u32 n; diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -158,7 +158,7 @@ static int bfusb_send_bulk(struct bfusb if (err) { BT_ERR("%s bulk tx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); usb_free_urb(urb); } else atomic_inc(&bfusb->pending_tx); @@ -212,7 +212,7 @@ static void bfusb_tx_complete(struct urb read_lock(&bfusb->lock); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); skb_queue_tail(&bfusb->completed_q, skb); bfusb_tx_wakeup(bfusb); @@ -253,7 +253,7 @@ static int bfusb_rx_submit(struct bfusb if (err) { BT_ERR("%s bulk rx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); usb_free_urb(urb); } @@ -398,7 +398,7 @@ static void bfusb_rx_complete(struct urb buf += len; } - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); bfusb_rx_submit(bfusb, urb); diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -681,7 +681,7 @@ static void handle_packet_response(struc return; } - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); if (packet->state == hpsb_queued) { packet->sendtime = jiffies; @@ -989,7 +989,7 @@ void abort_timedouts(unsigned long __opa packet = (struct hpsb_packet *)skb->data; if (time_before(packet->sendtime + expire, jiffies)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); packet->state = hpsb_complete; packet->ack_code = ACKX_TIMEOUT; queue_packet_complete(packet); diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c --- a/drivers/isdn/act2000/capi.c +++ b/drivers/isdn/act2000/capi.c @@ -606,7 +606,7 @@ handle_ack(act2000_card *card, act2000_c if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) && (m->msg.data_b3_req.blocknr == blocknr)) { /* found corresponding DATA_B3_REQ */ - skb_unlink(tmp); + skb_unlink(tmp, &card->ackq); chan->queued -= m->msg.data_b3_req.datalen; if (m->msg.data_b3_req.flags) ret = m->msg.data_b3_req.datalen; diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -1786,7 +1786,6 @@ isdn_net_receive(struct net_device *ndev lp->stats.rx_bytes += skb->len; } skb->dev = ndev; - skb->input_dev = ndev; skb->pkt_type = PACKET_HOST; skb->mac.raw = skb->data; #ifdef ISDN_DEBUG_NET_DUMP diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1177,7 +1177,6 @@ isdn_ppp_push_higher(isdn_net_dev * net_ mlp->huptimer = 0; #endif /* CONFIG_IPPP_FILTER */ skb->dev = dev; - skb->input_dev = dev; skb->mac.raw = skb->data; netif_rx(skb); /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2419,22 +2419,19 @@ out: return 0; } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) { struct bonding *bond = dev->priv; struct slave *slave = NULL; int ret = NET_RX_DROP; - if (!(dev->flags & IFF_MASTER)) { + if (!(dev->flags & IFF_MASTER)) goto out; - } read_lock(&bond->lock); - slave = bond_get_slave_by_dev((struct bonding *)dev->priv, - skb->real_dev); - if (slave == NULL) { + slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev); + if (!slave) goto out_unlock; - } bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(str void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); #endif //__BOND_3AD_H__ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(st _unlock_rx_hashtbl(bond); } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) +static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (!(bond_dev->flags & IFF_MASTER)) { + if (!(bond_dev->flags & IFF_MASTER)) goto out; - } if (!arp) { dprintk("Packet has no ARP data\n"); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xF static char bpq_eth_addr[6]; -static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static const char *bpq_print_ethaddr(const unsigned char *); @@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct n /* * Receive an AX.25 frame via an ethernet interface. */ -static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1657,7 +1657,6 @@ ppp_receive_nonmp_frame(struct ppp *ppp, skb->dev = ppp->dev; skb->protocol = htons(npindex_to_ethertype[npi]); skb->mac.raw = skb->data; - skb->input_dev = ppp->dev; netif_rx(skb); ppp->dev->last_rx = jiffies; } diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -377,7 +377,8 @@ abort_kfree: ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; @@ -426,7 +427,8 @@ out: ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1429,6 +1429,7 @@ static int rr_start_xmit(struct sk_buff { struct rr_private *rrpriv = netdev_priv(dev); struct rr_regs __iomem *regs = rrpriv->regs; + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; struct ring_ctrl *txctrl; unsigned long flags; u32 index, len = skb->len; @@ -1460,7 +1461,7 @@ static int rr_start_xmit(struct sk_buff ifield = (u32 *)skb_push(skb, 8); ifield[0] = 0; - ifield[1] = skb->private.ifield; + ifield[1] = hcb->ifield; /* * We don't need the lock before we are actually going to start diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -156,52 +156,6 @@ static int shaper_start_xmit(struct sk_b SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb); -#ifdef SHAPER_COMPLEX /* and broken.. */ - - while(ptr && ptr!=(struct sk_buff *)&shaper->sendq) - { - if(ptr->pripri - && jiffies - SHAPERCB(ptr)->shapeclock < SHAPER_MAXSLIP) - { - struct sk_buff *tmp=ptr->prev; - - /* - * It goes before us therefore we slip the length - * of the new frame. - */ - - SHAPERCB(ptr)->shapeclock+=SHAPERCB(skb)->shapelen; - SHAPERCB(ptr)->shapelatency+=SHAPERCB(skb)->shapelen; - - /* - * The packet may have slipped so far back it - * fell off. - */ - if(SHAPERCB(ptr)->shapelatency > SHAPER_LATENCY) - { - skb_unlink(ptr); - dev_kfree_skb(ptr); - } - ptr=tmp; - } - else - break; - } - if(ptr==NULL || ptr==(struct sk_buff *)&shaper->sendq) - skb_queue_head(&shaper->sendq,skb); - else - { - struct sk_buff *tmp; - /* - * Set the packet clock out time according to the - * frames ahead. Im sure a bit of thought could drop - * this loop. - */ - for(tmp=skb_peek(&shaper->sendq); tmp!=NULL && tmp!=ptr; tmp=tmp->next) - SHAPERCB(skb)->shapeclock+=tmp->shapelen; - skb_append(ptr,skb); - } -#else { struct sk_buff *tmp; /* @@ -220,7 +174,7 @@ static int shaper_start_xmit(struct sk_b } else skb_queue_tail(&shaper->sendq, skb); } -#endif + if(sh_debug) printk("Frame queued.\n"); if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN) @@ -302,7 +256,7 @@ static void shaper_kick(struct shaper *s * Pull the frame and get interrupts back on. */ - skb_unlink(skb); + skb_unlink(skb, &shaper->sendq); if (shaper->recovery < SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen) shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen; diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c @@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *p) + struct packet_type *p, struct net_device *orig_dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto.netif_rx) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(stru /* * Receive a LAPB frame via an ethernet interface. */ -static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len, err; struct lapbethdev *lapbeth; diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c --- a/drivers/net/wan/sdla_fr.c +++ b/drivers/net/wan/sdla_fr.c @@ -445,7 +445,7 @@ void s508_s514_unlock(sdla_t *card, uns void s508_s514_lock(sdla_t *card, unsigned long *smp_flags); unsigned short calc_checksum (char *, int); -static int setup_fr_header(struct sk_buff** skb, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode); @@ -1372,7 +1372,7 @@ static int if_send(struct sk_buff* skb, /* Move the if_header() code to here. By inserting frame * relay header in if_header() we would break the * tcpdump and other packet sniffers */ - chan->fr_header_len = setup_fr_header(&skb,dev,chan->common.usedby); + chan->fr_header_len = setup_fr_header(skb,dev,chan->common.usedby); if (chan->fr_header_len < 0 ){ ++chan->ifstats.tx_dropped; ++card->wandev.stats.tx_dropped; @@ -1597,8 +1597,6 @@ static int setup_for_delayed_transmit(st return 1; } - skb_unlink(skb); - chan->transmit_length = len; chan->delay_skb = skb; @@ -4871,18 +4869,15 @@ static void unconfig_fr (sdla_t *card) } } -static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode) { - struct sk_buff *skb = *skb_orig; fr_channel_t *chan=dev->priv; - if (op_mode == WANPIPE){ - + if (op_mode == WANPIPE) { chan->fr_header[0]=Q922_UI; switch (htons(skb->protocol)){ - case ETH_P_IP: chan->fr_header[1]=NLPID_IP; break; @@ -4894,16 +4889,14 @@ static int setup_fr_header(struct sk_buf } /* If we are in bridging mode, we must apply - * an Ethernet header */ - if (op_mode == BRIDGE || op_mode == BRIDGE_NODE){ - - + * an Ethernet header + */ + if (op_mode == BRIDGE || op_mode == BRIDGE_NODE) { /* Encapsulate the packet as a bridged Ethernet frame. */ #ifdef DEBUG printk(KERN_INFO "%s: encapsulating skb for frame relay\n", dev->name); #endif - chan->fr_header[0] = 0x03; chan->fr_header[1] = 0x00; chan->fr_header[2] = 0x80; @@ -4916,7 +4909,6 @@ static int setup_fr_header(struct sk_buf /* Yuck. */ skb->protocol = ETH_P_802_3; return 8; - } return 0; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, * after interrupt servicing to process frames queued via netif_rx. */ -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p) +static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -2903,19 +2903,18 @@ static struct net_device_stats *usbnet_g * completion callbacks. 2.5 should have fixed those bugs... */ -static void defer_bh (struct usbnet *dev, struct sk_buff *skb) +static void defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; unsigned long flags; - spin_lock_irqsave (&list->lock, flags); - __skb_unlink (skb, list); - spin_unlock (&list->lock); - spin_lock (&dev->done.lock); - __skb_queue_tail (&dev->done, skb); + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock(&list->lock); + spin_lock(&dev->done.lock); + __skb_queue_tail(&dev->done, skb); if (dev->done.qlen == 1) - tasklet_schedule (&dev->bh); - spin_unlock_irqrestore (&dev->done.lock, flags); + tasklet_schedule(&dev->bh); + spin_unlock_irqrestore(&dev->done.lock, flags); } /* some work can't be done in tasklets, so we use keventd @@ -3120,7 +3119,7 @@ block: break; } - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->rxq); if (urb) { if (netif_running (dev->net) @@ -3490,7 +3489,7 @@ static void tx_complete (struct urb *urb urb->dev = NULL; entry->state = tx_done; - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->txq); } /*-------------------------------------------------------------------------*/ diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u dev->groups = 23; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL); + dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); @@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_b EXPORT_SYMBOL(w1_add_master_device); EXPORT_SYMBOL(w1_remove_master_device); + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -25,6 +25,8 @@ #define SO_ERROR 0x1007 #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_RCVLOWAT 0x1010 #define SO_SNDLOWAT 0x1011 #define SO_RCVTIMEO 0x1012 diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -16,6 +16,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -23,6 +23,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h --- a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200 /* A #define SO_ERROR 0x1007 /* get error status and clear */ #define SO_SNDBUF 0x1001 /* Send buffer size. */ #define SO_RCVBUF 0x1002 /* Receive buffer. */ +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 /* send low-water mark */ #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ #define SO_SNDTIMEO 0x1005 /* send timeout */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -16,6 +16,8 @@ /* To add :#define SO_REUSEPORT 0x0200 */ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 #define SO_RCVLOWAT 0x1004 #define SO_SNDTIMEO 0x1005 diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -20,6 +20,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h @@ -21,6 +21,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -22,6 +22,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_RCVBUFFORCE 32 +#define SO_SNDBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h --- a/include/asm-xtensa/socket.h +++ b/include/asm-xtensa/socket.h @@ -24,6 +24,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -26,6 +26,11 @@ #include #ifdef __KERNEL__ + +struct hippi_cb { + __u32 ifield; +}; + extern unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(stru { struct net_device_stats *stats; - skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/net.h b/include/linux/net.h --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,10 +497,12 @@ static inline void *netdev_priv(struct n #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - int (*func) (struct sk_buff *, struct net_device *, - struct packet_type *); + __be16 type; /* This is really htons(ether_type). */ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, + struct net_device *); void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,10 +21,23 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* we overload the higher bits for encoding auxiliary data such as the queue + * number. Not nice, but better than additional function arguments. */ +#define NF_VERDICT_MASK 0x0000ffff +#define NF_VERDICT_BITS 16 + +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* Generic cache responses from hook functions. <= 0x2000 is used for protocol-flags. */ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 +#endif #ifdef __KERNEL__ #include @@ -176,10 +189,12 @@ int nf_getsockopt(struct sock *sk, int p /* Packet queuing */ typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, void *data); + struct nf_info *info, + unsigned int queuenum, void *data); extern int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data); extern int nf_unregister_queue_handler(int pf); +extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); @@ -190,6 +205,22 @@ extern void nf_ct_attach(struct sk_buff /* FIXME: Before cache is ever used, this must be implemented for real. */ extern void nf_invalidate_cache(int pf); +/* Call this before modifying an existing packet: ensures it is + modifiable and linear to the point you care about (writable_len). + Returns true or false. */ +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); + +struct nf_queue_rerouter { + void (*save)(const struct sk_buff *skb, struct nf_info *info); + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); + int rer_size; +}; + +#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) + +extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); +extern int nf_unregister_queue_rerouter(int pf); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h new file mode 100644 --- /dev/null +++ b/include/linux/netfilter/nfnetlink.h @@ -0,0 +1,148 @@ +#ifndef _NFNETLINK_H +#define _NFNETLINK_H +#include + +/* nfnetlink groups: Up to 32 maximum */ +#define NF_NETLINK_CONNTRACK_NEW 0x00000001 +#define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 +#define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 +#define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 +#define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 +#define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 + +/* Generic structure for encapsulation optional netfilter information. + * It is reminiscent of sockaddr, but with sa_family replaced + * with attribute type. + * ! This should someday be put somewhere generic as now rtnetlink and + * ! nfnetlink use the same attributes methods. - J. Schulist. + */ + +struct nfattr +{ + u_int16_t nfa_len; + u_int16_t nfa_type; +} __attribute__ ((packed)); + +/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time + * to put this in a generic file */ + +#define NFA_ALIGNTO 4 +#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) +#define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \ + && (nfa)->nfa_len <= (len)) +#define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \ + (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len))) +#define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len)) +#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len)) +#define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) +#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) +#define NFA_NEST(skb, type) \ +({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ + NFA_PUT(skb, type, 0, NULL); \ + __start; }) +#define NFA_NEST_END(skb, start) \ +({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ + (skb)->len; }) +#define NFA_NEST_CANCEL(skb, start) \ +({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ + -1; }) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + u_int8_t nfgen_family; /* AF_xxx */ + u_int8_t version; /* nfnetlink version */ + u_int16_t res_id; /* resource id */ +} __attribute__ ((packed)); + +#define NFNETLINK_V0 0 + +#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) +#define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg)) + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. + */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_COUNT 5 + +#ifdef __KERNEL__ + +#include +#include + +struct nfnl_callback +{ + kernel_cap_t cap_required; /* capabilities required for this msg */ + int (*call)(struct sock *nl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); +}; + +struct nfnetlink_subsystem +{ + const char *name; + __u8 subsys_id; /* nfnetlink subsystem ID */ + __u8 cb_count; /* number of callbacks */ + u_int32_t attr_count; /* number of nfattr's */ + struct nfnl_callback *cb; /* callback for individual types */ +}; + +extern void __nfa_fill(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +#define NFA_PUT(skb, attrtype, attrlen, data) \ +({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \ + __nfa_fill(skb, attrtype, attrlen, data); }) + +extern struct semaphore nfnl_sem; + +#define nfnl_shlock() down(&nfnl_sem) +#define nfnl_shlock_nowait() down_trylock(&nfnl_sem) + +#define nfnl_shunlock() do { up(&nfnl_sem); \ + if(nfnl && nfnl->sk_receive_queue.qlen) \ + nfnl->sk_data_ready(nfnl, 0); \ + } while(0) + +extern void nfnl_lock(void); +extern void nfnl_unlock(void); + +extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n); +extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n); + +extern int nfattr_parse(struct nfattr *tb[], int maxattr, + struct nfattr *nfa, int len); + +#define nfattr_parse_nested(tb, max, nfa) \ + nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa))) + +#define nfattr_bad_size(tb, max, cta_min) \ +({ int __i, __res = 0; \ + for (__i=0; __i + +enum cntl_msg_types { + IPCTNL_MSG_CT_NEW, + IPCTNL_MSG_CT_GET, + IPCTNL_MSG_CT_DELETE, + IPCTNL_MSG_CT_GET_CTRZERO, + + IPCTNL_MSG_MAX +}; + +enum ctnl_exp_msg_types { + IPCTNL_MSG_EXP_NEW, + IPCTNL_MSG_EXP_GET, + IPCTNL_MSG_EXP_DELETE, + + IPCTNL_MSG_EXP_MAX +}; + + +enum ctattr_type { + CTA_UNSPEC, + CTA_TUPLE_ORIG, + CTA_TUPLE_REPLY, + CTA_STATUS, + CTA_PROTOINFO, + CTA_HELP, + CTA_NAT, + CTA_TIMEOUT, + CTA_MARK, + CTA_COUNTERS_ORIG, + CTA_COUNTERS_REPLY, + CTA_USE, + CTA_EXPECT, + CTA_ID, + __CTA_MAX +}; +#define CTA_MAX (__CTA_MAX - 1) + +enum ctattr_tuple { + CTA_TUPLE_UNSPEC, + CTA_TUPLE_IP, + CTA_TUPLE_PROTO, + __CTA_TUPLE_MAX +}; +#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) + +enum ctattr_ip { + CTA_IP_UNSPEC, + CTA_IP_V4_SRC, + CTA_IP_V4_DST, + CTA_IP_V6_SRC, + CTA_IP_V6_DST, + __CTA_IP_MAX +}; +#define CTA_IP_MAX (__CTA_IP_MAX - 1) + +enum ctattr_l4proto { + CTA_PROTO_UNSPEC, + CTA_PROTO_NUM, + CTA_PROTO_SRC_PORT, + CTA_PROTO_DST_PORT, + CTA_PROTO_ICMP_ID, + CTA_PROTO_ICMP_TYPE, + CTA_PROTO_ICMP_CODE, + __CTA_PROTO_MAX +}; +#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) + +enum ctattr_protoinfo { + CTA_PROTOINFO_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_MAX +}; +#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) + +enum ctattr_counters { + CTA_COUNTERS_UNSPEC, + CTA_COUNTERS_PACKETS, + CTA_COUNTERS_BYTES, + __CTA_COUNTERS_MAX +}; +#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) + +enum ctattr_nat { + CTA_NAT_UNSPEC, + CTA_NAT_MINIP, + CTA_NAT_MAXIP, + CTA_NAT_PROTO, + __CTA_NAT_MAX +}; +#define CTA_NAT_MAX (__CTA_NAT_MAX - 1) + +enum ctattr_protonat { + CTA_PROTONAT_UNSPEC, + CTA_PROTONAT_PORT_MIN, + CTA_PROTONAT_PORT_MAX, + __CTA_PROTONAT_MAX +}; +#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) + +enum ctattr_expect { + CTA_EXPECT_UNSPEC, + CTA_EXPECT_TUPLE, + CTA_EXPECT_MASK, + CTA_EXPECT_TIMEOUT, + CTA_EXPECT_ID, + __CTA_EXPECT_MAX +}; +#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) + +enum ctattr_help { + CTA_HELP_UNSPEC, + CTA_HELP_NAME, + __CTA_HELP_MAX +}; +#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) + +#define CTA_HELP_MAXNAMESIZE 32 + +#endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_QUEUE_H +#define _NFNETLINK_QUEUE_H + +#include + +enum nfqnl_msg_types { + NFQNL_MSG_PACKET, /* packet from kernel to userspace */ + NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ + NFQNL_MSG_CONFIG, /* connect to a particular queue */ + + NFQNL_MSG_MAX +}; + +struct nfqnl_msg_packet_hdr { + u_int32_t packet_id; /* unique ID of packet in queue */ + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ + NFQA_MARK, /* u_int32_t nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ + NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + + __NFQA_MAX +}; +#define NFQA_MAX (__NFQA_MAX - 1) + +struct nfqnl_msg_verdict_hdr { + u_int32_t verdict; + u_int32_t id; +} __attribute__ ((packed)); + + +enum nfqnl_msg_config_cmds { + NFQNL_CFG_CMD_NONE, + NFQNL_CFG_CMD_BIND, + NFQNL_CFG_CMD_UNBIND, + NFQNL_CFG_CMD_PF_BIND, + NFQNL_CFG_CMD_PF_UNBIND, +}; + +struct nfqnl_msg_config_cmd { + u_int8_t command; /* nfqnl_msg_config_cmds */ + u_int8_t _pad; + u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ +} __attribute__ ((packed)); + +enum nfqnl_config_mode { + NFQNL_COPY_NONE, + NFQNL_COPY_META, + NFQNL_COPY_PACKET, +}; + +struct nfqnl_msg_config_params { + u_int32_t copy_range; + u_int8_t copy_mode; /* enum nfqnl_config_mode */ +} __attribute__ ((packed)); + + +enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ + __NFQA_CFG_MAX +}; + +#endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -9,6 +9,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_DN_SRC 0x0001 @@ -18,6 +20,7 @@ #define NFC_DN_IF_IN 0x0004 /* Output device. */ #define NFC_DN_IF_OUT 0x0008 +#endif /* ! __KERNEL__ */ /* DECnet Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -8,6 +8,8 @@ #include #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP_SRC 0x0001 @@ -35,6 +37,7 @@ #define NFC_IP_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP Hooks */ /* After promisc drops, checksum checks. */ @@ -77,11 +80,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); -/* Call this before modifying an existing IP packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. */ -extern int skb_ip_make_writable(struct sk_buff **pskb, - unsigned int writable_len); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -65,6 +65,63 @@ enum ip_conntrack_status { /* Both together */ IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), + + /* Connection is dying (removed from lists), can not be unset. */ + IPS_DYING_BIT = 9, + IPS_DYING = (1 << IPS_DYING_BIT), +}; + +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), }; #ifdef __KERNEL__ @@ -152,6 +209,9 @@ struct ip_conntrack /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper, if any. */ struct ip_conntrack_helper *helper; @@ -171,7 +231,7 @@ struct ip_conntrack #endif /* CONFIG_IP_NF_NAT_NEEDED */ #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - unsigned long mark; + u_int32_t mark; #endif /* Traversed often, so hopefully in different cacheline to top */ @@ -200,6 +260,9 @@ struct ip_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -239,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *s } /* decrement reference count on a conntrack */ -extern inline void ip_conntrack_put(struct ip_conntrack *ct); +static inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} /* call to create an explicit dependency on ip_conntrack. */ extern void need_ip_conntrack(void); @@ -274,12 +342,50 @@ extern void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), void *data); +extern struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *); +extern struct ip_conntrack_helper * +ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple); +extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper); + +extern struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol); +extern struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol); +extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto); + +extern void ip_ct_remove_expectations(struct ip_conntrack *ct); + +extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, + struct ip_conntrack_tuple *); + +extern void ip_conntrack_free(struct ip_conntrack *ct); + +extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); + +extern struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern void ip_conntrack_flush(void); + /* It's confirmed if it is, or has been in the hash table. */ static inline int is_confirmed(struct ip_conntrack *ct) { return test_bit(IPS_CONFIRMED_BIT, &ct->status); } +static inline int is_dying(struct ip_conntrack *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + extern unsigned int ip_conntrack_htable_size; struct ip_conntrack_stat @@ -303,6 +409,88 @@ struct ip_conntrack_stat #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#include + +struct ip_conntrack_ecache { + struct ip_conntrack *ct; + unsigned int events; +}; +DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + +#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x) + +extern struct notifier_block *ip_conntrack_chain; +extern struct notifier_block *ip_conntrack_expect_chain; + +static inline int ip_conntrack_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_chain, nb); +} + +static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_chain, nb); +} + +static inline int +ip_conntrack_expect_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_expect_chain, nb); +} + +static inline int +ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); +} + +static inline void +ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { + if (net_ratelimit()) { + printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); + dump_stack(); + } + } + ecache->events |= event; +} + +extern void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); +extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); + +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) +{ + if (is_confirmed(ct) && !is_dying(ct)) + notifier_call_chain(&ip_conntrack_chain, event, ct); +} + +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) +{ + notifier_call_chain(&ip_conntrack_expect_chain, event, exp); +} +#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ +static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) {} +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) {} +static inline void ip_conntrack_deliver_cached_events_for( + struct ip_conntrack *ct) {} +static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) {} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + #ifdef CONFIG_IP_NF_NAT_NEEDED static inline int ip_nat_initialized(struct ip_conntrack *conntrack, enum ip_nat_manip_type manip) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -2,6 +2,9 @@ #define _IP_CONNTRACK_CORE_H #include +#define MAX_IP_CT_PROTO 256 +extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ @@ -38,12 +41,23 @@ extern int __ip_conntrack_confirm(struct /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int ip_conntrack_confirm(struct sk_buff **pskb) { - if ((*pskb)->nfct - && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) - return __ip_conntrack_confirm(pskb); - return NF_ACCEPT; + struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; + int ret = NF_ACCEPT; + + if (ct && !is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_conntrack_deliver_cached_events_for(ct); + + return ret; } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct ip_conntrack_ecache; +extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); +#endif + +extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -24,6 +24,8 @@ struct ip_conntrack_helper int (*help)(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -2,6 +2,7 @@ #ifndef _IP_CONNTRACK_PROTOCOL_H #define _IP_CONNTRACK_PROTOCOL_H #include +#include struct seq_file; @@ -47,22 +48,22 @@ struct ip_conntrack_protocol int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum); + /* convert protoinfo to nfnetink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct ip_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct ip_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; -#define MAX_IP_CT_PROTO 256 -extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; - /* Protocol registration. */ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); - -static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) -{ - return ip_ct_protos[protocol]; -} - /* Existing built-in protocols */ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; @@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_ini /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *, + const struct ip_conntrack_tuple *); +extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *); + #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -4,6 +4,9 @@ #include #include +#include +#include + struct iphdr; struct ip_nat_range; @@ -15,6 +18,8 @@ struct ip_nat_protocol /* Protocol number. */ unsigned int protonum; + struct module *me; + /* Translate a packet to the target according to manip type. Return true if succeeded. */ int (*manip_pkt)(struct sk_buff **pskb, @@ -43,19 +48,20 @@ struct ip_nat_protocol unsigned int (*print_range)(char *buffer, const struct ip_nat_range *range); -}; -#define MAX_IP_NAT_PROTO 256 -extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + int (*range_to_nfattr)(struct sk_buff *skb, + const struct ip_nat_range *range); + + int (*nfattr_to_range)(struct nfattr *tb[], + struct ip_nat_range *range); +}; /* Protocol registration. */ extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); -static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) -{ - return ip_nat_protos[protocol]; -} +extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol); +extern void ip_nat_proto_put(struct ip_nat_protocol *proto); /* Built-in protocols. */ extern struct ip_nat_protocol ip_nat_protocol_tcp; @@ -67,4 +73,9 @@ extern int init_protocols(void) __init; extern void cleanup_protocols(void); extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); +extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range); +extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[], + struct ip_nat_range *range); + #endif /*_IP_NAT_PROTO_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h @@ -0,0 +1,16 @@ +/* iptables module for using NFQUEUE mechanism + * + * (C) 2005 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * +*/ +#ifndef _IPT_NFQ_TARGET_H +#define _IPT_NFQ_TARGET_H + +/* target info */ +struct ipt_NFQ_info { + u_int16_t queuenum; +}; + +#endif /* _IPT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -10,6 +10,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP6_SRC 0x0001 @@ -38,6 +40,7 @@ #define NFC_IP6_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP6_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP6 Hooks */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -119,7 +119,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -164,7 +164,6 @@ struct skb_shared_info { * @stamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on - * @real_dev: The real device we are using * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -190,14 +189,11 @@ struct skb_shared_info { * @end: End pointer * @destructor: Destruct function * @nfmark: Can be used for communication between hooks - * @nfcache: Cache info * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @tc_classid: traffic control classid */ struct sk_buff { @@ -205,12 +201,10 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - struct sk_buff_head *list; struct sock *sk; struct timeval stamp; struct net_device *dev; struct net_device *input_dev; - struct net_device *real_dev; union { struct tcphdr *th; @@ -252,33 +246,27 @@ struct sk_buff { __u8 local_df:1, cloned:1, ip_summed:2, - nohdr:1; - /* 3 bits spare */ + nohdr:1, + nfctinfo:3; __u8 pkt_type; __u16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; - __u32 nfcache; - __u32 nfctinfo; + __u32 nfmark; struct nf_conntrack *nfct; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + __u8 ipvs_property:1; +#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif #endif /* CONFIG_NETFILTER */ -#if defined(CONFIG_HIPPI) - union { - __u32 ifield; - } private; -#endif #ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ + __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT - __u32 tc_verd; /* traffic control verdict */ - __u32 tc_classid; /* traffic control classid */ + __u16 tc_verd; /* traffic control verdict */ #endif - #endif @@ -597,7 +585,6 @@ static inline void __skb_queue_head(stru { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; prev = (struct sk_buff *)list; next = prev->next; @@ -622,7 +609,6 @@ static inline void __skb_queue_tail(stru { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; next = (struct sk_buff *)list; prev = next->prev; @@ -655,7 +641,6 @@ static inline struct sk_buff *__skb_dequ next->prev = prev; prev->next = next; result->next = result->prev = NULL; - result->list = NULL; } return result; } @@ -664,7 +649,7 @@ static inline struct sk_buff *__skb_dequ /* * Insert a packet on a list. */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -672,24 +657,23 @@ static inline void __skb_insert(struct s newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; - newsk->list = list; list->qlen++; } /* * Place a packet after a given packet in a list. */ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { - __skb_insert(newsk, old, old->next, old->list); + __skb_insert(newsk, old, old->next, list); } /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb); +extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -698,7 +682,6 @@ static inline void __skb_unlink(struct s next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - skb->list = NULL; next->prev = prev; prev->next = next; } diff --git a/include/net/act_api.h b/include/net/act_api.h --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -63,7 +63,7 @@ struct tc_action_ops __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; - int (*act)(struct sk_buff **, struct tc_action *); + int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); diff --git a/include/net/arp.h b/include/net/arp.h --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -302,7 +302,7 @@ extern int ax25_protocol_is_registered( /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git a/include/net/datalink.h b/include/net/datalink.h --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/ip.h b/include/net/ip.h --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); @@ -140,8 +140,6 @@ struct ip_reply_arg { void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -extern int ip_finish_output(struct sk_buff *skb); - struct ipv4_config { int log_martians; diff --git a/include/net/ipv6.h b/include/net/ipv6.h --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const st extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions diff --git a/include/net/llc.h b/include/net/llc.h --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/p8022.h b/include/net/p8022.h --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,8 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, c static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { - if (0 != indev[0]) { - if (NULL == skb->input_dev) + if (indev[0]) { + if (!skb->input_dev) return 0; - else if (0 != strcmp(indev, skb->input_dev->name)) + if (strcmp(indev, skb->input_dev->name)) return 0; } diff --git a/include/net/psnap.h b/include/net/psnap.h --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/request_sock.h b/include/net/request_sock.h --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -89,6 +89,7 @@ struct listen_sock { int qlen_young; int clock_hand; u32 hash_rnd; + u32 nr_table_entries; struct request_sock *syn_table[0]; }; @@ -129,11 +130,13 @@ static inline struct listen_sock *reqsk_ return lopt; } -static inline void reqsk_queue_destroy(struct request_sock_queue *queue) +static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) { kfree(reqsk_queue_yank_listen_sk(queue)); } +extern void reqsk_queue_destroy(struct request_sock_queue *queue); + static inline struct request_sock * reqsk_queue_yank_acceptq(struct request_sock_queue *queue) { diff --git a/include/net/route.h b/include/net/route.h --- a/include/net/route.h +++ b/include/net/route.h @@ -105,10 +105,6 @@ struct rt_cache_stat unsigned int out_hlist_search; }; -extern struct rt_cache_stat *rt_cache_stat; -#define RT_CACHE_STAT_INC(field) \ - (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) - extern struct ip_rt_acct *ip_rt_acct; struct in_device; diff --git a/include/net/x25.h b/include/net/x25.h --- a/include/net/x25.h +++ b/include/net/x25.h @@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25 /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); +extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); diff --git a/include/net/x25device.h b/include/net/x25device.h --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -8,7 +8,6 @@ static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->mac.raw = skb->data; - skb->input_dev = skb->dev = dev; skb->pkt_type = PACKET_HOST; return htons(ETH_P_X25); diff --git a/include/net/xfrm.h b/include/net/xfrm.h --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -818,7 +818,6 @@ extern void xfrm6_init(void); extern void xfrm6_fini(void); extern void xfrm_state_init(void); extern void xfrm4_state_init(void); -extern void xfrm4_state_fini(void); extern void xfrm6_state_init(void); extern void xfrm6_state_fini(void); diff --git a/kernel/audit.c b/kernel/audit.c --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/802/hippi.c b/net/802/hippi.c --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff * unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; if (!len){ len = skb->len - HIPPI_HLEN; @@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff * if (daddr) { memcpy(hip->le.dest_switch_addr, daddr + 3, 3); - memcpy(&skb->private.ifield, daddr + 2, 4); + memcpy(&hcb->ifield, daddr + 2, 4); return HIPPI_HLEN; } + hcb->ifield = 0; return -((int)HIPPI_HLEN); } diff --git a/net/802/p8022.c b/net/802/p8022.c --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -35,7 +35,8 @@ static int p8022_request(struct datalink struct datalink_proto *register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct datalink_proto *proto; diff --git a/net/802/psnap.c b/net/802/psnap.c --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_ * A SNAP packet has arrived */ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; struct datalink_proto *proto; @@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, /* Pass the frame on. */ skb->h.raw += 5; skb_pull(skb, 5); - rc = proto->rcvfunc(skb, dev, &snap_packet_type); + rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } else { skb->sk = NULL; kfree_skb(skb); @@ -118,7 +118,8 @@ module_exit(snap_exit); struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *)) + struct packet_type *, + struct net_device *)) { struct datalink_proto *proto = NULL; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struc /* found in vlan_dev.c */ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype); + struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check * */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype) + struct packet_type* ptype, struct net_device *orig_dev) { unsigned char *rawp = NULL; struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); diff --git a/net/Kconfig b/net/Kconfig --- a/net/Kconfig +++ b/net/Kconfig @@ -205,6 +205,8 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. +source "net/netfilter/Kconfig" + endmenu endmenu diff --git a/net/Makefile b/net/Makefile --- a/net/Makefile +++ b/net/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NET) += $(tmp-y) obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ obj-$(CONFIG_INET) += ipv4/ +obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ ifneq ($(CONFIG_IPV6),) diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_ * frame. We currently only support Ethernet. */ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct elapaarp *ea = aarp_hdr(skb); int hash, ret = 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1390,7 +1390,7 @@ free_it: * [ie ARPHRD_ETHERTALK] */ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct ddpehdr *ddp; struct sock *sock; @@ -1482,7 +1482,7 @@ freeit: * header and append a long one. */ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { /* Expand any short form frames */ if (skb->mac.raw[2] == 1) { @@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb } skb->h.raw = skb->data; - return atalk_rcv(skb, dev, pt); + return atalk_rcv(skb, dev, pt, orig_dev); freeit: kfree_skb(skb); return 0; diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -34,7 +34,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) { - struct sk_buff *skb; unsigned long flags; struct sk_buff *skb_from = (struct sk_buff *) from; struct sk_buff *skb_to = (struct sk_buff *) to; @@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *fr prev->next = skb_to; to->prev->next = from->next; to->prev = from->prev; - for (skb = from->next; skb != skb_to; skb = skb->next) - skb->list = to; to->qlen += from->qlen; spin_unlock(&to->lock); from->prev = skb_from; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -132,7 +132,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ + ip_rcv(skb, skb->dev, NULL, skb->dev); /* Wrong ptype */ return 1; } #endif @@ -258,7 +258,7 @@ static int ax25_rcv(struct sk_buff *skb, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ + ip_rcv(skb, dev, ptype, dev); /* Note ptype here is the wrong one, fix me later */ break; case AX25_P_ARP: @@ -268,7 +268,7 @@ static int ax25_rcv(struct sk_buff *skb, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_ARP); - arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ + arp_rcv(skb, dev, ptype, dev); /* Note ptype here is wrong... */ break; #endif case AX25_P_TEXT: @@ -454,7 +454,7 @@ static int ax25_rcv(struct sk_buff *skb, * Receive an AX.25 frame via a SLIP interface. */ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25) if (skb_prev == NULL) skb_queue_head(&ax25->write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &ax25->write_queue); skb_prev = skb; } } diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buf { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; - if ((*pskb)->nfmark != info->mark) { + if ((*pskb)->nfmark != info->mark) (*pskb)->nfmark = info->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return info->target; } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,7 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/dev.c b/net/core/dev.c --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1059,7 +1059,7 @@ void dev_queue_xmit_nit(struct sk_buff * skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); @@ -1426,14 +1426,14 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static __inline__ void skb_bond(struct sk_buff *skb) +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) { - skb->real_dev = skb->dev; + if (dev->master) skb->dev = dev->master; - } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1483,10 +1483,11 @@ static void net_tx_action(struct softirq } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1497,7 +1498,8 @@ struct net_bridge_fdb_entry *(*br_fdb_ge void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret) + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; @@ -1506,14 +1508,14 @@ static __inline__ int handle_bridge(stru return 0; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev); + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); *pt_prev = NULL; } return br_handle_frame_hook(port, pskb); } #else -#define handle_bridge(skb, pt_prev, ret) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1535,17 +1537,14 @@ static int ing_filter(struct sk_buff *sk __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { printk("Redir loop detected Dropping packet (%s->%s)\n", - skb->input_dev?skb->input_dev->name:"??",skb->dev->name); + skb->input_dev->name, skb->dev->name); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); - if (NULL == skb->input_dev) { - skb->input_dev = skb->dev; - printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); - } + spin_lock(&dev->ingress_lock); if ((q = dev->qdisc_ingress) != NULL) result = q->enqueue(skb, q); @@ -1560,6 +1559,7 @@ static int ing_filter(struct sk_buff *sk int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; @@ -1570,7 +1570,10 @@ int netif_receive_skb(struct sk_buff *sk if (!skb->stamp.tv_sec) net_timestamp(&skb->stamp); - skb_bond(skb); + if (!skb->input_dev) + skb->input_dev = skb->dev; + + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; @@ -1591,14 +1594,14 @@ int netif_receive_skb(struct sk_buff *sk list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; /* noone else should process this after*/ } else { skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -1617,7 +1620,7 @@ ncls: handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1625,13 +1628,13 @@ ncls: if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining diff --git a/net/core/netfilter.c b/net/core/netfilter.c --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,12 +22,7 @@ #include #include #include -#include -#include -#include #include -#include -#include /* In this code, we can be waiting indefinitely for userspace to * service a packet if a hook returns NF_QUEUE. We could keep a count @@ -58,6 +53,9 @@ static struct nf_queue_handler_t { nf_queue_outfn_t outfn; void *data; } queue_handler[NPROTO]; + +static struct nf_queue_rerouter *queue_rerouter; + static DEFINE_RWLOCK(queue_handler_lock); int nf_register_hook(struct nf_hook_ops *reg) @@ -223,7 +221,8 @@ static unsigned int nf_iterate(struct li verdict = elem->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG - if (unlikely(verdict > NF_MAX_VERDICT)) { + if (unlikely((verdict & NF_VERDICT_MASK) + > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); continue; @@ -241,6 +240,9 @@ int nf_register_queue_handler(int pf, nf { int ret; + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); if (queue_handler[pf].outfn) ret = -EBUSY; @@ -257,6 +259,9 @@ int nf_register_queue_handler(int pf, nf /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -265,16 +270,54 @@ int nf_unregister_queue_handler(int pf) return 0; } +int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + + return 0; +} + +int nf_unregister_queue_rerouter(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + return 0; +} + +void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +{ + int pf; + + write_lock_bh(&queue_handler_lock); + for (pf = 0; pf < NPROTO; pf++) { + if (queue_handler[pf].outfn == outfn) { + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + } + } + write_unlock_bh(&queue_handler_lock); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int nf_queue(struct sk_buff *skb, +static int nf_queue(struct sk_buff **skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -287,17 +330,17 @@ static int nf_queue(struct sk_buff *skb, read_lock(&queue_handler_lock); if (!queue_handler[pf].outfn) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } - info = kmalloc(sizeof(*info), GFP_ATOMIC); + info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - skb); + *skb); read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -316,15 +359,22 @@ static int nf_queue(struct sk_buff *skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = (*skb)->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif + if (queue_rerouter[pf].save) + queue_rerouter[pf].save(*skb, info); + + status = queue_handler[pf].outfn(*skb, info, queuenum, + queue_handler[pf].data); + + if (status >= 0 && queue_rerouter[pf].reroute) + status = queue_rerouter[pf].reroute(skb, info); - status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); read_unlock(&queue_handler_lock); if (status < 0) { @@ -337,9 +387,11 @@ static int nf_queue(struct sk_buff *skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(skb); + kfree_skb(*skb); + return 1; } + return 1; } @@ -368,9 +420,10 @@ next_hook: } else if (verdict == NF_DROP) { kfree_skb(*pskb); ret = -EPERM; - } else if (verdict == NF_QUEUE) { + } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; } unlock: @@ -427,14 +480,15 @@ void nf_reinject(struct sk_buff *skb, st info->okfn, INT_MIN); } - switch (verdict) { + switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: info->okfn(skb); break; case NF_QUEUE: - if (!nf_queue(skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn)) + if (!nf_queue(&skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; } @@ -447,73 +501,7 @@ void nf_reinject(struct sk_buff *skb, st return; } -#ifdef CONFIG_INET -/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct rtable *rt; - struct flowi fl = {}; - struct dst_entry *odst; - unsigned int hh_len; - - /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause - * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. - */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; -#ifdef CONFIG_IP_ROUTE_FWMARK - fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; -#endif - fl.proto = iph->protocol; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&rt, &fl) != 0) - return -1; - - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return -1; - } - dst_release(&rt->u.dst); - dst_release(odst); - } - - if ((*pskb)->dst->error) - return -1; - - /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - return 0; -} -EXPORT_SYMBOL(ip_route_me_harder); - -int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; @@ -540,8 +528,7 @@ copy_skb: *pskb = nskb; return 1; } -EXPORT_SYMBOL(skb_ip_make_writable); -#endif /*CONFIG_INET*/ +EXPORT_SYMBOL(skb_make_writable); /* Internal logging interface, which relies on the real LOG target modules */ @@ -627,6 +614,12 @@ void __init netfilter_init(void) { int i, h; + queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), + GFP_KERNEL); + if (!queue_rerouter) + panic("netfilter: cannot allocate queue rerouter array\n"); + memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); + for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); @@ -645,4 +638,7 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); +EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); +EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/core/request_sock.c b/net/core/request_sock.c --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -32,7 +32,6 @@ * Further increasing requires to change hash table size. */ int sysctl_max_syn_backlog = 256; -EXPORT_SYMBOL(sysctl_max_syn_backlog); int reqsk_queue_alloc(struct request_sock_queue *queue, const int nr_table_entries) @@ -53,6 +52,7 @@ int reqsk_queue_alloc(struct request_soc get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = queue->rskq_accept_head = NULL; + lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -62,3 +62,28 @@ int reqsk_queue_alloc(struct request_soc } EXPORT_SYMBOL(reqsk_queue_alloc); + +void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + /* make all the listen_opt local to us */ + struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + + if (lopt->qlen != 0) { + int i; + + for (i = 0; i < lopt->nr_table_entries; i++) { + struct request_sock *req; + + while ((req = lopt->syn_table[i]) != NULL) { + lopt->syn_table[i] = req->dl_next; + lopt->qlen--; + reqsk_free(req); + } + } + } + + BUG_TRAP(lopt->qlen == 0); + kfree(lopt); +} + +EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,7 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/core/skbuff.c b/net/core/skbuff.c --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -281,8 +281,6 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - BUG_ON(skb->list != NULL); - dst_release(skb->dst); #ifdef CONFIG_XFRM secpath_put(skb->sp); @@ -302,7 +300,6 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_index = 0; #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; - skb->tc_classid = 0; #endif #endif @@ -333,11 +330,9 @@ struct sk_buff *skb_clone(struct sk_buff #define C(x) n->x = skb->x n->next = n->prev = NULL; - n->list = NULL; n->sk = NULL; C(stamp); C(dev); - C(real_dev); C(h); C(nh); C(mac); @@ -361,7 +356,6 @@ struct sk_buff *skb_clone(struct sk_buff n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); - C(nfcache); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -370,9 +364,6 @@ struct sk_buff *skb_clone(struct sk_buff nf_bridge_get(skb->nf_bridge); #endif #endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -380,7 +371,6 @@ struct sk_buff *skb_clone(struct sk_buff n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(input_dev); - C(tc_classid); #endif #endif @@ -404,10 +394,8 @@ static void copy_skb_header(struct sk_bu */ unsigned long offset = new->data - old->data; - new->list = NULL; new->sk = NULL; new->dev = old->dev; - new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); @@ -424,7 +412,6 @@ static void copy_skb_header(struct sk_bu new->destructor = NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; - new->nfcache = old->nfcache; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; @@ -1344,50 +1331,43 @@ void skb_queue_tail(struct sk_buff_head __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } + /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove + * @list: list to use * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls + * Remove a packet from a list. The list locks are taken and this + * function is atomic with respect to other list locked calls * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. + * You must know what list the SKB is on. */ -void skb_unlink(struct sk_buff *skb) +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; - - if (list) { - unsigned long flags; + unsigned long flags; - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock_irqrestore(&list->lock, flags); } - /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert + * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) +void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_append(old, newsk, list); + spin_unlock_irqrestore(&list->lock, flags); } @@ -1395,19 +1375,21 @@ void skb_append(struct sk_buff *old, str * skb_insert - insert a buffer * @old: buffer to insert before * @newsk: buffer to insert + * @list: list to use + * + * Place a packet before a given packet in a list. The list locks are + * taken and this function is atomic with respect to other list locked + * calls. * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls * A buffer cannot be placed on two lists at the same time. */ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_insert(newsk, old->prev, old, list); + spin_unlock_irqrestore(&list->lock, flags); } #if 0 diff --git a/net/core/sock.c b/net/core/sock.c --- a/net/core/sock.c +++ b/net/core/sock.c @@ -259,7 +259,7 @@ int sock_setsockopt(struct socket *sock, if (val > sysctl_wmem_max) val = sysctl_wmem_max; - +set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; if ((val * 2) < SOCK_MIN_SNDBUF) sk->sk_sndbuf = SOCK_MIN_SNDBUF; @@ -273,6 +273,13 @@ int sock_setsockopt(struct socket *sock, sk->sk_write_space(sk); break; + case SO_SNDBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_sndbuf; + case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think about it this is right. Otherwise apps have to @@ -281,7 +288,7 @@ int sock_setsockopt(struct socket *sock, if (val > sysctl_rmem_max) val = sysctl_rmem_max; - +set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* FIXME: is this lower bound the right one? */ if ((val * 2) < SOCK_MIN_RCVBUF) @@ -290,6 +297,13 @@ int sock_setsockopt(struct socket *sock, sk->sk_rcvbuf = val * 2; break; + case SO_RCVBUFFORCE: + if (!capable(CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } + goto set_rcvbuf; + case SO_KEEPALIVE: #ifdef CONFIG_INET if (sk->sk_protocol == IPPROTO_TCP) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb nskb = skb->next; if (skb->len == 0) { - skb_unlink(skb); + skb_unlink(skb, queue); kfree_skb(skb); /* * N.B. Don't refer to skb or cb after this point @@ -2073,7 +2073,7 @@ static struct notifier_block dn_dev_noti .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { .type = __constant_htons(ETH_P_DNA_RT), diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock xmit_count = cb2->xmit_count; segnum = cb2->segnum; /* Remove and drop ack'ed packet */ - skb_unlink(ack); + skb_unlink(ack, q); kfree_skb(ack); ack = NULL; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_ return NET_RX_SUCCESS; } -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dn_skb_cb *cb; unsigned char flags = 0; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, + THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; @@ -162,6 +163,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); MODULE_AUTHOR("Steven Whitehouse "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); module_init(init); module_exit(fini); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq foundit: tx_result(skb->sk, eb->cookie, result); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); spin_unlock_irqrestore(&aun_queue_lock, flags); kfree_skb(skb); } @@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h) { tx_result(skb->sk, eb->cookie, ECTYPE_TRANSMIT_NOT_PRESENT); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); kfree_skb(skb); } skb = newskb; @@ -1009,7 +1009,7 @@ release: * Receive an Econet frame from a device. */ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -163,7 +163,6 @@ __be16 eth_type_trans(struct sk_buff *sk skb->mac.raw=skb->data; skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - skb->input_dev = dev; if(*eth->h_dest&1) { diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := utils.o route.o inetpeer.o tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 d static void parp_redo(struct sk_buff *skb) { nf_reset(skb); - arp_rcv(skb, skb->dev, NULL); + arp_rcv(skb, skb->dev, NULL, skb->dev); } /* @@ -927,7 +927,7 @@ out: * Receive an arp request from the device layer. */ -int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *arp; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) @@ -662,5 +662,4 @@ void __init ip_fib_init(void) } EXPORT_SYMBOL(inet_addr_type); -EXPORT_SYMBOL(ip_dev_find); EXPORT_SYMBOL(ip_rt_ioctl); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -456,5 +456,3 @@ static void peer_check_expire(unsigned l peer_total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } - -EXPORT_SYMBOL(inet_peer_idlock); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,7 +358,7 @@ drop: /* * Main IP Receive routine. */ -int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -620,6 +620,3 @@ int ip_options_rcv_srr(struct sk_buff *s } return 0; } - -EXPORT_SYMBOL(ip_options_compile); -EXPORT_SYMBOL(ip_options_undo); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -205,7 +205,7 @@ static inline int ip_finish_output2(stru return -EINVAL; } -int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; @@ -392,7 +392,6 @@ static void ip_copy_metadata(struct sk_b #endif #ifdef CONFIG_NETFILTER to->nfmark = from->nfmark; - to->nfcache = from->nfcache; /* Connection association is same as pre-frag packet */ nf_conntrack_put(to->nfct); to->nfct = from->nfct; @@ -1329,12 +1328,7 @@ void __init ip_init(void) #endif } -EXPORT_SYMBOL(ip_finish_output); EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_generic_getfrag); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(ip_send_check); - -#ifdef CONFIG_SYSCTL -EXPORT_SYMBOL(sysctl_ip_default_ttl); -#endif diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -393,7 +393,7 @@ static int __init ic_defaults(void) #ifdef IPCONFIG_RARP -static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { .type = __constant_htons(ETH_P_RARP), @@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void) * Process received RARP packet. */ static int __init -ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *rarp; unsigned char *rarp_ptr; @@ -555,7 +555,7 @@ struct bootp_pkt { /* BOOTP packet form #define DHCPRELEASE 7 #define DHCPINFORM 8 -static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { .type = __constant_htons(ETH_P_IP), @@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *e /* * Receive BOOTP reply. */ -static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct bootp_pkt *b; struct iphdr *h; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -22,6 +22,7 @@ * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs + * Harald Welte don't use nfcache * */ @@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(u const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ @@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; verdict = NF_ACCEPT; out: @@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct s EnterFunction(11); - if (skb->nfcache & NFC_IPVS_PROPERTY) + if (skb->ipvs_property) return NF_ACCEPT; iph = skb->nh.iph; @@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct s ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ - (skb)->nfcache |= NFC_IPVS_PROPERTY; \ + (skb)->ipvs_property = 1; \ (skb)->ip_summed = CHECKSUM_NONE; \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c @@ -107,7 +107,7 @@ static int drr_dev_event(struct notifier return NOTIFY_DONE; } -struct notifier_block drr_dev_notifier = { +static struct notifier_block drr_dev_notifier = { .notifier_call = drr_dev_event, }; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c new file mode 100644 --- /dev/null +++ b/net/ipv4/netfilter.c @@ -0,0 +1,139 @@ +/* IPv4 specific functions of netfilter core */ + +#include +#ifdef CONFIG_NETFILTER + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ +int ip_route_me_harder(struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt; + struct flowi fl = {}; + struct dst_entry *odst; + unsigned int hh_len; + + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause + * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. + */ + if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + fl.nl_u.ip4_u.daddr = iph->daddr; + fl.nl_u.ip4_u.saddr = iph->saddr; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; +#ifdef CONFIG_IP_ROUTE_FWMARK + fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; +#endif + fl.proto = iph->protocol; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + /* Drop old route. */ + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + /* non-local src, find valid iif to satisfy + * rp-filter when calling ip_route_input. */ + fl.nl_u.ip4_u.daddr = iph->saddr; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + odst = (*pskb)->dst; + if (ip_route_input(*pskb, iph->daddr, iph->saddr, + RT_TOS(iph->tos), rt->u.dst.dev) != 0) { + dst_release(&rt->u.dst); + return -1; + } + dst_release(&rt->u.dst); + dst_release(odst); + } + + if ((*pskb)->dst->error) + return -1; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + + return 0; +} +EXPORT_SYMBOL(ip_route_me_harder); + +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. + */ + +struct ip_rt_info { + u_int32_t daddr; + u_int32_t saddr; + u_int8_t tos; +}; + +static void queue_save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + const struct iphdr *iph = skb->nh.iph; + + rt_info->tos = iph->tos; + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + const struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + struct iphdr *iph = (*pskb)->nh.iph; + + if (!(iph->tos == rt_info->tos + && iph->daddr == rt_info->daddr + && iph->saddr == rt_info->saddr)) + return ip_route_me_harder(pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip_reroute = { + .rer_size = sizeof(struct ip_rt_info), + .save = queue_save, + .reroute = queue_reroute, +}; + +static int init(void) +{ + return nf_register_queue_rerouter(PF_INET, &ip_reroute); +} + +static void fini(void) +{ + nf_unregister_queue_rerouter(PF_INET); +} + +module_init(init); +module_exit(fini); + +#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK of packets, but this mark value is kept in the conntrack session instead of the individual packets. +config IP_NF_CONNTRACK_EVENTS + bool "Connection tracking events" + depends on IP_NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + provide a notifier chain that can be used by other kernel code + to get notified about changes in the connection tracking state. + + IF unsure, say `N'. + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -100,11 +110,15 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. config IP_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help Netfilter has the ability to queue packets to user space: the netlink device can be used to access them using this driver. + This option enables the old IPv4-only "ip_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). + To compile it as a module, choose M here. If unsure, say N. config IP_NF_IPTABLES @@ -692,5 +706,12 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + help + This option enables support for a netlink-based userspace interface + + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +# conntrack netlink interface +obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o + + # SCTP protocol connection tracking obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o @@ -87,3 +91,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_m obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -37,6 +37,7 @@ #include #include #include +#include /* ip_conntrack_lock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -49,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.1" +#define IP_CONNTRACK_VERSION "2.3" #if 0 #define DEBUGP printk @@ -76,15 +77,85 @@ unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; -DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); +static unsigned int ip_conntrack_next_id = 1; +static unsigned int ip_conntrack_expect_next_id = 1; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct notifier_block *ip_conntrack_chain; +struct notifier_block *ip_conntrack_expect_chain; + +DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + +static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) + notifier_call_chain(&ip_conntrack_chain, ecache->events, + ecache->ct); + ecache->events = 0; +} +void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + __deliver_cached_events(ecache); +} + +/* Deliver all cached events for a particular conntrack. This is called + * by code prior to async packet handling or freeing the skb */ void -ip_conntrack_put(struct ip_conntrack *ct) +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) { - IP_NF_ASSERT(ct); - nf_conntrack_put(&ct->ct_general); + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (!ct) + return; + + if (ecache->ct == ct) { + DEBUGP("ecache: delivering event for %p\n", ct); + __deliver_cached_events(ecache); + } else { + if (net_ratelimit()) + printk(KERN_WARNING "ecache: want to deliver for %p, " + "but cache has %p\n", ct, ecache->ct); + } + + /* signalize that events have already been delivered */ + ecache->ct = NULL; +} + +/* Deliver cached events for old pending events, if current conntrack != old */ +void ip_conntrack_event_cache_init(const struct sk_buff *skb) +{ + struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + /* take care of delivering potentially old events */ + if (ecache->ct != ct) { + enum ip_conntrack_info ctinfo; + /* we have to check, since at startup the cache is NULL */ + if (likely(ecache->ct)) { + DEBUGP("ecache: entered for different conntrack: " + "ecache->ct=%p, skb->nfct=%p. delivering " + "events\n", ecache->ct, ct); + __deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + } else { + DEBUGP("ecache: entered for conntrack %p, " + "cache was clean before\n", ct); + } + + /* initialize for this conntrack/packet */ + ecache->ct = ip_conntrack_get(skb, &ctinfo); + /* ecache->events cleared by __deliver_cached_devents() */ + } else { + DEBUGP("ecache: re-entered for conntrack %p.\n", ct); + } } +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + +DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); + static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; @@ -146,6 +217,12 @@ static void unlink_expect(struct ip_conn exp->master->expecting--; } +void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) +{ + unlink_expect(exp); + ip_conntrack_expect_put(exp); +} + static void expectation_timed_out(unsigned long ul_expect) { struct ip_conntrack_expect *exp = (void *)ul_expect; @@ -156,6 +233,33 @@ static void expectation_timed_out(unsign ip_conntrack_expect_put(exp); } +struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + list_for_each_entry(i, &ip_conntrack_expect_list, list) { + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; + } + } + return NULL; +} + +/* Just find a expectation corresponding to a tuple. */ +struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + read_lock_bh(&ip_conntrack_lock); + i = __ip_conntrack_expect_find(tuple); + read_unlock_bh(&ip_conntrack_lock); + + return i; +} + /* If an expectation for this connection is found, it gets delete from * global list then returned. */ static struct ip_conntrack_expect * @@ -180,7 +284,7 @@ find_expectation(const struct ip_conntra } /* delete all expectations for this conntrack */ -static void remove_expectations(struct ip_conntrack *ct) +void ip_ct_remove_expectations(struct ip_conntrack *ct) { struct ip_conntrack_expect *i, *tmp; @@ -210,7 +314,7 @@ clean_from_lists(struct ip_conntrack *ct LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); } static void @@ -223,10 +327,12 @@ destroy_conntrack(struct nf_conntrack *n IP_NF_ASSERT(atomic_read(&nfct->use) == 0); IP_NF_ASSERT(!timer_pending(&ct->timeout)); + set_bit(IPS_DYING_BIT, &ct->status); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ - proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); @@ -238,7 +344,7 @@ destroy_conntrack(struct nf_conntrack *n * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); /* We overload first tuple to link into unconfirmed list. */ if (!is_confirmed(ct)) { @@ -253,14 +359,14 @@ destroy_conntrack(struct nf_conntrack *n ip_conntrack_put(ct->master); DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); - kmem_cache_free(ip_conntrack_cachep, ct); - atomic_dec(&ip_conntrack_count); + ip_conntrack_free(ct); } static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; + ip_conntrack_event(IPCT_DESTROY, ct); write_lock_bh(&ip_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ @@ -280,7 +386,7 @@ conntrack_tuple_cmp(const struct ip_conn && ip_ct_tuple_equal(tuple, &i->tuple); } -static struct ip_conntrack_tuple_hash * +struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) { @@ -315,6 +421,29 @@ ip_conntrack_find_get(const struct ip_co return h; } +static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, + unsigned int hash, + unsigned int repl_hash) +{ + ct->id = ++ip_conntrack_next_id; + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); +} + +void ip_conntrack_hash_insert(struct ip_conntrack *ct) +{ + unsigned int hash, repl_hash; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + write_lock_bh(&ip_conntrack_lock); + __ip_conntrack_hash_insert(ct, hash, repl_hash); + write_unlock_bh(&ip_conntrack_lock); +} + /* Confirm a connection given skb; places it in hash table */ int __ip_conntrack_confirm(struct sk_buff **pskb) @@ -361,10 +490,7 @@ __ip_conntrack_confirm(struct sk_buff ** /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + __ip_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -374,6 +500,16 @@ __ip_conntrack_confirm(struct sk_buff ** set_bit(IPS_CONFIRMED_BIT, &ct->status); CONNTRACK_STAT_INC(insert); write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); +#ifdef CONFIG_IP_NF_NAT_NEEDED + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); +#endif + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; } @@ -438,34 +574,84 @@ static inline int helper_cmp(const struc return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); } -static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +static struct ip_conntrack_helper * +__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct ip_conntrack_helper *, tuple); } -/* Allocate a new conntrack: we return -ENOMEM if classification - failed due to stress. Otherwise it really is unclassifiable. */ -static struct ip_conntrack_tuple_hash * -init_conntrack(const struct ip_conntrack_tuple *tuple, - struct ip_conntrack_protocol *protocol, - struct sk_buff *skb) +struct ip_conntrack_helper * +ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_helper *helper; + + /* need ip_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&ip_conntrack_lock); + + helper = __ip_conntrack_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&ip_conntrack_lock); + + return helper; +} + +void ip_conntrack_helper_put(struct ip_conntrack_helper *helper) +{ + module_put(helper->me); +} + +struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol) +{ + return ip_ct_protos[protocol]; +} + +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + preempt_disable(); + p = __ip_conntrack_proto_find(protocol); + if (p) { + if (!try_module_get(p->me)) + p = &ip_conntrack_generic_protocol; + } + preempt_enable(); + + return p; +} + +void ip_conntrack_proto_put(struct ip_conntrack_protocol *p) +{ + module_put(p->me); +} + +struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, + struct ip_conntrack_tuple *repl) { struct ip_conntrack *conntrack; - struct ip_conntrack_tuple repl_tuple; - size_t hash; - struct ip_conntrack_expect *exp; if (!ip_conntrack_hash_rnd_initted) { get_random_bytes(&ip_conntrack_hash_rnd, 4); ip_conntrack_hash_rnd_initted = 1; } - hash = hash_conntrack(tuple); - if (ip_conntrack_max && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&ip_conntrack_hash[hash])) { if (net_ratelimit()) @@ -476,31 +662,58 @@ init_conntrack(const struct ip_conntrack } } - if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { - DEBUGP("Can't invert tuple.\n"); - return NULL; - } - conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); - return ERR_PTR(-ENOMEM); + return NULL; } memset(conntrack, 0, sizeof(*conntrack)); atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; - if (!protocol->new(conntrack, skb)) { - kmem_cache_free(ip_conntrack_cachep, conntrack); - return NULL; - } + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + atomic_inc(&ip_conntrack_count); + + return conntrack; +} + +void +ip_conntrack_free(struct ip_conntrack *conntrack) +{ + atomic_dec(&ip_conntrack_count); + kmem_cache_free(ip_conntrack_cachep, conntrack); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + * failed due to stress. Otherwise it really is unclassifiable */ +static struct ip_conntrack_tuple_hash * +init_conntrack(struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + struct ip_conntrack_expect *exp; + + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + if (!(conntrack = ip_conntrack_alloc(tuple, &repl_tuple))) + return NULL; + + if (!protocol->new(conntrack, skb)) { + ip_conntrack_free(conntrack); + return NULL; + } + write_lock_bh(&ip_conntrack_lock); exp = find_expectation(tuple); @@ -516,7 +729,7 @@ init_conntrack(const struct ip_conntrack nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); } else { - conntrack->helper = ip_ct_find_helper(&repl_tuple); + conntrack->helper = __ip_conntrack_helper_find(&repl_tuple); CONNTRACK_STAT_INC(new); } @@ -524,7 +737,6 @@ init_conntrack(const struct ip_conntrack /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); - atomic_inc(&ip_conntrack_count); write_unlock_bh(&ip_conntrack_lock); if (exp) { @@ -602,7 +814,7 @@ unsigned int ip_conntrack_in(unsigned in struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; struct ip_conntrack_protocol *proto; - int set_reply; + int set_reply = 0; int ret; /* Previously seen (loopback or untracked)? Ignore. */ @@ -620,9 +832,6 @@ unsigned int ip_conntrack_in(unsigned in return NF_DROP; } - /* FIXME: Do this right please. --RR */ - (*pskb)->nfcache |= NFC_UNKNOWN; - /* Doesn't cover locally-generated broadcast, so not worth it. */ #if 0 /* Ignore broadcast: no `connection'. */ @@ -638,7 +847,7 @@ unsigned int ip_conntrack_in(unsigned in } #endif - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); + proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter @@ -664,6 +873,8 @@ unsigned int ip_conntrack_in(unsigned in IP_NF_ASSERT((*pskb)->nfct); + ip_conntrack_event_cache_init(*pskb); + ret = proto->packet(ct, *pskb, ctinfo); if (ret < 0) { /* Invalid: inverse of the return code tells @@ -674,8 +885,8 @@ unsigned int ip_conntrack_in(unsigned in return -ret; } - if (set_reply) - set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_STATUS, *pskb); return ret; } @@ -684,7 +895,7 @@ int invert_tuplepr(struct ip_conntrack_t const struct ip_conntrack_tuple *orig) { return ip_ct_invert_tuple(inverse, orig, - ip_ct_find_proto(orig->dst.protonum)); + __ip_conntrack_proto_find(orig->dst.protonum)); } /* Would two expected things clash? */ @@ -764,6 +975,8 @@ static void ip_conntrack_expect_insert(s exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); + exp->id = ++ip_conntrack_expect_next_id; + atomic_inc(&exp->use); CONNTRACK_STAT_INC(expect_create); } @@ -822,6 +1035,7 @@ int ip_conntrack_expect_related(struct i evict_oldest_expect(expect->master); ip_conntrack_expect_insert(expect); + ip_conntrack_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&ip_conntrack_lock); @@ -842,7 +1056,7 @@ void ip_conntrack_alter_reply(struct ip_ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = ip_ct_find_helper(newreply); + conntrack->helper = __ip_conntrack_helper_find(newreply); write_unlock_bh(&ip_conntrack_lock); } @@ -856,11 +1070,26 @@ int ip_conntrack_helper_register(struct return 0; } +struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *name) +{ + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} + static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { - if (tuplehash_to_ctrack(i)->helper == me) + if (tuplehash_to_ctrack(i)->helper == me) { + ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; + } return 0; } @@ -922,12 +1151,46 @@ void ip_ct_refresh_acct(struct ip_conntr if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); } } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be + * in ip_conntrack_core, since we don't want the protocols to autoload + * or depend on ctnetlink */ +int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + &tuple->src.u.tcp.port); + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + &tuple->dst.u.tcp.port); + return 0; + +nfattr_failure: + return -1; +} + +int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *t) +{ + if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) + return -EINVAL; + + t->src.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + + return 0; +} +#endif + /* Returns new sk_buff, or NULL */ struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -938,10 +1201,8 @@ ip_ct_gather_frags(struct sk_buff *skb, skb = ip_defrag(skb, user); local_bh_enable(); - if (skb) { + if (skb) ip_send_check(skb->nh.iph); - skb->nfcache |= NFC_ALTERED; - } return skb; } @@ -1012,6 +1273,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack_put(ct); } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + { + /* we need to deliver all cached events in order to drop + * the reference counts */ + int cpu; + for_each_cpu(cpu) { + struct ip_conntrack_ecache *ecache = + &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) { + __ip_ct_deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + ecache->ct = NULL; + } + } + } +#endif } /* Fast function for those who don't want to parse /proc (and I don't @@ -1091,16 +1369,13 @@ static void free_conntrack_hash(void) * ip_conntrack_htable_size)); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void ip_conntrack_cleanup(void) +void ip_conntrack_flush() { - ip_ct_attach = NULL; /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); - + i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { @@ -1110,7 +1385,14 @@ void ip_conntrack_cleanup(void) /* wait until all references to ip_conntrack_untracked are dropped */ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) schedule(); +} +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void ip_conntrack_cleanup(void) +{ + ip_ct_attach = NULL; + ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); free_conntrack_hash(); diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const st } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) +static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, + struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, st oldest = i; } - if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - else if (oldest != NUM_SEQ_TO_REMEMBER) + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } else if (oldest != NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][oldest] = nl_seq; + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } } static int help(struct sk_buff **pskb, @@ -439,7 +443,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info,dir); + update_nl_seq(seq, ct_ftp_info,dir, *pskb); out: spin_unlock_bh(&ip_ftp_lock); return ret; diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c new file mode 100644 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -0,0 +1,1588 @@ +/* Connection tracking via netlink socket. Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist + * (C) 2002-2005 by Harald Welte + * (C) 2003 by Patrick Mchardy + * (C) 2005 by Pablo Neira Ayuso + * + * I've reworked this stuff to use attributes instead of conntrack + * structures. 5.44 am. I need more tea. --pablo 05/07/11. + * + * Initial connection tracking via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata version[] = "0.90"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + + +static inline int +ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_protocol *proto; + + NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + if (proto && proto->tuple_to_nfattr) + return proto->tuple_to_nfattr(skb, tuple); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_tuples(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct nfattr *nest_parms; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + ctnetlink_dump_tuples_proto(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t status = htonl((u_int32_t) ct->status); + NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + long timeout_l = ct->timeout.expires - jiffies; + u_int32_t timeout; + + if (timeout_l < 0) + timeout = 0; + else + timeout = htonl(timeout_l / HZ); + + NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + + struct nfattr *nest_proto; + int ret; + + if (!proto || !proto->to_nfattr) + return 0; + + nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + + ret = proto->to_nfattr(skb, nest_proto, ct); + + ip_conntrack_proto_put(proto); + + NFA_NEST_END(skb, nest_proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct nfattr *nest_helper; + + if (!ct->helper) + return 0; + + nest_helper = NFA_NEST(skb, CTA_HELP); + NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); + + if (ct->helper->to_nfattr) + ct->helper->to_nfattr(skb, ct); + + NFA_NEST_END(skb, nest_helper); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static inline int +ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, + enum ip_conntrack_dir dir) +{ + enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nfattr *nest_count = NFA_NEST(skb, type); + u_int64_t tmp; + + tmp = cpu_to_be64(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + + tmp = cpu_to_be64(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + + NFA_NEST_END(skb, nest_count); + + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_counters(a, b, c) (0) +#endif + +#ifdef CONFIG_IP_NF_CONNTRACK_MARK +static inline int +ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t mark = htonl(ct->mark); + + NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_mark(a, b) (0) +#endif + +static inline int +ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + unsigned int use = htonl(atomic_read(&ct->ct_general.use)); + + NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + return 0; + +nfattr_failure: + return -1; +} + +#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +static int +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, int nowait, + const struct ip_conntrack *ct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + if (ctnetlink_dump_status(skb, ct) < 0 || + ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0 || + ctnetlink_dump_helpinfo(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + struct ip_conntrack *ct = (struct ip_conntrack *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + unsigned int flags = 0, groups; + + /* ignore our fake conntrack entry */ + if (ct == &ip_conntrack_untracked) + return NOTIFY_DONE; + + if (events & IPCT_DESTROY) { + type = IPCTNL_MSG_CT_DELETE; + groups = NF_NETLINK_CONNTRACK_DESTROY; + goto alloc_skb; + } + if (events & (IPCT_NEW | IPCT_RELATED)) { + type = IPCTNL_MSG_CT_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + /* dump everything */ + events = ~0UL; + groups = NF_NETLINK_CONNTRACK_NEW; + goto alloc_skb; + } + if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { + type = IPCTNL_MSG_CT_NEW; + groups = NF_NETLINK_CONNTRACK_UPDATE; + goto alloc_skb; + } + + return NOTIFY_DONE; + +alloc_skb: + /* FIXME: Check if there are any listeners before, don't hurt performance */ + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + /* NAT stuff is now a status flag */ + if ((events & IPCT_STATUS || events & IPCT_NATINFO) + && ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_REFRESH + && ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_HELPINFO + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, groups, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + +static int ctnetlink_done(struct netlink_callback *cb) +{ + DEBUGP("entered %s\n", __FUNCTION__); + return 0; +} + +static int +ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + read_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + } + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + +static const int cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), + [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +}; + +static inline int +ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_IP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_IP_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + if (!tb[CTA_IP_V4_SRC-1]) + return -EINVAL; + tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + + if (!tb[CTA_IP_V4_DST-1]) + return -EINVAL; + tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +static const int cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_NUM-1] = sizeof(u_int16_t), + [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t), +}; + +static inline int +ctnetlink_parse_tuple_proto(struct nfattr *attr, + struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_PROTO_MAX]; + struct ip_conntrack_protocol *proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_PROTO_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + if (!tb[CTA_PROTO_NUM-1]) + return -EINVAL; + tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + + if (likely(proto && proto->nfattr_to_tuple)) { + ret = proto->nfattr_to_tuple(tb, tuple); + ip_conntrack_proto_put(proto); + } + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, + enum ctattr_tuple type) +{ + struct nfattr *tb[CTA_TUPLE_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_TUPLE_MAX * sizeof(tb)); + memset(tuple, 0, sizeof(*tuple)); + + if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) + goto nfattr_failure; + + if (!tb[CTA_TUPLE_IP-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + if (err < 0) + return err; + + if (!tb[CTA_TUPLE_PROTO-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + if (err < 0) + return err; + + /* orig and expect tuples get DIR_ORIGINAL */ + if (type == CTA_TUPLE_REPLY) + tuple->dst.dir = IP_CT_DIR_REPLY; + else + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + DUMP_TUPLE(tuple); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +static const int cta_min_protonat[CTA_PROTONAT_MAX] = { + [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), + [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +}; + +static int ctnetlink_parse_nat_proto(struct nfattr *attr, + const struct ip_conntrack *ct, + struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_PROTONAT_MAX]; + struct ip_nat_protocol *npt; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_PROTONAT_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) + goto nfattr_failure; + + npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + if (!npt) + return 0; + + if (!npt->nfattr_to_range) { + ip_nat_proto_put(npt); + return 0; + } + + /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ + if (npt->nfattr_to_range(tb, range) > 0) + range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + + ip_nat_proto_put(npt); + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_nat(struct nfattr *cda[], + const struct ip_conntrack *ct, struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_NAT_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_NAT_MAX * sizeof(tb)); + memset(range, 0, sizeof(*range)); + + if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0) + goto nfattr_failure; + + if (tb[CTA_NAT_MINIP-1]) + range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + + if (!tb[CTA_NAT_MAXIP-1]) + range->max_ip = range->min_ip; + else + range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO-1]) + return 0; + + err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + if (err < 0) + return err; + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} +#endif + +static inline int +ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +{ + struct nfattr *tb[CTA_HELP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + memset(tb, 0, CTA_HELP_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_HELP_NAME-1]) + return -EINVAL; + + *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else { + /* Flush the whole table */ + ip_conntrack_flush(); + return 0; + } + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + + ct = tuplehash_to_ctrack(h); + + if (cda[CTA_ID-1]) { + u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + if (ct->id != id) { + ip_conntrack_put(ct); + return -ENOENT; + } + } + if (del_timer(&ct->timeout)) { + ip_conntrack_put(ct); + ct->timeout.function((unsigned long)ct); + return 0; + } + ip_conntrack_put(ct); + DEBUGP("leaving\n"); + + return 0; +} + +static int +ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + struct sk_buff *skb2 = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_IP_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else + return -ENOTSUPP; +#endif + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash"); + return -ENOENT; + } + DEBUGP("tuple found\n"); + ct = tuplehash_to_ctrack(h); + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb2) { + ip_conntrack_put(ct); + return -ENOMEM; + } + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, 1, ct); + ip_conntrack_put(ct); + if (err <= 0) + goto out; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + DEBUGP("leaving\n"); + return 0; + +out: + if (skb2) + kfree_skb(skb2); + return -1; +} + +static inline int +ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EINVAL; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EINVAL; + + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EINVAL; + + if (cda[CTA_NAT-1]) { +#ifndef CONFIG_IP_NF_NAT_NEEDED + return -EINVAL; +#else + unsigned int hooknum; + struct ip_nat_range range; + + if (ctnetlink_parse_nat(cda, ct, &range) < 0) + return -EINVAL; + + DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", + NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), + htons(range.min.all), htons(range.max.all)); + + /* This is tricky but it works. ip_nat_setup_info needs the + * hook number as parameter, so let's do the correct + * conversion and run away */ + if (status & IPS_SRC_NAT_DONE) + hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ + else if (status & IPS_DST_NAT_DONE) + hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ + else + return -EINVAL; /* Missing NAT flags */ + + DEBUGP("NAT status: %lu\n", + status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + + if (ip_nat_initialized(ct, hooknum)) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + + DEBUGP("NAT status after setup_info: %lu\n", + ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); +#endif + } + + /* Be careful here, modifying NAT bits can screw up things, + * so don't let users modify them directly if they don't pass + * ip_nat_range. */ + ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + return 0; +} + + +static inline int +ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct ip_conntrack_helper *helper; + char *helpname; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* don't change helper of sibling connections */ + if (ct->master) + return -EINVAL; + + err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + if (err < 0) + return err; + + helper = __ip_conntrack_helper_find_byname(helpname); + if (!helper) { + if (!strcmp(helpname, "")) + helper = NULL; + else + return -EINVAL; + } + + if (ct->helper) { + if (!helper) { + /* we had a helper before ... */ + ip_ct_remove_expectations(ct); + ct->helper = NULL; + } else { + /* need to zero data of old helper */ + memset(&ct->help, 0, sizeof(ct->help)); + } + } + + ct->helper = helper; + + return 0; +} + +static inline int +ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + if (!del_timer(&ct->timeout)) + return -ETIME; + + ct->timeout.expires = jiffies + timeout * HZ; + add_timer(&ct->timeout); + + return 0; +} + +static int +ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_HELP-1]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_TIMEOUT-1]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + + DEBUGP("all done\n"); + return 0; +} + +static int +ctnetlink_create_conntrack(struct nfattr *cda[], + struct ip_conntrack_tuple *otuple, + struct ip_conntrack_tuple *rtuple) +{ + struct ip_conntrack *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + + ct = ip_conntrack_alloc(otuple, rtuple); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + + if (!cda[CTA_TIMEOUT-1]) + goto err; + ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->status |= IPS_CONFIRMED; + + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + + ct->helper = ip_conntrack_helper_find_get(rtuple); + + add_timer(&ct->timeout); + ip_conntrack_hash_insert(ct); + + if (ct->helper) + ip_conntrack_helper_put(ct->helper); + + DEBUGP("conntrack with id %u inserted\n", ct->id); + return 0; + +err: + ip_conntrack_free(ct); + return err; +} + +static int +ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple otuple, rtuple; + struct ip_conntrack_tuple_hash *h = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) { + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); + if (err < 0) + return err; + } + + if (cda[CTA_TUPLE_REPLY-1]) { + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY); + if (err < 0) + return err; + } + + write_lock_bh(&ip_conntrack_lock); + if (cda[CTA_TUPLE_ORIG-1]) + h = __ip_conntrack_find(&otuple, NULL); + else if (cda[CTA_TUPLE_REPLY-1]) + h = __ip_conntrack_find(&rtuple, NULL); + + if (h == NULL) { + write_unlock_bh(&ip_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + goto out_unlock; + } else { + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; + goto out_unlock; + } + } + + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ + DEBUGP("conntrack found\n"); + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda); + +out_unlock: + write_unlock_bh(&ip_conntrack_lock); + return err; +} + +/*********************************************************************** + * EXPECT + ***********************************************************************/ + +static inline int +ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple, + enum ctattr_expect type) +{ + struct nfattr *nest_parms = NFA_NEST(skb, type); + + if (ctnetlink_dump_tuples(skb, tuple) < 0) + goto nfattr_failure; + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_exp_dump_expect(struct sk_buff *skb, + const struct ip_conntrack_expect *exp) +{ + u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); + u_int32_t id = htonl(exp->id); + struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT); + + if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) + goto nfattr_failure; + + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, + int nowait, + const struct ip_conntrack_expect *exp) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_expect_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + int flags = 0; + u16 proto; + + if (events & IPEXP_NEW) { + type = IPCTNL_MSG_EXP_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + } else + return NOTIFY_DONE; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + proto = exp->tuple.dst.protonum; + nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif + +static int +ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack_expect *exp = NULL; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[0]; + + DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); + + read_lock_bh(&ip_conntrack_lock); + list_for_each(i, &ip_conntrack_expect_list) { + exp = (struct ip_conntrack_expect *) i; + if (exp->id <= *id) + continue; + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) + goto out; + *id = exp->id; + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last id=%llu\n", *id); + + return skb->len; +} + +static int +ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + struct sk_buff *skb2; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_exp_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + goto out; + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + 1, exp); + if (err <= 0) + goto out; + + ip_conntrack_expect_put(exp); + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto free; + + return err; + +out: + ip_conntrack_expect_put(exp); +free: + if (skb2) + kfree_skb(skb2); + return err; +} + +static int +ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_expect *exp, *tmp; + struct ip_conntrack_tuple tuple; + struct ip_conntrack_helper *h; + int err; + + /* delete by tuple needs either orig or reply tuple */ + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else if (cda[CTA_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_HELP_NAME-1]); + + /* delete all expectations for this helper */ + write_lock_bh(&ip_conntrack_lock); + h = __ip_conntrack_helper_find_byname(name); + if (!h) { + write_unlock_bh(&ip_conntrack_lock); + return -EINVAL; + } + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock(&ip_conntrack_lock); + return 0; + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&ip_conntrack_lock); + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock_bh(&ip_conntrack_lock); + return 0; + } + + if (err < 0) + return err; + + /* bump usage count to 2 */ + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + ip_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + ip_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. after this line usage count == 0 */ + ip_conntrack_expect_put(exp); + + return 0; +} +static int +ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[]) +{ + return -EOPNOTSUPP; +} + +static int +ctnetlink_create_expect(struct nfattr *cda[]) +{ + struct ip_conntrack_tuple tuple, mask, master_tuple; + struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_expect *exp; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASK); + if (err < 0) + return err; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, + CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = ip_conntrack_find_get(&master_tuple, NULL); + if (!h) + return -ENOENT; + ct = tuplehash_to_ctrack(h); + + if (!ct->helper) { + /* such conntrack hasn't got any helper, abort */ + err = -EINVAL; + goto out; + } + + exp = ip_conntrack_expect_alloc(ct); + if (!exp) { + err = -ENOMEM; + goto out; + } + + exp->expectfn = NULL; + exp->master = ct; + memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); + memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); + + err = ip_conntrack_expect_related(exp); + ip_conntrack_expect_put(exp); + +out: + ip_conntrack_put(tuplehash_to_ctrack(h)); + return err; +} + +static int +ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + + write_lock_bh(&ip_conntrack_lock); + exp = __ip_conntrack_expect_find(&tuple); + + if (!exp) { + write_unlock_bh(&ip_conntrack_lock); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_expect(cda); + return err; + } + + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_expect(exp, cda); + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving\n"); + + return err; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static struct notifier_block ctnl_notifier = { + .notifier_call = ctnetlink_conntrack_event, +}; + +static struct notifier_block ctnl_notifier_exp = { + .notifier_call = ctnetlink_expect_event, +}; +#endif + +static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem ctnl_subsys = { + .name = "conntrack", + .subsys_id = NFNL_SUBSYS_CTNETLINK, + .cb_count = IPCTNL_MSG_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_cb, +}; + +static struct nfnetlink_subsystem ctnl_exp_subsys = { + .name = "conntrack_expect", + .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, + .cb_count = IPCTNL_MSG_EXP_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_exp_cb, +}; + +static int __init ctnetlink_init(void) +{ + int ret; + + printk("ctnetlink v%s: registering with nfnetlink.\n", version); + ret = nfnetlink_subsys_register(&ctnl_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register with nfnetlink.\n"); + goto err_out; + } + + ret = nfnetlink_subsys_register(&ctnl_exp_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + goto err_unreg_subsys; + } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ret = ip_conntrack_register_notifier(&ctnl_notifier); + if (ret < 0) { + printk("ctnetlink_init: cannot register notifier.\n"); + goto err_unreg_exp_subsys; + } + + ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp); + if (ret < 0) { + printk("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + + return 0; + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +err_unreg_notifier: + ip_conntrack_unregister_notifier(&ctnl_notifier); +err_unreg_exp_subsys: + nfnetlink_subsys_unregister(&ctnl_exp_subsys); +#endif +err_unreg_subsys: + nfnetlink_subsys_unregister(&ctnl_subsys); +err_out: + return ret; +} + +static void __exit ctnetlink_exit(void) +{ + printk("ctnetlink: unregistering from nfnetlink.\n"); + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_unregister_notifier(&ctnl_notifier_exp); + ip_conntrack_unregister_notifier(&ctnl_notifier); +#endif + + nfnetlink_subsys_unregister(&ctnl_exp_subsys); + nfnetlink_subsys_unregister(&ctnl_subsys); + return; +} + +module_init(ctnetlink_init); +module_exit(ctnetlink_exit); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -102,22 +102,24 @@ static int icmp_packet(struct ip_conntra ct->timeout.function((unsigned long)ct); } else { atomic_inc(&ct->proto.icmp.count); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); } return NF_ACCEPT; } +static u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 +}; + /* Called when a new connection for this protocol found. */ static int icmp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) { - static u_int8_t valid_new[] - = { [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ @@ -158,11 +160,12 @@ icmp_error_message(struct sk_buff *skb, return NF_ACCEPT; } - innerproto = ip_ct_find_proto(inside->ip.protocol); + innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; /* Are they talking about one of our connections? */ if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } @@ -170,8 +173,10 @@ icmp_error_message(struct sk_buff *skb, been preserved inside the ICMP. */ if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { DEBUGP("icmp_error_track: Can't invert tuple\n"); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } + ip_conntrack_proto_put(innerproto); *ctinfo = IP_CT_RELATED; @@ -265,6 +270,47 @@ checksum_skipped: return icmp_error_message(skb, ctinfo, hooknum); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int icmp_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + if (t->dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[t->dst.u.icmp.type]) + return -EINVAL; + + return 0; + +nfattr_failure: + return -1; +} + +static int icmp_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE-1] + || !tb[CTA_PROTO_ICMP_CODE-1] + || !tb[CTA_PROTO_ICMP_ID-1]) + return -1; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + tuple->src.u.icmp.id = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + + return 0; +} +#endif + struct ip_conntrack_protocol ip_conntrack_protocol_icmp = { .proto = IPPROTO_ICMP, @@ -276,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrac .packet = icmp_packet, .new = icmp_new, .error = icmp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = icmp_tuple_to_nfattr, + .nfattr_to_tuple = icmp_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntra } conntrack->proto.sctp.state = newconntrack; + if (oldsctpstate != newconntrack) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); write_unlock_bh(&sctp_lock); } @@ -503,7 +505,12 @@ static struct ip_conntrack_protocol ip_c .packet = sctp_packet, .new = sctp_new, .destroy = NULL, - .me = THIS_MODULE + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct se return seq_printf(s, "%s ", tcp_conntrack_names[state]); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct) +{ + read_lock_bh(&tcp_lock); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} +#endif + static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; @@ -973,6 +990,10 @@ static int tcp_packet(struct ip_conntrac ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common @@ -1096,4 +1117,10 @@ struct ip_conntrack_protocol ip_conntrac .packet = tcp_packet, .new = tcp_new, .error = tcp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrac ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + ip_conntrack_event_cache(IPCT_STATUS, skb); } else ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); @@ -144,4 +145,9 @@ struct ip_conntrack_protocol ip_conntrac .packet = udp_packet, .new = udp_new, .error = udp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -5,7 +5,7 @@ */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team + * (C) 2002-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file * if (DIRECTION(hash)) return 0; - proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); IP_NF_ASSERT(proto); if (seq_printf(s, "%-8s %u %ld ", @@ -185,7 +184,7 @@ static int ct_seq_show(struct seq_file * return -ENOSPC; #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%lu ", conntrack->mark)) + if (seq_printf(s, "mark=%u ", conntrack->mark)) return -ENOSPC; #endif @@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - ip_ct_find_proto(expect->tuple.dst.protonum)); + __ip_conntrack_proto_find(expect->tuple.dst.protonum)); return seq_putc(s, '\n'); } @@ -402,6 +401,7 @@ static unsigned int ip_confirm(unsigned const struct net_device *out, int (*okfn)(struct sk_buff *)) { + ip_conntrack_event_cache_init(*pskb); /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(pskb); } @@ -419,6 +419,7 @@ static unsigned int ip_conntrack_help(un ct = ip_conntrack_get(*pskb, &ctinfo); if (ct && ct->helper) { unsigned int ret; + ip_conntrack_event_cache_init(*pskb); ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) return ret; @@ -889,6 +890,7 @@ static int init_or_cleanup(int init) return ret; cleanup: + synchronize_net(); #ifdef CONFIG_SYSCTL unregister_sysctl_table(ip_ct_sysctl_header); cleanup_localinops: @@ -971,6 +973,13 @@ void need_ip_conntrack(void) { } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +EXPORT_SYMBOL_GPL(ip_conntrack_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); +EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); +EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); +#endif EXPORT_SYMBOL(ip_conntrack_protocol_register); EXPORT_SYMBOL(ip_conntrack_protocol_unregister); EXPORT_SYMBOL(ip_ct_get_tuple); @@ -982,12 +991,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_regist EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); EXPORT_SYMBOL(ip_ct_refresh_acct); -EXPORT_SYMBOL(ip_ct_protos); -EXPORT_SYMBOL(ip_ct_find_proto); + EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); + EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); EXPORT_SYMBOL(ip_conntrack_htable_size); @@ -995,7 +1008,28 @@ EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_put); #ifdef CONFIG_IP_NF_NAT_NEEDED EXPORT_SYMBOL(ip_conntrack_tcp_update); #endif + +EXPORT_SYMBOL_GPL(ip_conntrack_flush); +EXPORT_SYMBOL_GPL(__ip_conntrack_find); + +EXPORT_SYMBOL_GPL(ip_conntrack_alloc); +EXPORT_SYMBOL_GPL(ip_conntrack_free); +EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert); + +EXPORT_SYMBOL_GPL(ip_ct_remove_expectations); + +EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_helper_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); + +EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple); +#endif diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock); static unsigned int ip_nat_htable_size; static struct list_head *bysource; + +#define MAX_IP_NAT_PROTO 256 struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; +static inline struct ip_nat_protocol * +__ip_nat_proto_find(u_int8_t protonum) +{ + return ip_nat_protos[protonum]; +} + +struct ip_nat_protocol * +ip_nat_proto_find_get(u_int8_t protonum) +{ + struct ip_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __ip_nat_proto_find(protonum); + if (p) { + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; + } + preempt_enable(); + + return p; +} + +void +ip_nat_proto_put(struct ip_nat_protocol *p) +{ + module_put(p->me); +} /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -103,7 +134,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); + struct ip_nat_protocol *proto = + __ip_nat_proto_find(tuple->dst.protonum); /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tup struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - struct ip_nat_protocol *proto - = ip_nat_find_proto(orig_tuple->dst.protonum); + struct ip_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tup /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ + proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ip_nat_proto_put(proto); return; + } /* Last change: get protocol to try to obtain unique tuple. */ proto->unique_tuple(tuple, range, maniptype, conntrack); + + ip_nat_proto_put(proto); } unsigned int @@ -320,17 +357,20 @@ manip_pkt(u_int16_t proto, enum ip_nat_manip_type maniptype) { struct iphdr *iph; + struct ip_nat_protocol *p; - (*pskb)->nfcache |= NFC_ALTERED; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - target, maniptype)) + p = ip_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + ip_nat_proto_put(p); return 0; + } + ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; @@ -391,7 +431,7 @@ int icmp_reply_translation(struct sk_buf struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; @@ -426,7 +466,8 @@ int icmp_reply_translation(struct sk_buf if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, ip_ct_find_proto(inside->ip.protocol))) + &inner, + __ip_conntrack_proto_find(inside->ip.protocol))) return 0; /* Change inner back to look like incoming packet. We do the @@ -496,6 +537,49 @@ void ip_nat_protocol_unregister(struct i synchronize_net(); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} + +int +ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +#endif + int __init ip_nat_init(void) { size_t i; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff struct tcphdr *tcph; int datalen; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff match_offset + match_len) return 0; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - if (!skb_ip_make_writable(pskb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); @@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb, struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); @@ -106,11 +106,18 @@ icmp_print_range(char *buffer, const str else return 0; } -struct ip_nat_protocol ip_nat_protocol_icmp -= { "ICMP", IPPROTO_ICMP, - icmp_manip_pkt, - icmp_in_range, - icmp_unique_tuple, - icmp_print, - icmp_print_range +struct ip_nat_protocol ip_nat_protocol_icmp = { + .name = "ICMP", + .protonum = IPPROTO_ICMP, + .me = THIS_MODULE, + .manip_pkt = icmp_manip_pkt, + .in_range = icmp_in_range, + .unique_tuple = icmp_unique_tuple, + .print = icmp_print, + .print_range = icmp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb, if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(pskb, hdroff + hdrsize)) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); @@ -169,11 +170,18 @@ tcp_print_range(char *buffer, const stru else return 0; } -struct ip_nat_protocol ip_nat_protocol_tcp -= { "TCP", IPPROTO_TCP, - tcp_manip_pkt, - tcp_in_range, - tcp_unique_tuple, - tcp_print, - tcp_print_range +struct ip_nat_protocol ip_nat_protocol_tcp = { + .name = "TCP", + .protonum = IPPROTO_TCP, + .me = THIS_MODULE, + .manip_pkt = tcp_manip_pkt, + .in_range = tcp_in_range, + .unique_tuple = tcp_unique_tuple, + .print = tcp_print, + .print_range = tcp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb, u32 oldip, newip; u16 *portptr, newport; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); @@ -156,11 +156,18 @@ udp_print_range(char *buffer, const stru else return 0; } -struct ip_nat_protocol ip_nat_protocol_udp -= { "UDP", IPPROTO_UDP, - udp_manip_pkt, - udp_in_range, - udp_unique_tuple, - udp_print, - udp_print_range +struct ip_nat_protocol ip_nat_protocol_udp = { + .name = "UDP", + .protonum = IPPROTO_UDP, + .me = THIS_MODULE, + .manip_pkt = udp_manip_pkt, + .in_range = udp_in_range, + .unique_tuple = udp_unique_tuple, + .print = udp_print, + .print_range = udp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -61,10 +61,11 @@ unknown_print_range(char *buffer, const } struct ip_nat_protocol ip_nat_unknown_protocol = { - "unknown", 0, - unknown_manip_pkt, - unknown_in_range, - unknown_unique_tuple, - unknown_print, - unknown_print_range + .name = "unknown", + .me = THIS_MODULE, + .manip_pkt = unknown_manip_pkt, + .in_range = unknown_in_range, + .unique_tuple = unknown_unique_tuple, + .print = unknown_print, + .print_range = unknown_print_range }; diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb, return NF_DROP; } - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; spin_lock_bh(&snmp_lock); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - (*pskb)->nfcache |= NFC_UNKNOWN; - /* If we had a hardware checksum before, it's now invalid */ if ((*pskb)->ip_summed == CHECKSUM_HW) if (skb_checksum_help(*pskb, (out == NULL))) @@ -392,6 +390,8 @@ module_exit(fini); EXPORT_SYMBOL(ip_nat_setup_info); EXPORT_SYMBOL(ip_nat_protocol_register); EXPORT_SYMBOL(ip_nat_protocol_unregister); +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); +EXPORT_SYMBOL_GPL(ip_nat_proto_put); EXPORT_SYMBOL(ip_nat_cheat_check); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_mangle_udp_packet); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -43,17 +43,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" -struct ipq_rt_info { - __u8 tos; - __u32 daddr; - __u32 saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -281,7 +274,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; @@ -299,14 +293,6 @@ ipq_enqueue_packet(struct sk_buff *skb, entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = skb->nh.iph; - - entry->rt_info.tos = iph->tos; - entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -382,23 +368,10 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, st } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); - e->skb->nfcache |= NFC_ALTERED; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = e->skb->nh.iph; - - if (!(iph->tos == e->rt_info.tos - && iph->daddr == e->rt_info.daddr - && iph->saddr == e->rt_info.saddr)) - return ip_route_me_harder(&e->skb); - } return 0; } @@ -686,7 +659,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { struct ipt_entry_target *t; diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c --- a/net/ipv4/netfilter/ipt_CLASSIFY.c +++ b/net/ipv4/netfilter/ipt_CLASSIFY.c @@ -32,10 +32,8 @@ target(struct sk_buff **pskb, { const struct ipt_classify_target_info *clinfo = targinfo; - if((*pskb)->priority != clinfo->priority) { + if((*pskb)->priority != clinfo->priority) (*pskb)->priority = clinfo->priority; - (*pskb)->nfcache |= NFC_ALTERED; - } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,7 +367,7 @@ target(struct sk_buff **pskb, #ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); + DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c @@ -40,9 +40,9 @@ target(struct sk_buff **pskb, void *userinfo) { const struct ipt_connmark_target_info *markinfo = targinfo; - unsigned long diff; - unsigned long nfmark; - unsigned long newmark; + u_int32_t diff; + u_int32_t nfmark; + u_int32_t newmark; enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); @@ -61,10 +61,8 @@ target(struct sk_buff **pskb, case IPT_CONNMARK_RESTORE: nfmark = (*pskb)->nfmark; diff = (ct->mark ^ nfmark) & markinfo->mask; - if (diff != 0) { + if (diff != 0) (*pskb)->nfmark = nfmark ^ diff; - (*pskb)->nfcache |= NFC_ALTERED; - } break; } } @@ -94,6 +92,11 @@ checkentry(const char *tablename, } } + if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { + printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -51,7 +51,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^ 0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const != (einfo->ip_ect & IPT_ECN_IP_MASK)) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return 1; } @@ -67,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const || tcph->cwr == einfo->proto.tcp.cwr))) return 1; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; @@ -86,7 +85,6 @@ set_ect_tcp(struct sk_buff **pskb, const else if (skb_checksum_help(*pskb, inward)) return 0; - (*pskb)->nfcache |= NFC_ALTERED; return 1; } diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb, { const struct ipt_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } @@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb, break; } - if((*pskb)->nfmark != mark) { + if((*pskb)->nfmark != mark) (*pskb)->nfmark = mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } @@ -76,6 +74,8 @@ checkentry_v0(const char *tablename, unsigned int targinfosize, unsigned int hook_mask) { + struct ipt_mark_target_info *markinfo = targinfo; + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", targinfosize, @@ -88,6 +88,11 @@ checkentry_v0(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } @@ -120,6 +125,11 @@ checkentry_v1(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c new file mode 100644 --- /dev/null +++ b/net/ipv4/netfilter/ipt_NFQUEUE.c @@ -0,0 +1,70 @@ +/* iptables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *o /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); - nskb->nfcache = 0; nskb->nfmark = 0; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(nskb->nf_bridge); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -58,7 +58,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, unsigned int i; u_int8_t *opt; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; iph = (*pskb)->nh.iph; @@ -189,7 +189,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, /* We never hw checksum SYN packets. */ BUG_ON((*pskb)->ip_summed == CHECKSUM_HW); - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -46,7 +46,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -62,6 +62,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ @@ -372,7 +373,7 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c @@ -54,9 +54,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_connmark_info *cm = + (struct ipt_connmark_info *)matchinfo; if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) return 0; + if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { + printk(KERN_WARNING "connmark: only support 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c --- a/net/ipv4/netfilter/ipt_mark.c +++ b/net/ipv4/netfilter/ipt_mark.c @@ -37,9 +37,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo; + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) return 0; + if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { + printk(KERN_WARNING "mark: only supports 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -240,7 +240,9 @@ static unsigned rt_hash_mask; static int rt_hash_log; static unsigned int rt_hash_rnd; -struct rt_cache_stat *rt_cache_stat; +static struct rt_cache_stat *rt_cache_stat; +#define RT_CACHE_STAT_INC(field) \ + (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -487,7 +487,7 @@ int tcp_listen_start(struct sock *sk) } sk->sk_state = TCP_CLOSE; - reqsk_queue_destroy(&tp->accept_queue); + __reqsk_queue_destroy(&tp->accept_queue); return -EADDRINUSE; } @@ -499,38 +499,23 @@ int tcp_listen_start(struct sock *sk) static void tcp_listen_stop (struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt; struct request_sock *acc_req; struct request_sock *req; - int i; tcp_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); - if (lopt->qlen) { - for (i = 0; i < TCP_SYNQ_HSIZE; i++) { - while ((req = lopt->syn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - lopt->qlen--; - reqsk_free(req); - - /* Following specs, it would be better either to send FIN - * (and enter FIN-WAIT-1, it is normal close) - * or to send active reset (abort). - * Certainly, it is pretty dangerous while synflood, but it is - * bad justification for our negligence 8) - * To be honest, we are not able to make either - * of the variants now. --ANK - */ - } - } - } - BUG_TRAP(!lopt->qlen); - - kfree(lopt); + /* Following specs, it would be better either to send FIN + * (and enter FIN-WAIT-1, it is normal close) + * or to send active reset (abort). + * Certainly, it is pretty dangerous while synflood, but it is + * bad justification for our negligence 8) + * To be honest, we are not able to make either + * of the variants now. --ANK + */ + reqsk_queue_destroy(&tp->accept_queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -975,7 +960,7 @@ do_fault: if (!skb->len) { if (sk->sk_send_head == skb) sk->sk_send_head = NULL; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk, static int __init tcpdiag_init(void) { - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, + THIS_MODULE); if (tcpnl == NULL) return -ENOMEM; return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2085,7 +2085,7 @@ static int tcp_clean_rtx_queue(struct so seq_rtt = now - scb->when; tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } @@ -2853,7 +2853,7 @@ static void tcp_ofo_queue(struct sock *s if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received \n"); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __kfree_skb(skb); continue; } @@ -2861,7 +2861,7 @@ static void tcp_ofo_queue(struct sock *s tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) @@ -3027,7 +3027,7 @@ drop: u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_append(skb1, skb); + __skb_append(skb1, skb, &tp->out_of_order_queue); if (!tp->rx_opt.num_sacks || tp->selective_acks[0].end_seq != seq) @@ -3071,7 +3071,7 @@ drop: tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); break; } - __skb_unlink(skb1, skb1->list); + __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); __kfree_skb(skb1); } @@ -3088,8 +3088,9 @@ add_sack: * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff *head, - struct sk_buff *tail, u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, + struct sk_buff *head, struct sk_buff *tail, + u32 start, u32 end) { struct sk_buff *skb; @@ -3099,7 +3100,7 @@ tcp_collapse(struct sock *sk, struct sk_ /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3145,7 +3146,7 @@ tcp_collapse(struct sock *sk, struct sk_ nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, skb->list); + __skb_insert(nskb, skb->prev, skb, list); sk_stream_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -3164,7 +3165,7 @@ tcp_collapse(struct sock *sk, struct sk_ } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3200,7 +3201,8 @@ static void tcp_collapse_ofo_queue(struc if (skb == (struct sk_buff *)&tp->out_of_order_queue || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, head, skb, start, end); + tcp_collapse(sk, &tp->out_of_order_queue, + head, skb, start, end); head = skb; if (skb == (struct sk_buff *)&tp->out_of_order_queue) break; @@ -3237,7 +3239,8 @@ static int tcp_prune_queue(struct sock * tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, sk->sk_receive_queue.next, + tcp_collapse(sk, &sk->sk_receive_queue, + sk->sk_receive_queue.next, (struct sk_buff*)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); sk_stream_mem_reclaim(sk); @@ -3462,7 +3465,7 @@ static void tcp_check_urg(struct sock * struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); tp->copied_seq++; if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -507,7 +507,7 @@ static int tcp_fragment(struct sock *sk, /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -892,7 +892,7 @@ static int tso_fragment(struct sock *sk, /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -1257,7 +1257,7 @@ static void tcp_retrans_try_collapse(str tcp_skb_pcount(next_skb) != 1); /* Ok. We will be able to collapse the packet. */ - __skb_unlink(next_skb, next_skb->list); + __skb_unlink(next_skb, &sk->sk_write_queue); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -128,8 +128,10 @@ void __init xfrm4_state_init(void) xfrm_state_register_afinfo(&xfrm4_state_afinfo); } +#if 0 void __exit xfrm4_state_fini(void) { xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); } +#endif /* 0 */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,7 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_ou route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o + ip6_flowlabel.o ipv6_syms.o netfilter.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -757,6 +757,9 @@ static int __init inet6_init(void) err = igmp6_init(&inet6_family_ops); if (err) goto igmp_fail; + err = ipv6_netfilter_init(); + if (err) + goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -813,6 +816,8 @@ proc_tcp6_fail: raw6_proc_exit(); proc_raw6_fail: #endif + ipv6_netfilter_fini(); +netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); @@ -852,6 +857,7 @@ static void __exit inet6_exit(void) ip6_route_cleanup(); ipv6_packet_cleanup(); igmp6_cleanup(); + ipv6_netfilter_fini(); ndisc_cleanup(); icmpv6_cleanup(); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct return dst_input(skb); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ipv6hdr *hdr; u32 pkt_len; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -153,51 +153,6 @@ int ip6_output(struct sk_buff *skb) return ip6_output2(skb); } -#ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct dst_entry *dst; - struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, - .proto = iph->nexthdr, - }; - - dst = ip6_route_output(skb->sk, &fl); - - if (dst->error) { - IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG( - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); - dst_release(dst); - return -EINVAL; - } - - /* Drop old route. */ - dst_release(skb->dst); - - skb->dst = dst; - return 0; -} -#endif - -static inline int ip6_maybe_reroute(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER - if (skb->nfcache & NFC_ALTERED){ - if (ip6_route_me_harder(skb) != 0){ - kfree_skb(skb); - return -EINVAL; - } - } -#endif /* CONFIG_NETFILTER */ - return dst_output(skb); -} - /* * xmit an sk_buff (used by TCP) */ @@ -266,7 +221,8 @@ int ip6_xmit(struct sock *sk, struct sk_ mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); + return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); } if (net_ratelimit()) diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -#ifdef CONFIG_NETFILTER -EXPORT_SYMBOL(ip6_route_me_harder); -#endif EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c new file mode 100644 --- /dev/null +++ b/net/ipv6/netfilter.c @@ -0,0 +1,104 @@ +#include + +#ifdef CONFIG_NETFILTER + +#include +#include +#include +#include +#include +#include +#include + +int ip6_route_me_harder(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct dst_entry *dst; + struct flowi fl = { + .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .nl_u = + { .ip6_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, } }, + .proto = iph->nexthdr, + }; + + dst = ip6_route_output(skb->sk, &fl); + + if (dst->error) { + IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); + dst_release(dst); + return -EINVAL; + } + + /* Drop old route. */ + dst_release(skb->dst); + + skb->dst = dst; + return 0; +} +EXPORT_SYMBOL(ip6_route_me_harder); + +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. + */ + +struct ip6_rt_info { + struct in6_addr daddr; + struct in6_addr saddr; +}; + +static void save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = skb->nh.ipv6h; + + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = (*pskb)->nh.ipv6h; + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + return ip6_route_me_harder(*pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip6_reroute = { + .rer_size = sizeof(struct ip6_rt_info), + .save = &save, + .reroute = &reroute, +}; + +int __init ipv6_netfilter_init(void) +{ + return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); +} + +void __exit ipv6_netfilter_fini(void) +{ + nf_unregister_queue_rerouter(PF_INET6); +} + +#else /* CONFIG_NETFILTER */ +int __init ipv6_netfilter_init(void) +{ + return 0; +} + +void __exit ipv6_netfilter_fini(void) +{ +} +#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXP # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK #fi config IP6_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- This option adds a queue handler to the kernel for IPv6 - packets which lets us to receive the filtered packets - with QUEUE target using libiptc as we can do with - the IPv4 now. + packets which enables users to receive the filtered packets + with QUEUE target using libipq. + + THis option enables the old IPv6-only "ip6_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). (C) Fernando Anton 2001 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue. obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -47,16 +47,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -278,7 +272,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; @@ -296,13 +291,6 @@ ipq_enqueue_packet(struct sk_buff *skb, entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = skb->nh.ipv6h; - - entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -378,22 +366,10 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, st } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); - e->skb->nfcache |= NFC_ALTERED; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - * Not a nice way to cmp, but works - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = e->skb->nh.ipv6h; - if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || - !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) - return ip6_route_me_harder(e->skb); - } return 0; } @@ -679,7 +655,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, &protoff, &offset)) { struct ip6t_entry_target *t; diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -28,10 +28,9 @@ target(struct sk_buff **pskb, { const struct ip6t_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IP6T_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c new file mode 100644 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c @@ -0,0 +1,70 @@ +/* ip6tables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("ip6tables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ip6t_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1627,7 +1627,7 @@ out: return rc; } -static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { /* NULL here for pt means the packet was looped back */ struct ipx_interface *intrfc; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; @@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; @@ -1309,7 +1303,7 @@ static void irlap_recv_test_frame(struct * Jean II */ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct irlap_info info; struct irlap_cb *self; diff --git a/net/irda/irmod.c b/net/irda/irmod.c --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -54,7 +54,7 @@ extern int irsock_init(void); extern void irsock_cleanup(void); /* irlap_frame.c */ extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); /* * Module parameters diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb if (!skb_prev) skb_queue_head(&lapb->write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &lapb->write_queue); skb_prev = skb; } } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb * if (uaddr) memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); - if (!skb->list) { + if (!skb->next) { dgram_free: kfree_skb(skb); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock * if (!ev->ind_prim && !ev->cfm_prim) { /* indicate or confirm not required */ - if (!skb->list) + /* XXX this is not very pretty, perhaps we should store + * XXX indicate/confirm-needed state in the llc_conn_state_ev + * XXX control block of the SKB instead? -DaveM + */ + if (!skb->next) goto out_kfree_skb; goto out_skb_put; } diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -103,7 +103,8 @@ out: struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct llc_sap *sap = llc_sap_find(lsap); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct s * data now), it queues this frame in the connection's backlog. */ int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct llc_sap *sap; struct llc_pdu_sn *pdu; @@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct * LLC functionality */ if (sap->rcv_func) { - sap->rcv_func(skb, dev, pt); + sap->rcv_func(skb, dev, pt, orig_dev); goto out; } dest = llc_pdu_type(skb); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig new file mode 100644 --- /dev/null +++ b/net/netfilter/Kconfig @@ -0,0 +1,13 @@ +config NETFILTER_NETLINK + tristate "Netfilter netlink interface" + help + If this option is enabled, the kernel will include support + for the new netfilter netlink interface. + +config NETFILTER_NETLINK_QUEUE + tristate "Netfilter NFQUEUE over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option isenabled, the kernel will include support + for queueing packets via NFNETLINK. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile new file mode 100644 --- /dev/null +++ b/net/netfilter/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c new file mode 100644 --- /dev/null +++ b/net/netfilter/nfnetlink.c @@ -0,0 +1,363 @@ +/* Netfilter messages via netlink socket. Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist , + * (C) 2002-2005 by Harald Welte + * (C) 2005 by Pablo Neira Ayuso + * + * Initial netfilter messages via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); + +static char __initdata nfversion[] = "0.30"; + +#if 0 +#define DEBUGP(format, args...) \ + printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ + __LINE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static struct sock *nfnl = NULL; +static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +DECLARE_MUTEX(nfnl_sem); + +void nfnl_lock(void) +{ + nfnl_shlock(); +} + +void nfnl_unlock(void) +{ + nfnl_shunlock(); +} + +int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +{ + DEBUGP("registering subsystem ID %u\n", n->subsys_id); + + nfnl_lock(); + if (subsys_table[n->subsys_id]) { + nfnl_unlock(); + return -EBUSY; + } + subsys_table[n->subsys_id] = n; + nfnl_unlock(); + + return 0; +} + +int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +{ + DEBUGP("unregistering subsystem ID %u\n", n->subsys_id); + + nfnl_lock(); + subsys_table[n->subsys_id] = NULL; + nfnl_unlock(); + + return 0; +} + +static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +{ + u_int8_t subsys_id = NFNL_SUBSYS_ID(type); + + if (subsys_id >= NFNL_SUBSYS_COUNT + || subsys_table[subsys_id] == NULL) + return NULL; + + return subsys_table[subsys_id]; +} + +static inline struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +{ + u_int8_t cb_id = NFNL_MSG_TYPE(type); + + if (cb_id >= ss->cb_count) { + DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count); + return NULL; + } + + return &ss->cb[cb_id]; +} + +void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nfattr *nfa; + int size = NFA_LENGTH(attrlen); + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = attrtype; + nfa->nfa_len = size; + memcpy(NFA_DATA(nfa), data, attrlen); + memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); +} + +int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) +{ + memset(tb, 0, sizeof(struct nfattr *) * maxattr); + + while (NFA_OK(nfa, len)) { + unsigned flavor = nfa->nfa_type; + if (flavor && flavor <= maxattr) + tb[flavor-1] = nfa; + nfa = NFA_NEXT(nfa, len); + } + + return 0; +} + +/** + * nfnetlink_check_attributes - check and parse nfnetlink attributes + * + * subsys: nfnl subsystem for which this message is to be parsed + * nlmsghdr: netlink message to be checked/parsed + * cda: array of pointers, needs to be at least subsys->attr_count big + * + */ +static int +nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, + struct nlmsghdr *nlh, struct nfattr *cda[]) +{ + int min_len; + + memset(cda, 0, sizeof(struct nfattr *) * subsys->attr_count); + + /* check attribute lengths. */ + min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); + if (nlh->nlmsg_len < min_len) + return -EINVAL; + + if (nlh->nlmsg_len > min_len) { + struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + + while (NFA_OK(attr, attrlen)) { + unsigned flavor = attr->nfa_type; + if (flavor) { + if (flavor > subsys->attr_count) + return -EINVAL; + cda[flavor - 1] = attr; + } + attr = NFA_NEXT(attr, attrlen); + } + } else + return -EINVAL; + + return 0; +} + +int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +{ + int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + int err = 0; + + NETLINK_CB(skb).dst_groups = group; + if (echo) + atomic_inc(&skb->users); + netlink_broadcast(nfnl, skb, pid, group, allocation); + if (echo) + err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); + + return err; +} + +int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +{ + return netlink_unicast(nfnl, skb, pid, flags); +} + +/* Process one complete nfnetlink message. */ +static inline int nfnetlink_rcv_msg(struct sk_buff *skb, + struct nlmsghdr *nlh, int *errp) +{ + struct nfnl_callback *nc; + struct nfnetlink_subsystem *ss; + int type, err = 0; + + DEBUGP("entered; subsys=%u, msgtype=%u\n", + NFNL_SUBSYS_ID(nlh->nlmsg_type), + NFNL_MSG_TYPE(nlh->nlmsg_type)); + + /* Only requests are handled by kernel now. */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + DEBUGP("received non-request message\n"); + return 0; + } + + /* All the messages must at least contain nfgenmsg */ + if (nlh->nlmsg_len < + NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) { + DEBUGP("received message was too short\n"); + return 0; + } + + type = nlh->nlmsg_type; + ss = nfnetlink_get_subsys(type); + if (!ss) { +#ifdef CONFIG_KMOD + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); + if (!ss) +#endif + goto err_inval; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + DEBUGP("unable to find client for type %d\n", type); + goto err_inval; + } + + if (nc->cap_required && + !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { + DEBUGP("permission denied for type %d\n", type); + *errp = -EPERM; + return -1; + } + + { + struct nfattr *cda[ss->attr_count]; + + memset(cda, 0, ss->attr_count*sizeof(struct nfattr *)); + + err = nfnetlink_check_attributes(ss, nlh, cda); + if (err < 0) + goto err_inval; + + DEBUGP("calling handler\n"); + err = nc->call(nfnl, skb, nlh, cda, errp); + *errp = err; + return err; + } + +err_inval: + DEBUGP("returning -EINVAL\n"); + *errp = -EINVAL; + return -1; +} + +/* Process one packet of messages. */ +static inline int nfnetlink_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr *nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(struct nlmsghdr) + || skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (nfnetlink_rcv_msg(skb, nlh, &err)) { + if (!err) + return -1; + netlink_ack(skb, nlh, err); + } else + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +static void nfnetlink_rcv(struct sock *sk, int len) +{ + do { + struct sk_buff *skb; + + if (nfnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (nfnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, + skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + } while(nfnl && nfnl->sk_receive_queue.qlen); +} + +void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + sock_release(nfnl->sk_socket); + return; +} + +int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, + THIS_MODULE); + if (!nfnl) { + printk(KERN_ERR "cannot initialize nfnetlink!\n"); + return -1; + } + + return 0; +} + +module_init(nfnetlink_init); +module_exit(nfnetlink_exit); + +EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); +EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); +EXPORT_SYMBOL_GPL(nfnetlink_send); +EXPORT_SYMBOL_GPL(nfnetlink_unicast); +EXPORT_SYMBOL_GPL(nfattr_parse); +EXPORT_SYMBOL_GPL(__nfa_fill); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c new file mode 100644 --- /dev/null +++ b/net/netfilter/nfnetlink_queue.c @@ -0,0 +1,877 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFQNL_QMAX_DEFAULT 1024 + +#if 0 +#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define QDEBUG(x, ...) +#endif + +struct nfqnl_queue_entry { + struct list_head list; + struct nf_info *info; + struct sk_buff *skb; + unsigned int id; +}; + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_total; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + atomic_t id_sequence; /* 'sequence' of pkt ids */ + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + + spinlock_t lock; + + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); + +static DEFINE_RWLOCK(instances_lock); + +u_int64_t htonll(u_int64_t in) +{ + u_int64_t out; + int i; + + for (i = 0; i < sizeof(u_int64_t); i++) + ((u_int8_t *)&out)[sizeof(u_int64_t)-1] = ((u_int8_t *)&in)[i]; + + return out; +} + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +__instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct nfqnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(queue_num); + read_unlock_bh(&instances_lock); + + return inst; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + + QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(queue_num)) { + inst = NULL; + QDEBUG("aborting, instance already exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + atomic_set(&inst->id_sequence, 0); + inst->lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(queue_num)]); + + write_unlock_bh(&instances_lock); + + QDEBUG("successfully created new instance\n"); + + return inst; + +out_free: + kfree(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); + +static void +_instance_destroy2(struct nfqnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + QDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->queue_num); + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending skbs from the queue */ + nfqnl_flush(inst, NF_DROP); + + /* and finally free the data structure */ + kfree(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + + + +static void +issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +{ + QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); + + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + + kfree(entry); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry) +{ + list_add(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +/* + * Find and return a queued entry matched by cmpfn, or return the last + * entry if cmpfn is NULL. + */ +static inline struct nfqnl_queue_entry * +__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data) +{ + struct list_head *p; + + list_for_each_prev(p, &queue->queue_list) { + struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; + + if (!cmpfn || cmpfn(entry, data)) + return entry; + } + return NULL; +} + +static inline void +__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) +{ + list_del(&entry->list); + q->queue_total--; +} + +static inline struct nfqnl_queue_entry * +__find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + entry = __find_entry(queue, cmpfn, data); + if (entry == NULL) + return NULL; + + __dequeue_entry(queue, entry); + return entry; +} + + +static inline void +__nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + struct nfqnl_queue_entry *entry; + + while ((entry = __find_dequeue_entry(queue, NULL, 0))) + issue_verdict(entry, verdict); +} + +static inline int +__nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nfattr which has 16bit nfa_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + return status; +} + +static struct nfqnl_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + spin_lock_bh(&queue->lock); + entry = __find_dequeue_entry(queue, cmpfn, data); + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + spin_lock_bh(&queue->lock); + __nfqnl_flush(queue, verdict); + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry, int *errp) +{ + unsigned char *old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nfqnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int tmp_uint; + + QDEBUG("entered\n"); + + /* all macros expand to constant values at compile time */ + size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + spin_lock_bh(&queue->lock); + + switch (queue->copy_mode) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + data_len = 0; + break; + + case NFQNL_COPY_PACKET: + if (queue->copy_range == 0 + || queue->copy_range > entry->skb->len) + data_len = entry->skb->len; + else + data_len = queue->copy_range; + + size += NLMSG_SPACE(data_len); + break; + + default: + *errp = -EINVAL; + spin_unlock_bh(&queue->lock); + return NULL; + } + + spin_unlock_bh(&queue->lock); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail= skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->info->pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(queue->queue_num); + + pmsg.packet_id = htonl(entry->id); + pmsg.hw_protocol = htons(entry->skb->protocol); + pmsg.hook = entry->info->hook; + + NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + + if (entry->info->indev) { + tmp_uint = htonl(entry->info->indev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->info->outdev) { + tmp_uint = htonl(entry->info->outdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->skb->nfmark) { + tmp_uint = htonl(entry->skb->nfmark); + NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + } + + if (entry->info->indev && entry->skb->dev + && entry->skb->dev->hard_header_parse) { + struct nfqnl_msg_packet_hw phw; + + phw.hw_addrlen = + entry->skb->dev->hard_header_parse(entry->skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } + + if (entry->skb->stamp.tv_sec) { + struct nfqnl_msg_packet_timestamp ts; + + ts.sec = htonll(entry->skb->stamp.tv_sec); + ts.usec = htonll(entry->skb->stamp.tv_usec); + + NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = NFQA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nfattr_failure: + if (skb) + kfree_skb(skb); + *errp = -EINVAL; + if (net_ratelimit()) + printk(KERN_ERR "nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) +{ + int status = -EINVAL; + struct sk_buff *nskb; + struct nfqnl_instance *queue; + struct nfqnl_queue_entry *entry; + + QDEBUG("entered\n"); + + queue = instance_lookup(queuenum); + if (!queue) { + QDEBUG("no queue instance matching\n"); + return -EINVAL; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + QDEBUG("mode COPY_NONE, aborting\n"); + return -EAGAIN; + } + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) { + if (net_ratelimit()) + printk(KERN_ERR + "nf_queue: OOM in nfqnl_enqueue_packet()\n"); + return -ENOMEM; + } + + entry->info = info; + entry->skb = skb; + entry->id = atomic_inc_return(&queue->id_sequence); + + nskb = nfqnl_build_packet_message(queue, entry, &status); + if (nskb == NULL) + goto err_out_free; + + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) + goto err_out_free_nskb; + + if (queue->queue_total >= queue->queue_maxlen) { + queue->queue_dropped++; + status = -ENOSPC; + if (net_ratelimit()) + printk(KERN_WARNING "ip_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", + queue->queue_total, queue->queue_dropped); + goto err_out_free_nskb; + } + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + if (status < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return status; + +err_out_free_nskb: + kfree_skb(nskb); + +err_out_unlock: + spin_unlock_bh(&queue->lock); + +err_out_free: + kfree(entry); + return status; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +{ + int diff; + + diff = data_len - e->skb->len; + if (diff < 0) + skb_trim(e->skb, data_len); + else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "ip_queue: OOM " + "in mangle, dropping packet\n"); + return -ENOMEM; + } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(&e->skb, data_len)) + return -ENOMEM; + memcpy(e->skb->data, data, data_len); + + return 0; +} + +static inline int +id_cmp(struct nfqnl_queue_entry *e, unsigned long id) +{ + return (id == e->id); +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status; + + spin_lock_bh(&queue->lock); + status = __nfqnl_set_mode(queue, mode, range); + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +{ + if (entry->info->indev) + if (entry->info->indev->ifindex == ifindex) + return 1; + + if (entry->info->outdev) + if (entry->info->outdev->ifindex == ifindex) + return 1; + + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + QDEBUG("entering for ifindex %u\n", ifindex); + + /* this only looks like we have to hold the readlock for a way too long + * time, issue_verdict(), nf_reinject(), ... - but we always only + * issue NF_DROP, which is processed directly in nf_reinject() */ + read_lock_bh(&instances_lock); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry(inst, tmp, head, hlist) { + struct nfqnl_queue_entry *entry; + while ((entry = find_dequeue_entry(inst, dev_cmp, + ifindex)) != NULL) + issue_verdict(entry, NF_DROP); + } + } + + read_unlock_bh(&instances_lock); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + unsigned int verdict; + struct nfqnl_queue_entry *entry; + + queue = instance_lookup(queue_num); + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + if (!nfqa[NFQA_VERDICT_HDR-1]) + return -EINVAL; + + vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + verdict = ntohl(vhdr->verdict); + + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + return -EINVAL; + + entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + if (nfqa[NFQA_PAYLOAD-1]) { + if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), + NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + verdict = NF_DROP; + } + + if (nfqa[NFQA_MARK-1]) + skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); + + issue_verdict(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + + QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + queue = instance_lookup(queue_num); + if (nfqa[NFQA_CFG_CMD-1]) { + struct nfqnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + QDEBUG("found CFG_CMD\n"); + + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) + return -EBUSY; + + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (!queue) + return -EINVAL; + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + instance_destroy(queue); + break; + case NFQNL_CFG_CMD_PF_BIND: + QDEBUG("registering queue handler for pf=%u\n", + ntohs(cmd->pf)); + return nf_register_queue_handler(ntohs(cmd->pf), + nfqnl_enqueue_packet, + NULL); + + break; + case NFQNL_CFG_CMD_PF_UNBIND: + QDEBUG("unregistering queue handler for pf=%u\n", + ntohs(cmd->pf)); + /* This is a bug and a feature. We can unregister + * other handlers(!) */ + return nf_unregister_queue_handler(ntohs(cmd->pf)); + break; + default: + return -EINVAL; + } + } else { + if (!queue) { + QDEBUG("no config command, and no instance ENOENT\n"); + return -ENOENT; + } + + if (queue->peer_pid != NETLINK_CB(skb).pid) { + QDEBUG("no config command, and wrong pid\n"); + return -EPERM; + } + } + + if (nfqa[NFQA_CFG_PARAMS-1]) { + struct nfqnl_msg_config_params *params; + params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + return 0; +} + +static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .attr_count = NFQA_MAX, + .cb = nfqnl_cb, +}; + +static int +init_or_cleanup(int init) +{ + int status = -ENOMEM; + + if (!init) + goto cleanup; + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +cleanup: + nf_unregister_queue_handlers(nfqnl_enqueue_packet); + unregister_netdevice_notifier(&nfqnl_dev_notifier); + nfnetlink_subsys_unregister(&nfqnl_subsys); + +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(init); +module_exit(fini); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,7 +13,12 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo * use nlk_sk, as sk->protinfo is on a diet 8) - * + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days */ #include @@ -92,6 +97,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + struct proto_ops *p_ops; }; static struct netlink_table *nl_table; @@ -341,7 +347,21 @@ static int netlink_create(struct socket if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - sock->ops = &netlink_ops; + netlink_table_grab(); + if (!nl_table[protocol].hash.entries) { + netlink_table_ungrab(); +#ifdef CONFIG_KMOD + /* We do 'best effort'. If we find a matching module, + * it is loaded. If not, we don't return an error to + * allow pure userspace<->userspace communication. -HW + */ + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_table_grab(); +#endif + } + netlink_table_ungrab(); + + sock->ops = nl_table[protocol].p_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) @@ -394,6 +414,22 @@ static int netlink_release(struct socket }; notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + + /* When this is a kernel socket, we need to remove the owner pointer, + * since we don't know whether the module will be dying at any given + * point - HW + */ + if (!nlk->pid) { + struct proto_ops *p_tmp; + + netlink_table_grab(); + p_tmp = nl_table[sk->sk_protocol].p_ops; + if (p_tmp != &netlink_ops) { + nl_table[sk->sk_protocol].p_ops = &netlink_ops; + kfree(p_tmp); + } + netlink_table_ungrab(); + } sock_put(sk); return 0; @@ -1023,8 +1059,9 @@ static void netlink_data_ready(struct so */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) { + struct proto_ops *p_ops; struct socket *sock; struct sock *sk; @@ -1034,22 +1071,63 @@ netlink_kernel_create(int unit, void (*i if (unit<0 || unit>=MAX_LINKS) return NULL; + /* Do a quick check, to make us not go down to netlink_insert() + * if protocol already has kernel socket. + */ + sk = netlink_lookup(unit, 0); + if (unlikely(sk)) { + sock_put(sk); + return NULL; + } + if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; + sk = NULL; + if (module) { + /* Every registering protocol implemented in a module needs + * it's own p_ops, since the socket code cannot deal with + * module refcounting otherwise. -HW + */ + p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); + if (!p_ops) + goto out_sock_release; + + memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); + p_ops->owner = module; + } else + p_ops = &netlink_ops; + + netlink_table_grab(); + nl_table[unit].p_ops = p_ops; + netlink_table_ungrab(); + if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; + } + + return sk; + +out_kfree_p_ops: + netlink_table_grab(); + if (nl_table[unit].p_ops != &netlink_ops) { + kfree(nl_table[unit].p_ops); + nl_table[unit].p_ops = &netlink_ops; } + netlink_table_ungrab(); +out_sock_release: + sock_release(sock); return sk; } @@ -1413,6 +1491,8 @@ enomem: for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; + nl_table[i].p_ops = &netlink_ops; + hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) @@ -1438,21 +1518,7 @@ out: return err; } -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; - proto_unregister(&netlink_proto); -} - core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -64,7 +64,7 @@ int nr_rx_ip(struct sk_buff *skb, struct skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; - ip_rcv(skb, skb->dev, NULL); + ip_rcv(skb, skb->dev, NULL, skb->dev); return 1; } diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk) if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -241,7 +241,7 @@ static struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET static struct proto_ops packet_ops_spkt; -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; @@ -441,7 +441,7 @@ static inline unsigned run_filter(struct we will not harm anyone. */ -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_ll *sll; @@ -546,7 +546,7 @@ drop: } #ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/sched/act_api.c b/net/sched/act_api.c --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, while ((a = act) != NULL) { repeat: if (a->ops && a->ops->act) { - ret = a->ops->act(&skb, a); + ret = a->ops->act(&skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -179,11 +179,6 @@ repeat: act = a->next; } exec_done: - if (skb->tc_classid > 0) { - res->classid = skb->tc_classid; - res->class = 0; - skb->tc_classid = 0; - } return ret; } diff --git a/net/sched/gact.c b/net/sched/gact.c --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, in } static int -tcf_gact(struct sk_buff **pskb, struct tc_action *a) +tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_gact *p = PRIV(a, gact); struct sk_buff *skb = *pskb; diff --git a/net/sched/ipt.c b/net/sched/ipt.c --- a/net/sched/ipt.c +++ b/net/sched/ipt.c @@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int } static int -tcf_ipt(struct sk_buff **pskb, struct tc_action *a) +tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *p = PRIV(a, ipt); diff --git a/net/sched/mirred.c b/net/sched/mirred.c --- a/net/sched/mirred.c +++ b/net/sched/mirred.c @@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, } static int -tcf_mirred(struct sk_buff **pskb, struct tc_action *a) +tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *p = PRIV(a, mirred); struct net_device *dev; diff --git a/net/sched/pedit.c b/net/sched/pedit.c --- a/net/sched/pedit.c +++ b/net/sched/pedit.c @@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, i } static int -tcf_pedit(struct sk_buff **pskb, struct tc_action *a) +tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = PRIV(a, pedit); struct sk_buff *skb = *pskb; diff --git a/net/sched/police.c b/net/sched/police.c --- a/net/sched/police.c +++ b/net/sched/police.c @@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct return 0; } -static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) +static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, + struct tcf_result *res) { psched_time_t now; struct sk_buff *skb = *pskb; diff --git a/net/sched/simple.c b/net/sched/simple.c --- a/net/sched/simple.c +++ b/net/sched/simple.c @@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock); #include #include -static int tcf_simp(struct sk_buff **pskb, struct tc_action *a) +static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct sk_buff *skb = *pskb; struct tcf_defact *p = PRIV(a, defact); diff --git a/net/sctp/socket.c b/net/sctp/socket.c --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct soc sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); } } @@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct soc sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); } } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -50,9 +50,9 @@ /* Forward declarations for internal helpers. */ static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *); + struct sctp_ulpevent *); static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, - struct sctp_ulpevent *); + struct sctp_ulpevent *); /* 1st Level Abstractions */ @@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq event = sctp_ulpq_order(ulpq, event); } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. + */ if (event) sctp_ulpq_tail_event(ulpq, event); @@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sct return sctp_clear_pd(ulpq->asoc->base.sk); } - - +/* If the SKB of 'event' is on a list, it is the first such member + * of that list. + */ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; - struct sk_buff_head *queue; + struct sk_buff_head *queue, *skb_list; + struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; + skb_list = (struct sk_buff_head *) skb->prev; + /* If the socket is just going to throw this away, do not * even try to deliver it. */ @@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulp /* If we are harvesting multiple skbs they will be * collected on a list. */ - if (sctp_event2skb(event)->list) - sctp_skb_list_tail(sctp_event2skb(event)->list, queue); + if (skb_list) + sctp_skb_list_tail(skb_list, queue); else - __skb_queue_tail(queue, sctp_event2skb(event)); + __skb_queue_tail(queue, skb); /* Did we just complete partial delivery and need to get * rolling again? Move pending data to the receive @@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulp return 1; out_free: - if (sctp_event2skb(event)->list) - sctp_queue_purge_ulpevents(sctp_event2skb(event)->list); + if (skb_list) + sctp_queue_purge_ulpevents(skb_list); else sctp_ulpevent_free(event); + return 0; } @@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sctp_ulpevent *event; @@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_r skb_shinfo(f_frag)->frag_list = pos; /* Remove the first fragment from the reassembly queue. */ - __skb_unlink(f_frag, f_frag->list); + __skb_unlink(f_frag, queue); while (pos) { pnext = pos->next; @@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_r f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. */ - __skb_unlink(pos, pos->list); + __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) @@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp done: return retval; found: - retval = sctp_make_reassembled_event(first_frag, pos); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp * further. */ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp * further. */ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); return retval; } @@ -537,6 +544,7 @@ done: static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { + struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *in; @@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_or ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; + event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; + /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; @@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_or /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, sid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. */ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(event_list, pos); } } @@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_order } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *event) + struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *in; @@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordere { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; - struct sctp_ulpevent *event = NULL; + struct sctp_ulpevent *event; struct sctp_stream *in; struct sk_buff_head temp; __u16 csid, cssn; @@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordere in = &ulpq->asoc->ssnmap->in; /* We are holding the chunks by stream, by SSN. */ + skb_queue_head_init(&temp); + event = NULL; sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; @@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordere /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, csid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); if (!event) { /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); - skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); } else { /* Attach all gathered skbs to the event. */ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(&temp, pos); } } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. + */ if (event) sctp_ulpq_tail_event(ulpq, event); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -286,16 +286,16 @@ void unix_gc(void) skb = skb_peek(&s->sk_receive_queue); while (skb && skb != (struct sk_buff *)&s->sk_receive_queue) { - nextsk=skb->next; + nextsk = skb->next; /* * Do we have file descriptors ? */ - if(UNIXCB(skb).fp) - { - __skb_unlink(skb, skb->list); - __skb_queue_tail(&hitlist,skb); + if (UNIXCB(skb).fp) { + __skb_unlink(skb, + &s->sk_receive_queue); + __skb_queue_tail(&hitlist, skb); } - skb=nextsk; + skb = nextsk; } spin_unlock(&s->sk_receive_queue.lock); } diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_bu } int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct sk_buff *nskb; struct x25_neigh *nb; diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk) if (!skb_prev) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1516,7 +1516,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, + THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; @@ -1534,3 +1535,4 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); + selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);