aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/netfilter_ipv4/ip_conntrack.h 11
-rw-r--r-- include/linux/netfilter_ipv4/ip_nat.h 26
-rw-r--r-- include/linux/netfilter_ipv4/ip_nat_core.h 17
-rw-r--r-- include/linux/netfilter_ipv4/ip_nat_protocol.h 6
-rw-r--r-- net/ipv4/netfilter/ip_nat_core.c 438
-rw-r--r-- net/ipv4/netfilter/ip_nat_helper.c 50
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_icmp.c 8
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_tcp.c 23
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_udp.c 24
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_unknown.c 2
-rw-r--r-- net/ipv4/netfilter/ip_nat_rule.c 25
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c 7
12 files changed, 218 insertions, 419 deletions
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index be86d7d28a635..05c771d1cf3e6 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -40,6 +40,17 @@ enum ip_conntrack_status {
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
+
+ /* Connection needs src nat in orig dir. This bit never changes. */
+ IPS_SRC_NAT_BIT = 4,
+ IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
+
+ /* Connection needs dst nat in orig dir. This bit never changes. */
+ IPS_DST_NAT_BIT = 5,
+ IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
+
+ /* Both together. */
+ IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
};
#ifdef __KERNEL__
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index c4366280256ae..5018bcfaac54e 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -48,42 +48,16 @@ struct ip_nat_multi_range_compat
struct ip_nat_range range[1];
};
-/* Worst case: local-out manip + 1 post-routing, and reverse dirn. */
-#define IP_NAT_MAX_MANIPS (2*2)
-
-struct ip_nat_info_manip
-{
- /* The direction. */
- u_int8_t direction;
-
- /* Which hook the manipulation happens on. */
- u_int8_t hooknum;
-
- /* The manipulation type. */
- u_int8_t maniptype;
-
- /* Manipulations to occur at each conntrack in this dirn. */
- struct ip_conntrack_manip manip;
-};
-
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/netfilter_ipv4/lockhelp.h>
-/* Protects NAT hash tables, and NAT-private part of conntracks. */
-DECLARE_RWLOCK_EXTERN(ip_nat_lock);
-
/* The structure embedded in the conntrack structure. */
struct ip_nat_info
{
/* Set to zero when conntrack created: bitmask of maniptypes */
u_int16_t initialized;
- u_int16_t num_manips;
-
- /* Manipulations to be done on this conntrack. */
- struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];
-
struct list_head bysource;
/* Helper (NULL if none). */
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
index 0ae9a21d9746e..3b50eb91f007c 100644
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ b/include/linux/netfilter_ipv4/ip_nat_core.h
@@ -8,16 +8,13 @@
extern int ip_nat_init(void);
extern void ip_nat_cleanup(void);
-extern unsigned int do_bindings(struct ip_conntrack *ct,
- enum ip_conntrack_info conntrackinfo,
- struct ip_nat_info *info,
- unsigned int hooknum,
- struct sk_buff **pskb);
+extern unsigned int nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info conntrackinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb);
extern int icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *conntrack,
- unsigned int hooknum,
- int dir);
-
-
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir);
#endif /* _IP_NAT_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h
index f343239cd4ea0..129708c22386f 100644
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h
@@ -15,11 +15,11 @@ struct ip_nat_protocol
/* Protocol number. */
unsigned int protonum;
- /* Do a packet translation according to the ip_nat_proto_manip
- * and manip type. Return true if succeeded. */
+ /* Translate a packet to the target according to manip type.
+ Return true if succeeded. */
int (*manip_pkt)(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype);
/* Is the manipable part of the tuple between min and max incl? */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index d7f0369e2dbbc..dda60fd643d8d 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -42,7 +42,6 @@
#endif
DECLARE_RWLOCK(ip_nat_lock);
-DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;
@@ -52,26 +51,22 @@ struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
/* We keep an extra hash for each conntrack, for fast searching. */
-static inline size_t
-hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
+static inline unsigned int
+hash_by_src(const struct ip_conntrack_tuple *tuple)
{
/* Original src, to ensure we map it consistently if poss. */
- return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
+ return jhash_3words(tuple->src.ip, tuple->src.u.all,
+ tuple->dst.protonum, 0) % ip_nat_htable_size;
}
/* Noone using conntrack by the time this called. */
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
{
struct ip_nat_info *info = &conn->nat.info;
- unsigned int hs;
if (!info->initialized)
return;
- hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
- conn->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
-
WRITE_LOCK(&ip_nat_lock);
list_del(&info->bysource);
WRITE_UNLOCK(&ip_nat_lock);
@@ -104,25 +99,6 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(u32 dstip, u32 srcip)
-{
- static int warned = 0;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return;
-
- if (rt->rt_src != srcip && !warned) {
- printk("NAT: no longer support implicit source local NAT\n");
- printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
- NIPQUAD(srcip), NIPQUAD(dstip));
- warned = 1;
- }
- ip_rt_put(rt);
-}
-
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range. */
static int
@@ -165,11 +141,10 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple *result,
const struct ip_nat_range *range)
{
- unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
+ unsigned int h = hash_by_src(tuple);
struct ip_conntrack *ct;
- MUST_BE_READ_LOCKED(&ip_nat_lock);
-
+ READ_LOCK(&ip_nat_lock);
list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
@@ -177,10 +152,13 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range))
+ if (in_range(result, range)) {
+ READ_UNLOCK(&ip_nat_lock);
return 1;
+ }
}
}
+ READ_UNLOCK(&ip_nat_lock);
return 0;
}
@@ -194,7 +172,7 @@ static void
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range,
const struct ip_conntrack *conntrack,
- unsigned int hooknum)
+ enum ip_nat_manip_type maniptype)
{
u_int32_t *var_ipp;
/* Host order */
@@ -204,7 +182,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
return;
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
+ if (maniptype == IP_NAT_MANIP_SRC)
var_ipp = &tuple->src.ip;
else
var_ipp = &tuple->dst.ip;
@@ -219,7 +197,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
* spread in practice (if there are a small number of IPs
* involved, there usually aren't that many connections
* anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Backing sites
+ * client coming from the same IP (some Internet Banking sites
* like this), even across reboots. */
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
@@ -238,7 +216,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig_tuple,
const struct ip_nat_range *range,
struct ip_conntrack *conntrack,
- unsigned int hooknum)
+ enum ip_nat_manip_type maniptype)
{
struct ip_nat_protocol *proto
= ip_nat_find_proto(orig_tuple->dst.protonum);
@@ -250,7 +228,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
This is only required for source (ie. NAT/masq) mappings.
So far, we don't do local source mappings, so multiple
manips not an issue. */
- if (hooknum == NF_IP_POST_ROUTING) {
+ if (maniptype == IP_NAT_MANIP_SRC) {
if (find_appropriate_src(orig_tuple, tuple, range)) {
DEBUGP("get_unique_tuple: Found current src map\n");
if (!ip_nat_used_tuple(tuple, conntrack))
@@ -261,56 +239,19 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
/* 2) Select the least-used IP/proto combination in the given
range. */
*tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, conntrack, hooknum);
-
- if (hooknum == NF_IP_LOCAL_OUT && tuple->dst.ip != orig_tuple->dst.ip)
- warn_if_extra_mangle(tuple->src.ip, tuple->dst.ip);
+ find_best_ips_proto(tuple, range, conntrack, maniptype);
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, HOOK2MANIP(hooknum),
- &range->min, &range->max))
+ || proto->in_range(tuple, maniptype, &range->min, &range->max))
&& !ip_nat_used_tuple(tuple, conntrack))
return;
/* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, HOOK2MANIP(hooknum), conntrack);
-}
-
-/* Where to manip the reply packets (will be reverse manip). */
-static unsigned int opposite_hook[NF_IP_NUMHOOKS]
-= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
- [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
- [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
- [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
-};
-
-static void replace_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info)
-{
- /* Source has changed, so replace in hashes. */
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
- list_move(&info->bysource, &bysource[srchash]);
-}
-
-static void place_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info)
-{
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
- list_add(&info->bysource, &bysource[srchash]);
+ proto->unique_tuple(tuple, range, maniptype, conntrack);
}
unsigned int
@@ -318,121 +259,53 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
const struct ip_nat_range *range,
unsigned int hooknum)
{
- struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
- struct ip_conntrack_tuple orig_tp;
+ struct ip_conntrack_tuple curr_tuple, new_tuple;
struct ip_nat_info *info = &conntrack->nat.info;
- int in_hashes = info->initialized;
+ int have_to_hash = !info->initialized;
+ enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_IN
|| hooknum == NF_IP_LOCAL_OUT);
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
+ IP_NF_ASSERT(!(info->initialized & (1 << maniptype)));
/* What we've got will look like inverse of reply. Normally
this is what is in the conntrack, except for prior
manipulations (future optimization: if num_manips == 0,
orig_tp =
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- invert_tuplepr(&orig_tp,
+ invert_tuplepr(&curr_tuple,
&conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-#if 0
- {
- unsigned int i;
-
- DEBUGP("Hook %u (%s), ", hooknum,
- HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
- DUMP_TUPLE(&orig_tp);
- DEBUGP("Range %p: ", mr);
- for (i = 0; i < mr->rangesize; i++) {
- DEBUGP("%u:%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
- i,
- (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
- ? " MAP_IPS" : "",
- (mr->range[i].flags
- & IP_NAT_RANGE_PROTO_SPECIFIED)
- ? " PROTO_SPECIFIED" : "",
- NIPQUAD(mr->range[i].min_ip),
- NIPQUAD(mr->range[i].max_ip),
- mr->range[i].min.all,
- mr->range[i].max.all);
- }
- }
-#endif
-
- get_unique_tuple(&new_tuple, &orig_tp, range, conntrack, hooknum);
-
- /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
- the original (A/B/C/D') and the mangled one (E/F/G/H').
-
- We're only allowed to work with the SRC per-proto
- part, so we create inverses of both to start, then
- derive the other fields we need. */
+ get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
- /* Reply connection: simply invert the new tuple
- (G/H/E/F') */
- invert_tuplepr(&reply, &new_tuple);
+ if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+ struct ip_conntrack_tuple reply;
- /* Alter conntrack table so will recognize replies. */
- ip_conntrack_alter_reply(conntrack, &reply);
+ /* Alter conntrack table so will recognize replies. */
+ invert_tuplepr(&reply, &new_tuple);
+ ip_conntrack_alter_reply(conntrack, &reply);
- /* FIXME: We can simply used existing conntrack reply tuple
- here --RR */
- /* Create inverse of original: C/D/A/B' */
- invert_tuplepr(&inv_tuple, &orig_tp);
-
- /* Has source changed?. */
- if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
- IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC);
- IP_NF_ASSERT(ip_ct_tuple_dst_equal(&new_tuple, &orig_tp));
-
- /* In this direction, a source manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_ORIGINAL, hooknum,
- IP_NAT_MANIP_SRC, new_tuple.src });
-
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
-
- /* In the reverse direction, a destination manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_REPLY, opposite_hook[hooknum],
- IP_NAT_MANIP_DST, orig_tp.src });
- IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
+ /* Non-atomic: we own this at the moment. */
+ if (maniptype == IP_NAT_MANIP_SRC)
+ conntrack->status |= IPS_SRC_NAT;
+ else
+ conntrack->status |= IPS_DST_NAT;
}
- /* Has destination changed? */
- if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
- IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST);
-
- /* In this direction, a destination manip */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_ORIGINAL, hooknum,
- IP_NAT_MANIP_DST, reply.src });
-
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
-
- /* In the reverse direction, a source manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_REPLY, opposite_hook[hooknum],
- IP_NAT_MANIP_SRC, inv_tuple.src });
- IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
+ /* Place in source hash if this is the first time. */
+ if (have_to_hash) {
+ unsigned int srchash
+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple);
+ WRITE_LOCK(&ip_nat_lock);
+ list_add(&info->bysource, &bysource[srchash]);
+ WRITE_UNLOCK(&ip_nat_lock);
}
/* It's done. */
- info->initialized |= (1 << HOOK2MANIP(hooknum));
-
- if (in_hashes)
- replace_in_hashes(conntrack, info);
- else
- place_in_hashes(conntrack, info);
-
+ info->initialized |= (1 << maniptype);
return NF_ACCEPT;
}
@@ -441,121 +314,95 @@ static int
manip_pkt(u_int16_t proto,
struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *target,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph;
(*pskb)->nfcache |= NFC_ALTERED;
- if (!skb_ip_make_writable(pskb, iphdroff+sizeof(*iph)))
+ if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
- manip, maniptype))
+ target, maniptype))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
+ iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
iph->check);
- iph->saddr = manip->ip;
+ iph->saddr = target->src.ip;
} else {
- iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip,
+ iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
iph->check);
- iph->daddr = manip->ip;
+ iph->daddr = target->dst.ip;
}
return 1;
}
-/* Do packet manipulations according to binding. */
-unsigned int
-do_bindings(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct ip_nat_info *info,
- unsigned int hooknum,
- struct sk_buff **pskb)
+/* Do packet manipulations according to ip_nat_setup_info. */
+unsigned int nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
- int i, ret = NF_ACCEPT;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- int proto = (*pskb)->nh.iph->protocol;
-
- /* Need nat lock to protect against modification, but neither
- conntrack (referenced) and helper (deleted with
- synchronize_bh()) can vanish. */
- READ_LOCK(&ip_nat_lock);
- for (i = 0; i < info->num_manips; i++) {
- if (info->manips[i].direction == dir
- && info->manips[i].hooknum == hooknum) {
- DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
- *pskb,
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "SRC" : "DST",
- NIPQUAD(info->manips[i].manip.ip),
- htons(info->manips[i].manip.u.all));
- if (!manip_pkt(proto, pskb, 0,
- &info->manips[i].manip,
- info->manips[i].maniptype)) {
- READ_UNLOCK(&ip_nat_lock);
- return NF_DROP;
- }
- }
- }
- READ_UNLOCK(&ip_nat_lock);
+ unsigned long statusbit;
+ enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
- /* FIXME: NAT/conntrack helpers should set ctinfo &
- * CT_INFO_RESYNC on packets, so we don't have to adjust all
- * connections with conntrack helpers --RR */
+ /* FIXME: use a bit in status for this. */
if (ct->helper
- && proto == IPPROTO_TCP
+ && ct->tuplehash[0].tuple.dst.protonum == IPPROTO_TCP
&& (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)) {
DEBUGP("ip_nat_core: adjusting sequence number\n");
/* future: put this in a l4-proto specific function,
* and call this function here. */
if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
- ret = NF_DROP;
+ return NF_DROP;
}
- return ret;
-}
+ if (mtype == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
-static inline int tuple_src_equal_dst(const struct ip_conntrack_tuple *t1,
- const struct ip_conntrack_tuple *t2)
-{
- if (t1->dst.protonum != t2->dst.protonum || t1->src.ip != t2->dst.ip)
- return 0;
- if (t1->dst.protonum != IPPROTO_ICMP)
- return t1->src.u.all == t2->dst.u.all;
- else {
- struct ip_conntrack_tuple inv;
-
- /* ICMP tuples are asymetric */
- invert_tuplepr(&inv, t1);
- return inv.src.u.all == t2->src.u.all &&
- inv.dst.u.all == t2->dst.u.all;
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ /* Non-atomic: these bits don't change. */
+ if (ct->status & statusbit) {
+ struct ip_conntrack_tuple target;
+
+ /* We are aiming to look like inverse of other direction. */
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+ if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+ return NF_DROP;
}
+ return NF_ACCEPT;
}
-int
-icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *conntrack,
- unsigned int hooknum,
- int dir)
+/* Dir is direction ICMP is coming from (opposite to packet it contains) */
+int icmp_reply_translation(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir)
{
struct {
struct icmphdr icmp;
struct iphdr ip;
} *inside;
- unsigned int i;
- struct ip_nat_info *info = &conntrack->nat.info;
- struct ip_conntrack_tuple *cttuple, innertuple;
- int hdrlen;
+ struct ip_conntrack_tuple inner, target;
+ int hdrlen = (*pskb)->nh.iph->ihl * 4;
- if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
+ if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
+
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
/* We're actually going to mangle it beyond trivial checksum
@@ -576,92 +423,51 @@ icmp_reply_translation(struct sk_buff **pskb,
confused... --RR */
if (inside->icmp.type == ICMP_REDIRECT) {
/* Don't care about races here. */
- if (info->initialized
+ if (ct->nat.info.initialized
!= ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
- || info->num_manips != 0)
+ || (ct->status & IPS_NAT_MASK))
return 0;
}
- DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
- *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
- /* Note: May not be from a NAT'd host, but probably safest to
- do translation always as if it came from the host itself
- (even though a "host unreachable" coming from the host
- itself is a bit weird).
-
- More explanation: some people use NAT for anonymizing.
- Also, CERT recommends dropping all packets from private IP
- addresses (although ICMP errors from internal links with
- such addresses are not too uncommon, as Alan Cox points
- out) */
+ DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
+ *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
- &innertuple,
- ip_ct_find_proto(inside->ip.protocol)))
+ &inner, ip_ct_find_proto(inside->ip.protocol)))
return 0;
- cttuple = &conntrack->tuplehash[dir].tuple;
- READ_LOCK(&ip_nat_lock);
- for (i = 0; i < info->num_manips; i++) {
- DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
- i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
- "ORIG" : "REPLY", info->manips[i].hooknum);
-
- if (info->manips[i].direction != dir)
- continue;
-
- /* Mapping the inner packet is just like a normal packet, except
- * it was never src/dst reversed, so where we would normally
- * apply a dst manip, we apply a src, and vice versa. */
-
- /* Only true for forwarded packets, locally generated packets
- * never hit PRE_ROUTING, we need to apply their PRE_ROUTING
- * manips in LOCAL_OUT. */
- if (hooknum == NF_IP_LOCAL_OUT &&
- info->manips[i].hooknum == NF_IP_PRE_ROUTING)
- hooknum = info->manips[i].hooknum;
-
- if (info->manips[i].hooknum != hooknum)
- continue;
-
- /* ICMP errors may be generated locally for packets that
- * don't have all NAT manips applied yet. Verify manips
- * have been applied before reversing them */
- if (info->manips[i].maniptype == IP_NAT_MANIP_SRC) {
- if (!tuple_src_equal_dst(cttuple, &innertuple))
- continue;
- } else {
- if (!tuple_src_equal_dst(&innertuple, cttuple))
- continue;
- }
+ /* Change inner back to look like incoming packet. We do the
+ opposite manip on this hook to normal, because it might not
+ pass all hooks (locally-generated ICMP). Consider incoming
+ packet: PREROUTING (DST manip), routing produces ICMP, goes
+ through POSTROUTING (which must correct the DST manip). */
+ if (!manip_pkt(inside->ip.protocol, pskb,
+ (*pskb)->nh.iph->ihl*4
+ + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple,
+ !manip))
+ return 0;
- DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "DST" : "SRC", NIPQUAD(info->manips[i].manip.ip),
- ntohs(info->manips[i].manip.u.udp.port));
- if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
- &info->manips[i].manip,
- !info->manips[i].maniptype))
- goto unlock_fail;
-
- /* Outer packet needs to have IP header NATed like
- it's a reply. */
-
- /* Use mapping to map outer packet: 0 give no
- per-proto mapping */
- DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "SRC" : "DST", NIPQUAD(info->manips[i].manip.ip));
- if (!manip_pkt(0, pskb, 0, &info->manips[i].manip,
- info->manips[i].maniptype))
- goto unlock_fail;
- }
- READ_UNLOCK(&ip_nat_lock);
+ /* Change outer to look like the reply to an incoming packet
+ * (proto 0 means don't invert per-proto part). */
- hdrlen = (*pskb)->nh.iph->ihl * 4;
+ /* Obviously, we need to NAT destination IP, but source IP
+ should be NAT'ed only if it is from a NAT'd host.
+ Explanation: some people use NAT for anonymizing. Also,
+ CERT recommends dropping all packets from private IP
+ addresses (although ICMP errors from internal links with
+ such addresses are not too uncommon, as Alan Cox points
+ out) */
+ if (manip != IP_NAT_MANIP_SRC
+ || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, pskb, 0, &target, manip))
+ return 0;
+ }
+
+ /* Reload "inside": manip_pkt on the inner packet may have changed *pskb. */
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
inside->icmp.checksum = 0;
@@ -669,10 +475,6 @@ icmp_reply_translation(struct sk_buff **pskb,
(*pskb)->len - hdrlen,
0));
return 1;
-
- unlock_fail:
- READ_UNLOCK(&ip_nat_lock);
- return 0;
}
int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 9bca2faa52bc6..b7a5179d266a6 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -405,46 +405,28 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
return 1;
}
-/* We look at the master's nat fields without ip_nat_lock. This works
- because the master's NAT must be fully initialized, because we
- don't match expectations set up by unconfirmed connections. We
- can't grab the lock because we hold the ip_conntrack_lock, and that
- would be backwards from other locking orders. */
-static void ip_nat_copy_manip(struct ip_nat_info *master,
- struct ip_conntrack_expect *exp,
- struct ip_conntrack *ct)
-{
- struct ip_nat_range range;
- unsigned int i;
-
- range.flags = IP_NAT_RANGE_MAP_IPS;
-
- /* Find what master is mapped to (if any), so we can do the same. */
- for (i = 0; i < master->num_manips; i++) {
- if (master->manips[i].direction != exp->dir)
- continue;
-
- range.min_ip = range.max_ip = master->manips[i].manip.ip;
-
- /* If this is a DST manip, map port here to where it's
- * expected. */
- if (master->manips[i].maniptype == IP_NAT_MANIP_DST) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- ip_nat_setup_info(ct, &range, master->manips[i].hooknum);
- }
-}
-
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void ip_nat_follow_master(struct ip_conntrack *ct,
- struct ip_conntrack_expect *this)
+ struct ip_conntrack_expect *exp)
{
- struct ip_nat_info *master = &ct->master->nat.info;
+ struct ip_nat_range range;
/* This must be a fresh one. */
BUG_ON(ct->nat.info.initialized);
- ip_nat_copy_manip(master, this, ct);
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+ /* hook doesn't matter, but it has to do source manip */
+ ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.src.ip;
+ /* hook doesn't matter, but it has to do destination manip */
+ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 7cbe08819b0e2..a558cf0eee8a4 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -54,7 +54,7 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
icmp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -64,12 +64,12 @@ icmp_manip_pkt(struct sk_buff **pskb,
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ hdr = (struct icmphdr *)((*pskb)->data + hdroff);
hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
- manip->u.icmp.id,
+ tuple->src.u.icmp.id,
hdr->checksum);
- hdr->un.echo.id = manip->u.icmp.id;
+ hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index fb21a0875fa44..694838c0acd0d 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -85,14 +85,14 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
tcp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr;
- u16 *portptr, oldport;
+ u32 oldip, newip;
+ u16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
@@ -104,27 +104,32 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct tcphdr *)((*pskb)->data + hdroff);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
- oldip = oldsrc;
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
- oldip = olddst;
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
oldport = *portptr;
- *portptr = manip->u.tcp.port;
+ *portptr = newport;
if (hdrsize < sizeof(*hdr))
return 1;
- hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
ip_nat_cheat_check(oldport ^ 0xFFFF,
- manip->u.tcp.port,
+ newport,
hdr->check));
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index 3c492530863c1..c669e3b5f5d0d 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -84,34 +84,40 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
udp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr;
- u16 *portptr;
+ u32 oldip, newip;
+ u16 *portptr, newport;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct udphdr *)((*pskb)->data + hdroff);
+
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
- oldip = oldsrc;
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
- oldip = olddst;
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (hdr->check) /* 0 is a special case meaning no checksum */
- hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
ip_nat_cheat_check(*portptr ^ 0xFFFF,
- manip->u.udp.port,
+ newport,
hdr->check));
- *portptr = manip->u.udp.port;
+ *portptr = newport;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 8f2e7ddbbdc89..f5525bd58d16f 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -40,7 +40,7 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
unknown_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
return 1;
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 80773588d8ad0..08d0fba85b85d 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -16,6 +16,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <net/checksum.h>
+#include <net/route.h>
#include <linux/bitops.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
@@ -120,6 +121,25 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
return ip_nat_setup_info(ct, &mr->range[0], hooknum);
}
+/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
+static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+{
+ static int warned = 0;
+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+ struct rtable *rt;
+
+ if (ip_route_output_key(&rt, &fl) != 0)
+ return;
+
+ if (rt->rt_src != srcip && !warned) {
+ printk("NAT: no longer support implicit source local NAT\n");
+ printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+ NIPQUAD(srcip), NIPQUAD(dstip));
+ warned = 1;
+ }
+ ip_rt_put(rt);
+}
+
static unsigned int ipt_dnat_target(struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
@@ -139,6 +159,11 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
/* Connection must be valid and new. */
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ if (hooknum == NF_IP_LOCAL_OUT
+ && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+ warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ mr->range[0].min_ip);
+
return ip_nat_setup_info(ct, &mr->range[0], hooknum);
}
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 164dde6fb1f7b..6f40b664041fe 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -106,7 +106,7 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!icmp_reply_translation(pskb, ct, hooknum,
+ if (!icmp_reply_translation(pskb, ct, maniptype,
CTINFO2DIR(ctinfo)))
return NF_DROP;
else
@@ -116,7 +116,6 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_NEW:
info = &ct->nat.info;
- WRITE_LOCK(&ip_nat_lock);
/* Seen it before? This can happen for loopback, retrans,
or local packets.. */
if (!(info->initialized & (1 << maniptype))) {
@@ -131,14 +130,12 @@ ip_nat_fn(unsigned int hooknum,
info);
if (ret != NF_ACCEPT) {
- WRITE_UNLOCK(&ip_nat_lock);
return ret;
}
} else
DEBUGP("Already setup manip %s for ct %p\n",
maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- WRITE_UNLOCK(&ip_nat_lock);
break;
default:
@@ -149,7 +146,7 @@ ip_nat_fn(unsigned int hooknum,
}
IP_NF_ASSERT(info);
- return do_bindings(ct, ctinfo, info, hooknum, pskb);
+ return nat_packet(ct, ctinfo, hooknum, pskb);
}
static unsigned int