aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-10-04 14:25:37 -0700
committerJakub Kicinski <kuba@kernel.org>2023-10-04 14:25:37 -0700
commit07cf7974a2236a66f989869c301aa0220f33905c (patch)
tree1548edc75f70361d996abdad86daa306f19b8a3d
parent2b464cc2fd57c5a0250ae2f31505ac8e26e9748c (diff)
parent013714bf3e125a218bb02c938ff6df348dda743e (diff)
downloadnf-next-07cf7974a2236a66f989869c301aa0220f33905c.tar.gz
Merge tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says: ==================== netfilter updates for net-next First patch, from myself, is a bug fix. The issue (connect timeout) is ancient, so I think its safe to give this more soak time given the esoteric conditions needed to trigger this. Also updates the existing selftest to cover this. Add netlink extacks when an update references a non-existent table/chain/set. This allows userspace to provide much better errors to the user, from Pablo Neira Ayuso. Last patch adds more policy checks to nf_tables as a better alternative to the existing runtime checks, from Phil Sutter. * tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: nf_tables: Utilize NLA_POLICY_NESTED_ARRAY netfilter: nf_tables: missing extended netlink error in lookup functions selftests: netfilter: test nat source port clash resolution interaction with tcp early demux netfilter: nf_nat: undo erroneous tcp edemux lookup after port clash ==================== Link: https://lore.kernel.org/r/20230928144916.18339-1-fw@strlen.de Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/netfilter/nf_nat_proto.c64
-rw-r--r--net/netfilter/nf_tables_api.c43
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh46
3 files changed, 126 insertions, 27 deletions
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 48cc60084d28b3..5a049740758f63 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
}
#endif
+static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
+{
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ const struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return false;
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ return false;
+ }
+
+ dir = CTINFO2DIR(ctinfo);
+ if (dir != IP_CT_DIR_ORIGINAL)
+ return false;
+
+ return ct->tuplehash[!dir].tuple.dst.u.all != sport;
+}
+
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
ret = nf_nat_ipv4_fn(priv, skb, state);
- if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
- !inet_sk_transparent(sk))
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* skb has a socket assigned via tcp edemux. We need to check
+ * if nf_nat_ipv4_fn() has mangled the packet in a way that
+ * edemux would not have found this socket.
+ *
+ * This includes both changes to the source address and changes
+ * to the source port, which are both handled by the
+ * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
+ * might have found a socket for the old (pre-snat) address.
+ */
+ if (saddr != ip_hdr(skb)->saddr ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb); /* TCP edemux obtained wrong socket */
return ret;
@@ -938,6 +975,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
}
static unsigned int
+nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct in6_addr saddr = ipv6_hdr(skb)->saddr;
+ struct sock *sk = skb->sk;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* see nf_nat_ipv4_local_in */
+ if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
+ skb_orphan(skb);
+
+ return ret;
+}
+
+static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -1051,7 +1109,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_fn,
+ .hook = nf_nat_ipv6_local_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4356189360fb88..7e2e76086d25ea 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3316,7 +3316,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
- [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
@@ -4254,12 +4254,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_SET_HANDLE] = { .type = NLA_U64 },
[NFTA_SET_EXPR] = { .type = NLA_NESTED },
- [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
+};
+
+static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
+ [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
- [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
+ [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
static struct nft_set *nft_set_lookup(const struct nft_table *table,
@@ -4695,8 +4699,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
@@ -4713,10 +4719,6 @@ err_fill_set_info:
return err;
}
-static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
- [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
-};
-
static int nft_set_desc_concat_parse(const struct nlattr *attr,
struct nft_set_desc *desc)
{
@@ -5498,7 +5500,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
- [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -5506,7 +5508,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy),
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@@ -6025,8 +6027,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -6919,8 +6923,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
if (!list_empty(&set->bindings) &&
(set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
@@ -7195,8 +7201,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
if (nft_set_is_anonymous(set))
return -EOPNOTSUPP;
@@ -8680,6 +8688,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
+ struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
@@ -8705,13 +8714,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
genmask, 0);
- if (IS_ERR(table))
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
+ }
flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
- if (IS_ERR(flowtable))
+ if (IS_ERR(flowtable)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return PTR_ERR(flowtable);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
index 1092bbcb1fba67..a1aa8f4a582838 100755
--- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -11,16 +11,18 @@ ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
+socatpid=0
cleanup()
{
+ [ $socatpid -gt 0 ] && kill $socatpid
ip netns del $ns1
ip netns del $ns2
}
-iperf3 -v > /dev/null 2>&1
+socat -h > /dev/null 2>&1
if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without iperf3"
+ echo "SKIP: Could not run test without socat"
exit $ksft_skip
fi
@@ -60,8 +62,8 @@ ip netns exec $ns2 ip link set up dev veth2
ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
# Create a server in one namespace
-ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
-iperfs=$!
+ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
# Restrict source port to just one so we don't have to exhaust
# all others.
@@ -83,17 +85,43 @@ sleep 1
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
-echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null
+echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
ret=$?
-kill $iperfs
-
# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
if [ $ret -eq 0 ]; then
echo "PASS: socat can connect via NAT'd address"
else
echo "FAIL: socat cannot connect via NAT'd address"
- exit 1
fi
-exit 0
+# check sport clashres.
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
+sleep 1
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 -a $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret