aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c36
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c8
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_log.c5
-rw-r--r--net/netfilter/nf_tables_api.c16
-rw-r--r--net/netfilter/nfnetlink.c5
-rw-r--r--net/netfilter/nft_chain_filter.c6
-rw-r--r--net/netfilter/nft_connlimit.c4
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_last.c4
-rw-r--r--net/netfilter/nft_limit.c14
-rw-r--r--net/netfilter/nft_quota.c4
-rw-r--r--net/netfilter/nft_rt.c4
-rw-r--r--net/netfilter/nft_set_pipapo.c258
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_tunnel.c44
25 files changed, 246 insertions, 250 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a2c16b5010877..c7a8a08b73089 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1550,6 +1550,7 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (!dest)
goto unk;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 type;
/* Only support version 0 and C (csum) */
@@ -1560,7 +1561,10 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
if (type != htons(ETH_P_IP))
goto unk;
*proto = IPPROTO_IPIP;
- return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+
+ gre_flags_to_tnl_flags(flags, greh->flags);
+
+ return gre_calc_hlen(flags);
}
unk:
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 143a341bbc0a4..b6d0dcf3a5c34 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -94,6 +94,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
int availmem;
+ int amemthresh;
int nomem;
int to_change = -1;
@@ -105,7 +106,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < ipvs->sysctl_amemthresh);
+ amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
+ nomem = (availmem < amemthresh);
local_bh_disable();
@@ -145,9 +147,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 1:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
ipvs->sysctl_drop_packet = 2;
} else {
ipvs->drop_rate = 0;
@@ -155,9 +156,8 @@ static void update_defense_level(struct netns_ipvs *ipvs)
break;
case 2:
if (nomem) {
- ipvs->drop_rate = ipvs->drop_counter
- = ipvs->sysctl_amemthresh /
- (ipvs->sysctl_amemthresh-availmem);
+ ipvs->drop_counter = amemthresh / (amemthresh - availmem);
+ ipvs->drop_rate = ipvs->drop_counter;
} else {
ipvs->drop_rate = 0;
ipvs->sysctl_drop_packet = 1;
@@ -2263,7 +2263,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
- { }
};
#endif
@@ -4270,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct ctl_table *tbl;
int idx, ret;
size_t ctl_table_size = ARRAY_SIZE(vs_vars);
+ bool unpriv = net->user_ns != &init_user_ns;
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4284,12 +4284,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- ctl_table_size = 0;
- }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4315,10 +4309,17 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_ports = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ports;
tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
+
ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+
ipvs->sysctl_sync_sock_size = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
+
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
@@ -4341,15 +4342,22 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+
ipvs->sysctl_run_estimation = 1;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_run_estimation;
ipvs->est_cpulist_valid = 0;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_cpulist;
ipvs->sysctl_est_nice = IPVS_EST_NICE;
+ if (unpriv)
+ tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_nice;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 8ceec7a2fa8f3..2423513d701d4 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblc_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblc_ctl_table = vs_vars_table;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0fb64707213f8..cdb1d4bf6761c 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
#endif
@@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- ipvs->lblcr_ctl_table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns)
vars_table_size = 0;
- }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 65e0259178da4..3313bceb6cc99 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(addr_type & IPV6_ADDR_LOOPBACK);
old_rt_is_local = __ip_vs_is_local_route6(
- (struct rt6_info *)skb_dst(skb));
+ dst_rt6_info(skb_dst(skb)));
} else
#endif
{
@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rtable *) dest_dst->dst_cache;
+ rt = dst_rtable(dest_dst->dst_cache);
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
@@ -390,10 +390,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < 68) {
@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rt6_info *) dest_dst->dst_cache;
+ rt = dst_rt6_info(dest_dst->dst_cache);
else {
u32 cookie;
@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
rt_mode);
if (!dst)
goto err_unreach;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
}
local = __ip_vs_is_local_route6(rt);
@@ -553,10 +553,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
} else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
mtu -= gre_calc_hlen(tflags);
}
if (mtu < IPV6_MIN_MTU) {
@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_RDR);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1082,11 +1082,11 @@ ipvs_gre_encap(struct net *net, struct sk_buff *skb,
{
__be16 proto = *next_protocol == IPPROTO_IPIP ?
htons(ETH_P_IP) : htons(ETH_P_IPV6);
- __be16 tflags = 0;
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t hdrlen;
if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
hdrlen = gre_calc_hlen(tflags);
gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
@@ -1165,11 +1165,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
tdev = rt->dst.dev;
/*
@@ -1310,11 +1310,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
} else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ IP_TUNNEL_DECLARE_FLAGS(tflags) = { };
size_t gre_hdrlen;
- __be16 tflags = 0;
if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
- tflags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, tflags);
gre_hdrlen = gre_calc_hlen(tflags);
max_headroom += gre_hdrlen;
@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c63868666bd9e..7ac20750c127b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1440,8 +1440,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
u8 protonum = nf_ct_protonum(ct);
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
- return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
@@ -2024,7 +2022,7 @@ repeat:
goto repeat;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
+ if (ret == NF_DROP)
NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = -ret;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index e2db1f4ec2df9..ebc4f733bb2e6 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -525,7 +525,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
@@ -533,7 +533,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
/* pull again, including possible 48 bit sequences and subtype header */
dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
if (!dh)
- return NF_DROP;
+ return -NF_ACCEPT;
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 1020d67600a95..327b8059025da 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = {
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
- [ICMPV6_MLD2_REPORT - 130] = 1
+ [ICMPV6_MLD2_REPORT - 130] = 1,
+ [ICMPV6_MRDISC_ADV - 130] = 1,
+ [ICMPV6_MRDISC_SOL - 130] = 1
};
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0ee98ce5b8165..74112e9c5dabc 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -616,11 +616,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_LWTUNNEL,
#endif
- __NF_SYSCTL_CT_LAST_SYSCTL,
+ NF_SYSCTL_CT_LAST_SYSCTL,
};
-#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
-
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
@@ -957,7 +955,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif
- {}
};
static struct ctl_table nf_ct_netfilter_table[] = {
@@ -968,7 +965,6 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
@@ -1122,7 +1118,7 @@ out_unregister_netfilter:
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = cnet->sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(cnet->sysctl_header);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a0571339239c4..5c1ff07eaee0b 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc);
static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
- const struct rt6_info *rt;
-
- if (flow_tuple->l3proto == NFPROTO_IPV6) {
- rt = (const struct rt6_info *)flow_tuple->dst_cache;
- return rt6_get_cookie(rt);
- }
+ if (flow_tuple->l3proto == NFPROTO_IPV6)
+ return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
return 0;
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 5383bed3d3e00..c2c005234dcd3 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->iif = skb->dev->ifindex;
IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ rt = dst_rt6_info(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 370f8231385ca..769fd7680fac2 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -395,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
-static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
+static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO];
static struct ctl_table_header *nf_log_sysctl_fhdr;
static struct ctl_table nf_log_sysctl_ftable[] = {
@@ -406,7 +406,6 @@ static struct ctl_table nf_log_sysctl_ftable[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int nf_log_proc_dostring(struct ctl_table *table, int write,
@@ -514,7 +513,7 @@ err_alloc:
static void netfilter_log_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->nf.nf_log_dir_header->ctl_table_arg;
unregister_net_sysctl_table(net->nf.nf_log_dir_header);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 167074283ea91..be3b4c90d2eda 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3333,7 +3333,7 @@ err_expr_parse:
return ERR_PTR(err);
}
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp)
{
int err;
@@ -3341,7 +3341,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
return -EINVAL;
dst->ops = src->ops;
- err = src->ops->clone(dst, src);
+ err = src->ops->clone(dst, src, gfp);
if (err < 0)
return err;
@@ -6525,7 +6525,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
if (!expr)
goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]);
+ err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT);
if (err < 0) {
kfree(expr);
goto err_expr;
@@ -6564,7 +6564,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- err = nft_expr_clone(expr, expr_array[i]);
+ err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT);
if (err < 0)
goto err_elem_expr_setup;
@@ -7776,6 +7776,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (WARN_ON_ONCE(!type))
return -ENOENT;
+ if (!obj->ops->update)
+ return 0;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -9467,9 +9470,10 @@ static void nft_obj_commit_update(struct nft_trans *trans)
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- if (obj->ops->update)
- obj->ops->update(obj, newobj);
+ if (WARN_ON_ONCE(!obj->ops->update))
+ return;
+ obj->ops->update(obj, newobj);
nft_obj_destroy(&trans->ctx, newobj);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c9fbe0f707b5f..4abf660c7baff 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -427,6 +427,9 @@ replay_abort:
nfnl_unlock(subsys_id);
+ if (nlh->nlmsg_flags & NLM_F_ACK)
+ nfnl_err_add(&err_list, nlh, 0, &extack);
+
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -573,6 +576,8 @@ done:
} else if (err) {
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+ } else if (nlh->nlmsg_flags & NLM_F_ACK) {
+ nfnl_err_add(&err_list, nlh, 0, &extack);
}
} else {
enum nfnl_abort_action abort_action;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d170758a1eb5d..7010541fcca66 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -325,9 +325,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_hook *hook, *found = NULL;
int n = 0;
- if (event != NETDEV_UNREGISTER)
- return;
-
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev == dev)
found = hook;
@@ -367,8 +364,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
.net = dev_net(dev),
};
- if (event != NETDEV_UNREGISTER &&
- event != NETDEV_CHANGENAME)
+ if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
nft_net = nft_pernet(ctx.net);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index de9d1980df696..92b984fa8175c 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -210,12 +210,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx,
nft_connlimit_do_destroy(ctx, priv);
}
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp);
if (!priv_dst->list)
return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index dccc68a5135ad..291ed2026367e 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -226,7 +226,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
@@ -236,7 +236,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp);
if (cpu_stats == NULL)
return -ENOMEM;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index c09dba57354c1..b4ada3ab21679 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
- if (nft_expr_clone(expr, priv->expr_array[i]) < 0)
+ if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0)
return -1;
elem_expr->size += priv->expr_array[i]->ops->size;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 8e6d7eaf9dc8b..de1b6066bfa85 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -102,12 +102,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx,
kfree(priv->last);
}
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp);
if (!priv_dst->last)
return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index cefa25e0dbb0a..21d26b79b4607 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx,
}
static int nft_limit_clone(struct nft_limit_priv *priv_dst,
- const struct nft_limit_priv *priv_src)
+ const struct nft_limit_priv *priv_src, gfp_t gfp)
{
priv_dst->tokens_max = priv_src->tokens_max;
priv_dst->rate = priv_src->rate;
@@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->burst = priv_src->burst;
priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp);
if (!priv_dst->limit)
return -ENOMEM;
@@ -223,14 +223,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, &priv->limit);
}
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp);
}
static struct nft_expr_type nft_limit_type;
@@ -281,12 +282,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
nft_limit_destroy(ctx, priv);
}
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src,
+ gfp_t gfp)
{
struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src);
+ return nft_limit_clone(priv_dst, priv_src, gfp);
}
static const struct nft_expr_ops nft_limit_bytes_ops = {
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 3ba12a7471b0f..9b2d7463d3d32 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -233,7 +233,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx,
return nft_quota_do_destroy(ctx, priv);
}
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp)
{
struct nft_quota *priv_dst = nft_expr_priv(dst);
struct nft_quota *priv_src = nft_expr_priv(src);
@@ -241,7 +241,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
priv_dst->quota = priv_src->quota;
priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp);
if (!priv_dst->consumed)
return -ENOMEM;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 24d9771385729..14d88394bcb7f 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ *dest = (__force u32)rt_nexthop(dst_rtable(dst),
ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
goto err;
- memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+ memcpy(dest, rt6_nexthop(dst_rt6_info(dst),
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 187138afac45d..15a236bebb46a 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -504,6 +504,7 @@ out:
* pipapo_get() - Get matching element reference given key data
* @net: Network namespace
* @set: nftables API set representation
+ * @m: storage containing active/existing elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
@@ -517,17 +518,15 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
+ const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp, gfp_t gfp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
int i;
- m = priv->clone;
if (m->bsize_max == 0)
return ret;
@@ -612,9 +611,11 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
nft_genmask_cur(net), get_jiffies_64(),
GFP_ATOMIC);
if (IS_ERR(e))
@@ -1247,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return 0;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
+static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old);
+
+/**
+ * pipapo_maybe_clone() - Build clone for pending data changes, if not existing
+ * @set: nftables API set representation
+ *
+ * Return: newly created or existing clone, if any. NULL on allocation failure
+ */
+static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *m;
+
+ if (priv->clone)
+ return priv->clone;
+
+ m = rcu_dereference_protected(priv->match,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = pipapo_clone(m);
+
+ return priv->clone;
+}
+
/**
* nft_pipapo_insert() - Validate and insert ranged elements
* @net: Network namespace
@@ -1263,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
u64 tstamp = nft_net_tstamp(net);
@@ -1272,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
+ if (!m)
+ return -ENOMEM;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
end = (const u8 *)nft_set_ext_key_end(ext)->data;
else
end = start;
- dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL);
+ dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1299,7 +1336,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp,
+ dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
GFP_KERNEL);
}
@@ -1332,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Insert */
- priv->dirty = true;
-
bsize_max = m->bsize_max;
nft_pipapo_for_each_field(f, i, m) {
@@ -1384,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
* pipapo_clone() - Clone matching data to create new working copy
* @old: Existing matching data
*
- * Return: copy of matching data passed as 'old', error pointer on failure
+ * Return: copy of matching data passed as 'old' or NULL.
*/
static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
{
@@ -1394,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
- return ERR_PTR(-ENOMEM);
+ return NULL;
new->field_count = old->field_count;
new->bsize_max = old->bsize_max;
@@ -1466,7 +1501,7 @@ out_scratch:
free_percpu(new->scratch);
kfree(new);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
/**
@@ -1698,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
* NFT_SET_ELEM_DEAD_BIT.
*/
if (__nft_set_elem_expired(&e->ext, tstamp)) {
- priv->dirty = true;
-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return;
@@ -1777,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *old;
-
- if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- pipapo_gc(set, priv->clone);
+ struct nft_pipapo_match *old;
- if (!priv->dirty)
+ if (!priv->clone)
return;
- new_clone = pipapo_clone(priv->clone);
- if (IS_ERR(new_clone))
- return;
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ pipapo_gc(set, priv->clone);
- priv->dirty = false;
+ old = rcu_replace_pointer(priv->match, priv->clone,
+ nft_pipapo_transaction_mutex_held(set));
+ priv->clone = NULL;
- old = rcu_access_pointer(priv->match);
- rcu_assign_pointer(priv->match, priv->clone);
if (old)
call_rcu(&old->rcu, pipapo_reclaim_match);
-
- priv->clone = new_clone;
-}
-
-static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
-{
-#ifdef CONFIG_PROVE_LOCKING
- const struct net *net = read_pnet(&set->net);
-
- return lockdep_is_held(&nft_pernet(net)->commit_mutex);
-#else
- return true;
-#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct nft_pipapo_match *new_clone, *m;
-
- if (!priv->dirty)
- return;
-
- m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
- new_clone = pipapo_clone(m);
- if (IS_ERR(new_clone))
+ if (!priv->clone)
return;
-
- priv->dirty = false;
-
pipapo_free_match(priv->clone);
- priv->clone = new_clone;
+ priv->clone = NULL;
}
/**
@@ -1851,52 +1857,38 @@ static void nft_pipapo_activate(const struct net *net,
}
/**
- * pipapo_deactivate() - Check that element is in set, mark as inactive
+ * nft_pipapo_deactivate() - Search for element and make it inactive
* @net: Network namespace
* @set: nftables API set representation
- * @data: Input key data
- * @ext: nftables API extension pointer, used to check for end element
- *
- * This is a convenience function that can be called from both
- * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same
- * operation.
+ * @elem: nftables API element representation containing key data
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const u8 *data, const struct nft_set_ext *ext)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_pipapo_match *m = pipapo_maybe_clone(set);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net),
- nft_net_tstamp(net), GFP_KERNEL);
+ /* removal must occur on priv->clone, if we are low on memory
+ * we have no choice and must fail the removal request.
+ */
+ if (!m)
+ return NULL;
+
+ e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
- return e;
-}
-
-/**
- * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive
- * @net: Network namespace
- * @set: nftables API set representation
- * @elem: nftables API element representation containing key data
- *
- * Return: deactivated element if found, NULL otherwise.
- */
-static struct nft_elem_priv *
-nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-
- return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
+ return &e->priv;
}
/**
- * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
+ * nft_pipapo_flush() - make element inactive
* @net: Network namespace
* @set: nftables API set representation
* @elem_priv: nftables API element representation containing key data
@@ -2093,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
if (last && f->mt[rulemap[i].to].e == e) {
- priv->dirty = true;
pipapo_drop(m, rulemap);
return;
}
@@ -2106,35 +2097,23 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
}
/**
- * nft_pipapo_walk() - Walk over elements
+ * nft_pipapo_do_walk() - Walk over elements in m
* @ctx: nftables API context
* @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
* @iter: Iterator
*
* As elements are referenced in the mapping array for the last field, directly
* scan that array: there's no need to follow rule mappings from the first
- * field.
+ * field. @m is protected either by RCU read lock or by transaction mutex.
*/
-static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ const struct nft_pipapo_match *m,
+ struct nft_set_iter *iter)
{
- struct nft_pipapo *priv = nft_set_priv(set);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
- WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
- iter->type != NFT_ITER_UPDATE);
-
- rcu_read_lock();
- if (iter->type == NFT_ITER_READ)
- m = rcu_dereference(priv->match);
- else
- m = priv->clone;
-
- if (unlikely(!m))
- goto out;
-
for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
;
@@ -2151,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
- goto out;
+ return;
cont:
iter->count++;
}
+}
-out:
- rcu_read_unlock();
+/**
+ * nft_pipapo_walk() - Walk over elements
+ * @ctx: nftables API context
+ * @set: nftables API set representation
+ * @iter: Iterator
+ *
+ * Test if destructive action is needed or not, clone active backend if needed
+ * and call the real function to work on the data.
+ */
+static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ const struct nft_pipapo_match *m;
+
+ switch (iter->type) {
+ case NFT_ITER_UPDATE:
+ m = pipapo_maybe_clone(set);
+ if (!m) {
+ iter->err = -ENOMEM;
+ return;
+ }
+
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ break;
+ case NFT_ITER_READ:
+ rcu_read_lock();
+ m = rcu_dereference(priv->match);
+ nft_pipapo_do_walk(ctx, set, m, iter);
+ rcu_read_unlock();
+ break;
+ default:
+ iter->err = -EINVAL;
+ WARN_ON_ONCE(1);
+ break;
+ }
}
/**
@@ -2267,21 +2281,10 @@ static int nft_pipapo_init(const struct nft_set *set,
f->mt = NULL;
}
- /* Create an initial clone of matching data for next insertion */
- priv->clone = pipapo_clone(m);
- if (IS_ERR(priv->clone)) {
- err = PTR_ERR(priv->clone);
- goto out_free;
- }
-
- priv->dirty = false;
-
rcu_assign_pointer(priv->match, m);
return 0;
-out_free:
- free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2326,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- int cpu;
m = rcu_dereference_protected(priv->match, true);
- if (m) {
- rcu_barrier();
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(m, cpu);
- free_percpu(m->scratch);
- pipapo_free_fields(m);
- kfree(m);
- priv->match = NULL;
- }
if (priv->clone) {
- m = priv->clone;
-
- nft_set_pipapo_match_destroy(ctx, set, m);
-
- for_each_possible_cpu(cpu)
- pipapo_free_scratch(priv->clone, cpu);
- free_percpu(priv->clone->scratch);
-
- pipapo_free_fields(priv->clone);
- kfree(priv->clone);
+ nft_set_pipapo_match_destroy(ctx, set, priv->clone);
+ pipapo_free_match(priv->clone);
priv->clone = NULL;
+ } else {
+ nft_set_pipapo_match_destroy(ctx, set, m);
}
+
+ pipapo_free_match(m);
}
/**
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 24cd1ff73f981..0d2e40e10f7f5 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -155,14 +155,12 @@ struct nft_pipapo_match {
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
- * @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
- bool dirty;
unsigned long last_gc;
};
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index f735d79d8be57..60a76e6e348e7 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -174,8 +174,8 @@ struct nft_tunnel_opts {
struct erspan_metadata erspan;
u8 data[IP_TUNNEL_OPTS_MAX];
} u;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
u32 len;
- __be16 flags;
};
struct nft_tunnel_obj {
@@ -271,7 +271,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
opts->len = sizeof(struct vxlan_metadata);
- opts->flags = TUNNEL_VXLAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags);
return 0;
}
@@ -325,7 +326,8 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
opts->u.erspan.version = version;
opts->len = sizeof(struct erspan_metadata);
- opts->flags = TUNNEL_ERSPAN_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags);
return 0;
}
@@ -366,7 +368,8 @@ static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
opt->length = data_len / 4;
opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
- opts->flags = TUNNEL_GENEVE_OPT;
+ ip_tunnel_flags_zero(opts->flags);
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags);
return 0;
}
@@ -385,8 +388,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
struct nft_tunnel_opts *opts)
{
struct nlattr *nla;
- __be16 type = 0;
int err, rem;
+ u32 type = 0;
err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
nft_tunnel_opts_policy, NULL);
@@ -401,7 +404,7 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_vxlan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
if (type)
@@ -409,15 +412,15 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
err = nft_tunnel_obj_erspan_init(nla, opts);
if (err)
return err;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
case NFTA_TUNNEL_KEY_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
err = nft_tunnel_obj_geneve_init(nla, opts);
if (err)
return err;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
default:
return -EOPNOTSUPP;
@@ -454,7 +457,9 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
memset(&info, 0, sizeof(info));
info.mode = IP_TUNNEL_INFO_TX;
info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
- info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+ __set_bit(IP_TUNNEL_KEY_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
+ __set_bit(IP_TUNNEL_NOCACHE_BIT, info.key.tun_flags);
if (tb[NFTA_TUNNEL_KEY_IP]) {
err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
@@ -483,11 +488,12 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
- info.key.tun_flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
- info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT,
+ info.key.tun_flags);
if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
- info.key.tun_flags |= TUNNEL_SEQ;
+ __set_bit(IP_TUNNEL_SEQ_BIT, info.key.tun_flags);
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
@@ -583,7 +589,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
if (!nest)
return -1;
- if (opts->flags & TUNNEL_VXLAN_OPT) {
+ if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
if (!inner)
goto failure;
@@ -591,7 +597,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
htonl(opts->u.vxlan.gbp)))
goto inner_failure;
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags)) {
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
if (!inner)
goto failure;
@@ -613,7 +619,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
break;
}
nla_nest_end(skb, inner);
- } else if (opts->flags & TUNNEL_GENEVE_OPT) {
+ } else if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -658,11 +664,11 @@ static int nft_tunnel_flags_dump(struct sk_buff *skb,
{
u32 flags = 0;
- if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
- if (!(info->key.tun_flags & TUNNEL_CSUM))
+ if (!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
- if (info->key.tun_flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags))
flags |= NFT_TUNNEL_F_SEQ_NUMBER;
if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)