aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nft_hash.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2013-10-14 11:00:02 +0200
committerPablo Neira Ayuso <pablo@netfilter.org>2013-10-14 17:15:48 +0200
commit96518518cc417bb0a8c80b9fb736202e28acdf96 (patch)
tree2ac4f939a88f0a8047403d0e07b5167369236f82 /net/netfilter/nft_hash.c
parentf59cb0453cd885736daa11ae2445982c5ab2fc83 (diff)
downloadlinux-96518518cc417bb0a8c80b9fb736202e28acdf96.tar.gz
netfilter: add nftables
This patch adds nftables, which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality; they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianness. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into a register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close a connection, e.g. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow-up patch (netfilter: nf_tables: add netlink set API). 
This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes a skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data for the hook, which is used to store the rule list per chain. 
This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add 
missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter/nft_hash.c')
-rw-r--r--net/netfilter/nft_hash.c348
1 files changed, 348 insertions, 0 deletions
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 00000000000000..67cc502881f193
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private state of one "hash" expression instance.
+ */
+struct nft_hash {
+	struct hlist_head	*hash;		/* bucket array with hsize entries */
+	unsigned int		hsize;		/* number of buckets in hash */
+	enum nft_registers	sreg:8;		/* register the lookup key is read from */
+	enum nft_registers	dreg:8;		/* register mapped data is copied into */
+	u8			klen, dlen;	/* key length / length used when dumping data */
+	u16			flags;		/* only NFT_HASH_MAP is accepted */
+};
+
+/*
+ * One entry of the hash table, chained into a single bucket.
+ */
+struct nft_hash_elem {
+	struct hlist_node	hnode;	/* linkage within the bucket list */
+	struct nft_data		key;	/* key the entry is looked up by */
+	struct nft_data		data[];	/* one slot, allocated only for NFT_HASH_MAP */
+};
+
+/* Seed passed to jhash2(); filled with random bytes by the first nft_hash_init(). */
+static u32 nft_hash_rnd __read_mostly;
+/* Set once nft_hash_rnd has been seeded. */
+static bool nft_hash_rnd_initted __read_mostly;
+
+/*
+ * Compute the bucket index for a key.
+ *
+ * The first len / 4 32-bit words of the key are hashed with the seed
+ * in nft_hash_rnd. The 32-bit hash is then scaled by hsize with a
+ * 64-bit multiply and shift instead of a modulo, which yields a value
+ * below hsize for any non-zero hsize.
+ */
+static unsigned int nft_hash_data(const struct nft_data *data,
+				  unsigned int hsize, unsigned int len)
+{
+	/* FIXME: can we reasonably guarantee the upper bits are fixed? */
+	const unsigned int hash = jhash2(data->data, len >> 2, nft_hash_rnd);
+	const u64 scaled = (u64)hash * hsize;
+
+	return scaled >> 32;
+}
+
+/*
+ * Look the key held in the source register up in the hash table.
+ *
+ * On a match, maps (NFT_HASH_MAP) copy the element's data into the
+ * destination register; plain sets leave the registers untouched.
+ * When no element matches, the verdict register is set to NFT_BREAK.
+ */
+static void nft_hash_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_data *key = &data[priv->sreg];
+	const struct nft_hash_elem *he;
+	struct hlist_head *bucket;
+
+	bucket = &priv->hash[nft_hash_data(key, priv->hsize, priv->klen)];
+	hlist_for_each_entry(he, bucket, hnode) {
+		if (nft_data_cmp(&he->key, key, priv->klen) == 0) {
+			if (priv->flags & NFT_HASH_MAP)
+				nft_data_copy(&data[priv->dreg], he->data);
+			return;
+		}
+	}
+
+	/* Nothing in the bucket matched the key. */
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Release an element: uninitialise its key, uninitialise its data when
+ * the expression is a map, then free the element itself. The element
+ * must already be unlinked from its bucket (or never have been linked).
+ */
+static void nft_hash_elem_destroy(const struct nft_expr *expr,
+				  struct nft_hash_elem *elem)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const bool has_data = priv->flags & NFT_HASH_MAP;
+
+	nft_data_uninit(&elem->key, NFT_DATA_VALUE);
+	if (has_data) {
+		/* The data type follows from the destination register. */
+		nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg));
+	}
+	kfree(elem);
+}
+
+/* Netlink policy for the attributes nested inside one list element. */
+static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = {
+	[NFTA_HE_KEY]	= { .type = NLA_NESTED },
+	[NFTA_HE_DATA]	= { .type = NLA_NESTED },
+};
+
+/*
+ * Parse one NFTA_LIST_ELEM attribute into a freshly allocated element.
+ *
+ * @ctx:  nftables context, passed through to the data helpers
+ * @expr: the hash expression the element belongs to
+ * @nla:  nested attribute carrying NFTA_HE_KEY and, for maps, NFTA_HE_DATA
+ * @new:  set to the new element on success; untouched on failure
+ *
+ * The key must be a value of exactly priv->klen bytes. NFTA_HE_DATA is
+ * mandatory for maps (NFT_HASH_MAP) and forbidden otherwise.
+ *
+ * For value data the length of the first element is recorded in
+ * priv->dlen, and later elements must use the same length. This is
+ * the only place in this file that assigns dlen, which the dump path
+ * passes on as the data length.
+ *
+ * Returns 0 on success or a negative errno. The element is not linked
+ * into the table; the caller owns it.
+ */
+static int nft_hash_elem_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr *nla,
+			      struct nft_hash_elem **new)
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *tb[NFTA_HE_MAX + 1];
+	struct nft_hash_elem *elem;
+	struct nft_data_desc d1, d2;
+	unsigned int size;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_HE_KEY] == NULL)
+		return -EINVAL;
+	size = sizeof(*elem);
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (tb[NFTA_HE_DATA] == NULL)
+			return -EINVAL;
+		/* Maps carry exactly one data slot behind the key. */
+		size += sizeof(elem->data[0]);
+	} else {
+		if (tb[NFTA_HE_DATA] != NULL)
+			return -EINVAL;
+	}
+
+	elem = kzalloc(size, GFP_KERNEL);
+	if (elem == NULL)
+		return -ENOMEM;
+
+	err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]);
+	if (err < 0)
+		goto err1;
+	err = -EINVAL;
+	if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen)
+		goto err2;
+
+	if (tb[NFTA_HE_DATA] != NULL) {
+		err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]);
+		if (err < 0)
+			goto err2;
+		err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type);
+		if (err < 0)
+			goto err3;
+
+		if (d2.type == NFT_DATA_VALUE) {
+			/*
+			 * dlen == 0 means "not recorded yet"; like the
+			 * flags handling in nft_hash_init(), this relies
+			 * on the private area starting out zeroed.
+			 */
+			if (priv->dlen == 0) {
+				priv->dlen = d2.len;
+			} else if (priv->dlen != d2.len) {
+				err = -EINVAL;
+				goto err3;
+			}
+		}
+	}
+
+	*new = elem;
+	return 0;
+
+err3:
+	nft_data_uninit(elem->data, d2.type);
+err2:
+	nft_data_uninit(&elem->key, d1.type);
+err1:
+	kfree(elem);
+	return err;
+}
+
+/*
+ * Dump one element as a nested NFTA_LIST_ELEM attribute.
+ *
+ * The key is always dumped as a value of priv->klen bytes. For maps
+ * the data is dumped with the type implied by the destination
+ * register, the same rule nft_hash_elem_destroy() uses, so data that
+ * is not a plain value is no longer emitted as NFT_DATA_VALUE.
+ *
+ * Returns 0 on success, -1 if an attribute could not be added to skb.
+ */
+static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			      const struct nft_hash_elem *elem)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key,
+			  NFT_DATA_VALUE, priv->klen) < 0)
+		goto nla_put_failure;
+
+	if (priv->flags & NFT_HASH_MAP) {
+		if (nft_data_dump(skb, NFTA_HE_DATA, elem->data,
+				  nft_dreg_to_type(priv->dreg),
+				  priv->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Tear the table down: unlink and destroy every element of every
+ * bucket, then free the bucket array. Also used by nft_hash_init()
+ * to unwind a partially populated table.
+ */
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct hlist_node *tmp;
+	struct nft_hash_elem *he;
+	unsigned int bucket = 0;
+
+	while (bucket < priv->hsize) {
+		/* _safe variant: the current entry is freed inside the loop */
+		hlist_for_each_entry_safe(he, tmp, &priv->hash[bucket], hnode) {
+			hlist_del(&he->hnode);
+			nft_hash_elem_destroy(expr, he);
+		}
+		bucket++;
+	}
+	kfree(priv->hash);
+}
+
+/* Netlink policy for the top-level attributes of the "hash" expression. */
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+	[NFTA_HASH_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_KLEN]	= { .type = NLA_U32 },
+	[NFTA_HASH_ELEMENTS]	= { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a "hash" expression from its netlink attributes and
+ * populate the table with the supplied elements.
+ *
+ * NFTA_HASH_SREG, NFTA_HASH_KLEN and NFTA_HASH_ELEMENTS are mandatory.
+ * NFTA_HASH_DREG is only accepted together with the NFT_HASH_MAP flag.
+ *
+ * All 32-bit attribute values are validated before they are stored in
+ * the narrower private fields, so out-of-range values are rejected
+ * instead of being silently truncated.
+ *
+ * Returns 0 on success, -EINVAL for malformed attributes, -ENOMEM on
+ * allocation failure, -EEXIST for a duplicate key, or whatever error
+ * the register/element helpers report.
+ */
+static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	struct nft_hash_elem *elem, *uninitialized_var(new);
+	const struct nlattr *nla;
+	unsigned int cnt, i;
+	unsigned int h;
+	u32 flags, reg, klen;
+	int err, rem;
+
+	/* Seed the hash function once, on first use. */
+	if (unlikely(!nft_hash_rnd_initted)) {
+		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd_initted = true;
+	}
+
+	if (tb[NFTA_HASH_SREG] == NULL ||
+	    tb[NFTA_HASH_KLEN] == NULL ||
+	    tb[NFTA_HASH_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	if (tb[NFTA_HASH_FLAGS] != NULL) {
+		/* Check all 32 bits; priv->flags only holds 16 of them. */
+		flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS]));
+		if (flags & ~NFT_HASH_MAP)
+			return -EINVAL;
+		priv->flags = flags;
+	}
+
+	/* Validate the full register number before narrowing to 8 bits. */
+	reg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG]));
+	err = nft_validate_input_register(reg);
+	if (err < 0)
+		return err;
+	priv->sreg = reg;
+
+	if (tb[NFTA_HASH_DREG] != NULL) {
+		if (!(priv->flags & NFT_HASH_MAP))
+			return -EINVAL;
+		reg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG]));
+		err = nft_validate_output_register(reg);
+		if (err < 0)
+			return err;
+		priv->dreg = reg;
+	}
+
+	/*
+	 * A key is hashed and compared in place inside one struct
+	 * nft_data, so it can be neither empty nor larger than that
+	 * structure's data area.
+	 */
+	klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN]));
+	if (klen == 0 || klen > sizeof(new->key.data))
+		return -EINVAL;
+	priv->klen = klen;
+
+	cnt = 0;
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		if (nla_type(nla) != NFTA_LIST_ELEM)
+			return -EINVAL;
+		cnt++;
+	}
+
+	/* Aim for a load factor of 0.75 */
+	cnt = cnt * 4 / 3;
+	/*
+	 * Always keep at least one bucket: with an empty element list
+	 * the table would otherwise have zero buckets and the lookup in
+	 * nft_hash_eval() would index past the allocation.
+	 */
+	if (cnt == 0)
+		cnt = 1;
+
+	priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
+	if (priv->hash == NULL)
+		return -ENOMEM;
+	priv->hsize = cnt;
+
+	for (i = 0; i < cnt; i++)
+		INIT_HLIST_HEAD(&priv->hash[i]);
+
+	nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) {
+		err = nft_hash_elem_init(ctx, expr, nla, &new);
+		if (err < 0)
+			goto err1;
+
+		h = nft_hash_data(&new->key, priv->hsize, priv->klen);
+		hlist_for_each_entry(elem, &priv->hash[h], hnode) {
+			if (nft_data_cmp(&elem->key, &new->key, priv->klen))
+				continue;
+			/* Duplicate key: the new element was never linked. */
+			nft_hash_elem_destroy(expr, new);
+			err = -EEXIST;
+			goto err1;
+		}
+		hlist_add_head(&new->hnode, &priv->hash[h]);
+	}
+	return 0;
+
+err1:
+	/* Frees every element linked so far plus the bucket array. */
+	nft_hash_destroy(ctx, expr);
+	return err;
+}
+
+/*
+ * Dump the expression configuration and all elements to skb.
+ *
+ * NFTA_HASH_FLAGS is only emitted when flags are set and
+ * NFTA_HASH_DREG only for maps; the elements follow nested inside
+ * NFTA_HASH_ELEMENTS, bucket by bucket.
+ *
+ * Returns 0 on success, -1 if any attribute could not be added.
+ */
+static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+	const struct nft_hash_elem *he;
+	struct nlattr *nest;
+	unsigned int bucket;
+
+	if (priv->flags &&
+	    nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg)))
+		goto nla_put_failure;
+	if ((priv->flags & NFT_HASH_MAP) &&
+	    nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen)))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NFTA_HASH_ELEMENTS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	for (bucket = 0; bucket < priv->hsize; bucket++) {
+		hlist_for_each_entry(he, &priv->hash[bucket], hnode) {
+			if (nft_hash_elem_dump(skb, expr, he) < 0)
+				goto nla_put_failure;
+		}
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Operations table registering the callbacks above under the name "hash". */
+static struct nft_expr_ops nft_hash_ops __read_mostly = {
+	.name		= "hash",
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+	.owner		= THIS_MODULE,
+	.eval		= nft_hash_eval,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.dump		= nft_hash_dump,
+	.policy		= nft_hash_policy,
+	.maxattr	= NFTA_HASH_MAX,
+};
+
+/* Module entry point: register the "hash" expression. */
+static int __init nft_hash_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_hash_ops);
+	return err;
+}
+
+/* Module exit point: unregister the "hash" expression. */
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_expr(&nft_hash_ops);
+}
+
+/* Hook the entry and exit points into the module loader. */
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+/* Module metadata; the alias names this module as the "hash" expression. */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("hash");