netfilter: add cttimeout infrastructure for fine timeout tuning

This patch adds the infrastructure for fine-grained timeout tuning over nfnetlink. You can now use the NFNL_SUBSYS_CTNETLINK_TIMEOUT subsystem to create/delete/dump timeout objects that contain a specific timeout policy for one flow. The follow-up patches will allow you to attach a timeout policy object to a conntrack via the CT target and the conntrack extension infrastructure. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
author: Pablo Neira Ayuso <pablo@netfilter.org> 2012-02-28 19:13:48 +0100
committer: Pablo Neira Ayuso <pablo@netfilter.org> 2012-03-07 17:41:22 +0100
commit: 50978462300f74dc48aea4a38471cb69bdf741a5 (patch)
tree: 0d63f721f996d4c4a4e37cd08df949e25d4c980e /net/netfilter/nfnetlink_cttimeout.c
parent: 2c8503f55fbdfbeff4164f133df804cf4d316290 (diff)
download: linux-50978462300f74dc48aea4a38471cb69bdf741a5.tar.gz
1 files changed, 398 insertions, 0 deletions
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
new file mode 100644
index 0000000000000..b860d52171898
--- /dev/null
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -0,0 +1,398 @@
+/*
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rculist.h>
+#include <linux/rculist_nulls.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/security.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
+
+/*
+ * One named timeout policy object. Objects are linked into cttimeout_list
+ * and are allocated with l4proto->ctnl_timeout.obj_size extra bytes after
+ * the structure for the protocol-specific policy (see cttimeout_new_timeout).
+ */
+struct ctnl_timeout {
+	struct list_head	head;		/* linkage in cttimeout_list */
+	struct rcu_head		rcu_head;	/* used by kfree_rcu() on release */
+	atomic_t		refcnt;		/* set to 1 on creation; object is
+						 * only deleted when it drops to 0 */
+	char			name[CTNL_TIMEOUT_NAME_MAX]; /* lookup key */
+	__u16			l3num;		/* layer 3 protocol number */
+	__u8			l4num;		/* layer 4 protocol number */
+	char			data[0];	/* protocol-specific policy, filled
+						 * by the nlattr_to_obj callback */
+};
+
+/*
+ * All timeout policy objects. Entries are added with list_add_tail_rcu() and
+ * removed with list_del_rcu(); the dump path walks the list under
+ * rcu_read_lock(), the other handlers rely on the nfnl mutex.
+ */
+static LIST_HEAD(cttimeout_list);
+
+/*
+ * Attribute validation policy shared by all cttimeout message types.
+ *
+ * CTA_TIMEOUT_NAME is bounded to CTNL_TIMEOUT_NAME_MAX - 1 characters (for
+ * NLA_NUL_STRING, .len is the maximum string length without the trailing
+ * NUL) so that an accepted name always fits in ctnl_timeout.name.
+ */
+static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
+	[CTA_TIMEOUT_NAME]	= { .type = NLA_NUL_STRING,
+				    .len  = CTNL_TIMEOUT_NAME_MAX - 1 },
+	[CTA_TIMEOUT_L3PROTO]	= { .type = NLA_U16 },
+	[CTA_TIMEOUT_L4PROTO]	= { .type = NLA_U8 },
+	[CTA_TIMEOUT_DATA]	= { .type = NLA_NESTED },
+};
+
+/*
+ * Parse the nested CTA_TIMEOUT_DATA attribute @attr with the policy supplied
+ * by @l4proto and let the protocol store the result in @timeout->data.
+ *
+ * Returns 0 if the protocol has no nlattr_to_obj callback, a negative error
+ * if the nested attributes fail to parse, otherwise whatever the callback
+ * returns.
+ */
+static int
+ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
+			       struct nf_conntrack_l4proto *l4proto,
+			       const struct nlattr *attr)
+{
+	int ret = 0;
+
+	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
+		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
+
+		/* Do not hand tb[] to the callback unless parsing succeeded:
+		 * on failure its contents cannot be trusted.
+		 */
+		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
+				       attr, l4proto->ctnl_timeout.nla_policy);
+		if (ret < 0)
+			return ret;
+
+		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, &timeout->data);
+	}
+	return ret;
+}
+
+/*
+ * IPCTNL_MSG_TIMEOUT_NEW handler: create a named timeout policy object or,
+ * with NLM_F_REPLACE, re-parse the policy data of an existing one.
+ *
+ * CTA_TIMEOUT_NAME, CTA_TIMEOUT_L3PROTO, CTA_TIMEOUT_L4PROTO and
+ * CTA_TIMEOUT_DATA must all be present.
+ *
+ * Returns 0 on success, or:
+ *   -EINVAL      an attribute is missing, the name does not fit in the
+ *                object's name buffer, or a replace targets an object of a
+ *                different l3/l4 protocol;
+ *   -EEXIST      the name exists and NLM_F_EXCL was set;
+ *   -EBUSY       the name exists and NLM_F_REPLACE was not set;
+ *   -EOPNOTSUPP  the l4 protocol tracker found does not handle l4num;
+ *   -ENOMEM      allocation failure;
+ * or the error returned while parsing the policy data.
+ */
+static int
+cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
+		      const struct nlmsghdr *nlh,
+		      const struct nlattr * const cda[])
+{
+	__u16 l3num;
+	__u8 l4num;
+	struct nf_conntrack_l4proto *l4proto;
+	struct ctnl_timeout *timeout, *matching = NULL;
+	char *name;
+	int ret;
+
+	if (!cda[CTA_TIMEOUT_NAME] ||
+	    !cda[CTA_TIMEOUT_L3PROTO] ||
+	    !cda[CTA_TIMEOUT_L4PROTO] ||
+	    !cda[CTA_TIMEOUT_DATA])
+		return -EINVAL;
+
+	name = nla_data(cda[CTA_TIMEOUT_NAME]);
+
+	/* The name ends up in a fixed-size buffer; refuse anything that
+	 * would not fit there together with its terminating NUL.
+	 */
+	if (strlen(name) >= CTNL_TIMEOUT_NAME_MAX)
+		return -EINVAL;
+
+	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+
+	list_for_each_entry(timeout, &cttimeout_list, head) {
+		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+			continue;
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+
+		matching = timeout;
+		break;
+	}
+
+	l4proto = __nf_ct_l4proto_find(l3num, l4num);
+
+	/* This protocol is not supported, skip. */
+	if (l4proto->l4proto != l4num)
+		return -EOPNOTSUPP;
+
+	if (matching) {
+		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+			/* You cannot replace one timeout policy by another of
+			 * different kind, sorry.
+			 */
+			if (matching->l3num != l3num ||
+			    matching->l4num != l4num)
+				return -EINVAL;
+
+			ret = ctnl_timeout_parse_policy(matching, l4proto,
+							cda[CTA_TIMEOUT_DATA]);
+			return ret;
+		}
+		return -EBUSY;
+	}
+
+	/* The protocol-specific policy lives right behind the object. */
+	timeout = kzalloc(sizeof(struct ctnl_timeout) +
+			  l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
+	if (timeout == NULL)
+		return -ENOMEM;
+
+	ret = ctnl_timeout_parse_policy(timeout, l4proto,
+					cda[CTA_TIMEOUT_DATA]);
+	if (ret < 0)
+		goto err;
+
+	/* Bounded copy: never write past the end of timeout->name. */
+	strlcpy(timeout->name, name, sizeof(timeout->name));
+	timeout->l3num = l3num;
+	timeout->l4num = l4num;
+	atomic_set(&timeout->refcnt, 1);
+	list_add_tail_rcu(&timeout->head, &cttimeout_list);
+
+	return 0;
+err:
+	kfree(timeout);
+	return ret;
+}
+
+/*
+ * Append one cttimeout message describing @timeout to @skb.
+ *
+ * The message carries the object's name, l3/l4 protocol numbers and current
+ * reference count. The protocol-specific policy is added as a nested
+ * CTA_TIMEOUT_DATA attribute when the l4 tracker that is found handles the
+ * object's protocol and provides an obj_to_nlattr callback.
+ *
+ * @type is not used by this function.
+ *
+ * Returns skb->len on success, -1 if the message could not be built (any
+ * partially written message is cancelled).
+ */
+static int
+ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+		       int event, struct ctnl_timeout *timeout)
+{
+	struct nf_conntrack_l4proto *l4proto;
+	struct nfgenmsg *hdr;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq,
+			event | (NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8),
+			sizeof(*hdr), pid ? NLM_F_MULTI : 0);
+	if (!nlh)
+		goto nla_put_failure;
+
+	hdr = nlmsg_data(nlh);
+	hdr->nfgen_family = AF_UNSPEC;
+	hdr->version = NFNETLINK_V0;
+	hdr->res_id = 0;
+
+	NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name);
+	NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num));
+	NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4num);
+	NLA_PUT_BE32(skb, CTA_TIMEOUT_USE,
+			htonl(atomic_read(&timeout->refcnt)));
+
+	l4proto = __nf_ct_l4proto_find(timeout->l3num, timeout->l4num);
+
+	/* Only dump the data part if the tracker we got back really is the
+	 * one for this object's layer 4 protocol; otherwise we do not know
+	 * how to interpret it. A mismatch may happen for UDPlite, SCTP and
+	 * DCCP since those modules can be unloaded.
+	 */
+	if (timeout->l4num == l4proto->l4proto &&
+	    likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
+		if (!nest)
+			goto nla_put_failure;
+
+		if (l4proto->ctnl_timeout.obj_to_nlattr(skb,
+							&timeout->data) < 0)
+			goto nla_put_failure;
+
+		nla_nest_end(skb, nest);
+	}
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+/*
+ * Netlink dump callback: emit one message per timeout object into @skb.
+ *
+ * Dump state kept in @cb:
+ *   args[1]  pointer to the entry that did not fit in the previous pass,
+ *            i.e. where this pass has to resume (0 if starting afresh);
+ *   args[2]  set to 1 once the whole list has been dumped.
+ *
+ * Returns 0 when the dump is already complete, skb->len otherwise.
+ *
+ * NOTE(review): the resume pointer is stored without taking a reference;
+ * confirm the entry cannot be released between two dump passes.
+ */
+static int
+ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ctnl_timeout *cur, *last;
+
+	if (cb->args[2])
+		return 0;
+
+	last = (struct ctnl_timeout *)cb->args[1];
+	if (cb->args[1])
+		cb->args[1] = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &cttimeout_list, head) {
+		if (last) {
+			/* Skip everything already sent in earlier passes. */
+			if (cur != last)
+				continue;
+
+			/* Resume point reached: stop filtering, otherwise
+			 * every entry after it would be skipped as well.
+			 */
+			last = NULL;
+		}
+
+		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid,
+					   cb->nlh->nlmsg_seq,
+					   NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+					   IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
+			/* Out of room: remember where to pick up next time. */
+			cb->args[1] = (unsigned long)cur;
+			break;
+		}
+	}
+	if (!cb->args[1])
+		cb->args[2] = 1;
+	rcu_read_unlock();
+	return skb->len;
+}
+
+/*
+ * IPCTNL_MSG_TIMEOUT_GET handler.
+ *
+ * With NLM_F_DUMP a dump of all objects is started. Otherwise the object
+ * named by CTA_TIMEOUT_NAME is looked up and sent back to the requester in
+ * a freshly allocated message.
+ *
+ * Returns 0 on success, -EINVAL if no name was given, -ENOENT if no object
+ * has that name, -ENOMEM if the reply cannot be allocated, the non-positive
+ * result of ctnl_timeout_fill_info() if the reply cannot be built, or the
+ * netlink_unicast() error (with -EAGAIN mapped to -ENOBUFS).
+ */
+static int
+cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
+		      const struct nlmsghdr *nlh,
+		      const struct nlattr * const cda[])
+{
+	struct ctnl_timeout *cur, *found = NULL;
+	struct sk_buff *reply;
+	char *name;
+	int ret;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = ctnl_timeout_dump,
+		};
+		return netlink_dump_start(ctnl, skb, nlh, &c);
+	}
+
+	if (!cda[CTA_TIMEOUT_NAME])
+		return -EINVAL;
+	name = nla_data(cda[CTA_TIMEOUT_NAME]);
+
+	list_for_each_entry(cur, &cttimeout_list, head) {
+		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) == 0) {
+			found = cur;
+			break;
+		}
+	}
+	if (!found)
+		return -ENOENT;
+
+	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (reply == NULL)
+		return -ENOMEM;
+
+	ret = ctnl_timeout_fill_info(reply, NETLINK_CB(skb).pid,
+				     nlh->nlmsg_seq,
+				     NFNL_MSG_TYPE(nlh->nlmsg_type),
+				     IPCTNL_MSG_TIMEOUT_NEW, found);
+	if (ret <= 0) {
+		kfree_skb(reply);
+		return ret;
+	}
+
+	ret = netlink_unicast(ctnl, reply, NETLINK_CB(skb).pid,
+				MSG_DONTWAIT);
+	if (ret > 0)
+		ret = 0;
+
+	/* this avoids a loop in nfnetlink. */
+	return ret == -EAGAIN ? -ENOBUFS : ret;
+}
+
+/*
+ * Drop the list's reference on @timeout and release the object if that was
+ * the last one. Returns 0 when the object was unlinked and queued for
+ * freeing, -EBUSY when it is still referenced (the count is left unchanged).
+ */
+static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
+{
+	/* we want to avoid races with nf_ct_timeout_find_get. */
+	if (!atomic_dec_and_test(&timeout->refcnt)) {
+		/* somebody else holds it: undo our decrement and bail out. */
+		atomic_inc(&timeout->refcnt);
+		return -EBUSY;
+	}
+
+	/* We are protected by nfnl mutex. */
+	list_del_rcu(&timeout->head);
+	kfree_rcu(timeout, rcu_head);
+	return 0;
+}
+
+/*
+ * IPCTNL_MSG_TIMEOUT_DELETE handler.
+ *
+ * Without CTA_TIMEOUT_NAME every object that is no longer in use is
+ * deleted (objects still in use are silently kept) and 0 is returned.
+ * With a name, only that object is deleted.
+ *
+ * Returns 0 on success, -ENOENT if no object has the given name, -EBUSY if
+ * the named object is still in use.
+ */
+static int
+cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
+		      const struct nlmsghdr *nlh,
+		      const struct nlattr * const cda[])
+{
+	char *name;
+	struct ctnl_timeout *cur, *tmp;
+	int ret = -ENOENT;
+
+	if (!cda[CTA_TIMEOUT_NAME]) {
+		/* ctnl_timeout_try_del() may unlink and release the current
+		 * entry, so fetch its successor before calling it.
+		 */
+		list_for_each_entry_safe(cur, tmp, &cttimeout_list, head)
+			ctnl_timeout_try_del(cur);
+
+		return 0;
+	}
+	name = nla_data(cda[CTA_TIMEOUT_NAME]);
+
+	list_for_each_entry(cur, &cttimeout_list, head) {
+		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
+			continue;
+
+		ret = ctnl_timeout_try_del(cur);
+		if (ret < 0)
+			return ret;
+
+		/* cur may be gone now; do not touch the list any further. */
+		break;
+	}
+	return ret;
+}
+
+/* Message type -> handler table; all handlers share one attribute policy. */
+static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
+	[IPCTNL_MSG_TIMEOUT_NEW] = {
+		.call		= cttimeout_new_timeout,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy,
+	},
+	[IPCTNL_MSG_TIMEOUT_GET] = {
+		.call		= cttimeout_get_timeout,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy,
+	},
+	[IPCTNL_MSG_TIMEOUT_DELETE] = {
+		.call		= cttimeout_del_timeout,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy,
+	},
+};
+
+/* Descriptor registered with nfnetlink for the cttimeout subsystem. */
+static const struct nfnetlink_subsystem cttimeout_subsys = {
+	.name		= "conntrack_timeout",
+	.subsys_id	= NFNL_SUBSYS_CTNETLINK_TIMEOUT,
+	.cb_count	= IPCTNL_MSG_TIMEOUT_MAX,
+	.cb		= cttimeout_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
+
+/*
+ * Module entry point: register the cttimeout subsystem with nfnetlink.
+ * Returns 0 on success or the negative error from the registration.
+ */
+static int __init cttimeout_init(void)
+{
+	int err = nfnetlink_subsys_register(&cttimeout_subsys);
+
+	if (err < 0) {
+		pr_err("cttimeout_init: cannot register cttimeout with nfnetlink.\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Module exit point: unregister from nfnetlink, then release every
+ * remaining timeout object.
+ */
+static void __exit cttimeout_exit(void)
+{
+	struct ctnl_timeout *obj, *next;
+
+	pr_info("cttimeout: unregistering from nfnetlink.\n");
+
+	nfnetlink_subsys_unregister(&cttimeout_subsys);
+
+	/* We are sure that our objects have no clients at this point, so
+	 * they are all released without looking at their refcnt.
+	 */
+	list_for_each_entry_safe(obj, next, &cttimeout_list, head) {
+		list_del_rcu(&obj->head);
+		kfree_rcu(obj, rcu_head);
+	}
+}
+
+module_init(cttimeout_init);
+module_exit(cttimeout_exit);
author	Pablo Neira Ayuso <pablo@netfilter.org>	2012-02-28 19:13:48 +0100
committer	Pablo Neira Ayuso <pablo@netfilter.org>	2012-03-07 17:41:22 +0100
commit	50978462300f74dc48aea4a38471cb69bdf741a5 (patch)
tree	0d63f721f996d4c4a4e37cd08df949e25d4c980e /net/netfilter/nfnetlink_cttimeout.c
parent	2c8503f55fbdfbeff4164f133df804cf4d316290 (diff)
download	linux-50978462300f74dc48aea4a38471cb69bdf741a5.tar.gz