author    Daniel Borkmann <daniel@iogearbox.net>  2018-03-29 12:06:38 +0200
committer Daniel Borkmann <daniel@iogearbox.net>  2018-03-29 13:59:31 +0200
commit    dfabe04389e6d06a6e011a5b3b73e95a27dde43d
tree      8305d3508b4bd47214e0e367721d20ae78eba303
parent    1b28a539c558af6c45c473930e9c9272e37c4f22

bpf: add redirect peer helper

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

 include/linux/netdevice.h |  2
 include/uapi/linux/bpf.h  |  3
 net/core/dev.c            | 33
 net/core/filter.c         | 61
 4 files changed, 85 insertions(+), 14 deletions(-)
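
The helper added here, bpf_redirect_peer(ifindex, peer_id, flags), lets a tc BPF program hand an skb straight to a device living in a peer network namespace: skb_do_redirect() resolves the namespace from peer_id via get_net_ns_by_id(), looks up ifindex there, switches skb->dev and returns NET_ANOTHER so that the egress/ingress caller reruns its processing loop on the new device instead of finishing delivery on the old one. Below is a minimal sketch of a tc classifier using it; the helper prototype is hand-declared because no released helper header carries it, PEER_IFINDEX and PEER_NSID are deployment-specific placeholders, and the patched uapi linux/bpf.h is assumed to be in the include path.

  /* Sketch only: redirect everything to a device in a peer netns. */
  #include <linux/bpf.h>

  #define PEER_IFINDEX 2   /* ifindex of the target device, as seen in the peer netns */
  #define PEER_NSID    0   /* netns id of the peer namespace, e.g. from "ip netns list-id" */

  /* Hand-declared helper stub matching the prototype added by this patch. */
  static int (*bpf_redirect_peer)(__u32 ifindex, __u32 peer_id, __u64 flags) =
          (void *) BPF_FUNC_redirect_peer;

  __attribute__((section("classifier"), used))
  int redirect_to_peer(struct __sk_buff *skb)
  {
          /* flags must be zero; anything else makes the helper return TC_ACT_SHOT */
          return bpf_redirect_peer(PEER_IFINDEX, PEER_NSID, 0);
  }

  char _license[] __attribute__((section("license"), used)) = "GPL";

Compiled with clang -O2 -target bpf, the resulting object can be attached at a clsact egress or ingress hook (see the loader sketch at the end of the patch).
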
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 913b1cc882cf0d..0336c556400630 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -98,6 +98,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
#define NET_XMIT_CN 0x02 /* congestion notification */
#define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */
+#define NET_ANOTHER 0x04
+
/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
* indicates that the device will soon be dropping packets, or already drops
* some packets of the same priority; prompting us to send less aggressively. */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a66769e58753f..106f32b314876e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -757,7 +757,8 @@ union bpf_attr {
FN(perf_prog_read_value), \
FN(getsockopt), \
FN(override_return), \
- FN(sock_ops_cb_flags_set),
+ FN(sock_ops_cb_flags_set), \
+ FN(redirect_peer),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
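
Adding the single FN(redirect_peer) line is enough on the uapi side because the same __BPF_FUNC_MAPPER list is expanded by its existing consumers; the enum expansion already present further down in this header (quoted for reference, unchanged by the patch) is what turns it into the BPF_FUNC_redirect_peer id used by programs and by the verifier:

  #define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
  enum bpf_func_id {
          __BPF_FUNC_MAPPER(__BPF_ENUM_FN)   /* ..., BPF_FUNC_sock_ops_cb_flags_set, BPF_FUNC_redirect_peer */
          __BPF_FUNC_MAX_ID,
  };
  #undef __BPF_ENUM_FN
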
diff --git a/net/core/dev.c b/net/core/dev.c
index d8887cc38e7bb1..03950c610f87b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3316,7 +3316,8 @@ EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev,
+ bool *another)
{
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
@@ -3345,7 +3346,10 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return NULL;
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
- skb_do_redirect(skb);
+ if (skb_do_redirect(skb) == NET_ANOTHER) {
+ *another = true;
+ break;
+ }
*ret = NET_XMIT_SUCCESS;
return NULL;
default:
@@ -3494,10 +3498,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
# ifdef CONFIG_NET_EGRESS
+another_round:
if (static_key_false(&egress_needed)) {
- skb = sch_handle_egress(skb, &rc, dev);
+ bool another = false;
+
+ skb = sch_handle_egress(skb, &rc, dev, &another);
if (!skb)
goto out;
+ if (another) {
+ dev = skb->dev;
+ goto another_round;
+ }
}
# endif
#endif
@@ -4262,7 +4273,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
static inline struct sk_buff *
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
+ struct net_device *orig_dev, bool *another)
{
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4305,7 +4316,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* redirecting to another netdev
*/
__skb_push(skb, skb->mac_len);
- skb_do_redirect(skb);
+ if (skb_do_redirect(skb) == NET_ANOTHER) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
return NULL;
default:
break;
@@ -4478,10 +4493,14 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
- skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+ bool another = false;
+
+ skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+ &another);
if (!skb)
goto out;
-
+ if (another)
+ goto another_round;
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto out;
}
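
With the dev.c changes in place, neither hook finishes delivery itself when the program redirected to a peer: sch_handle_egress()/sch_handle_ingress() only report the device switch through the new bool, and the transmit and receive loops jump back to another_round with skb->dev already pointing at the peer. The toy user-space program below models just that control flow (made-up device names, ordinary C, not kernel code):

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  struct pkt {
          char dev[16];                      /* stands in for skb->dev */
  };

  /* Stands in for the egress/ingress hook: returns true when the program
   * switched the packet's device and asked for another processing round,
   * i.e. when skb_do_redirect() came back with NET_ANOTHER. */
  static bool run_hook(struct pkt *p)
  {
          if (strcmp(p->dev, "veth-host") == 0) {
                  strcpy(p->dev, "veth-peer");   /* like __bpf_switch_dev() */
                  return true;
          }
          return false;
  }

  int main(void)
  {
          struct pkt p = { .dev = "veth-host" };

  another_round:
          if (run_hook(&p))
                  goto another_round;            /* rerun the hooks on the new device */

          printf("deliver on %s\n", p.dev);      /* prints: deliver on veth-peer */
          return 0;
  }
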
diff --git a/net/core/filter.c b/net/core/filter.c
index 50c6fe28938784..647ec110887204 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1763,6 +1763,16 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
return __bpf_redirect_no_mac(skb, dev, flags);
}
+static int __bpf_switch_dev(struct sk_buff *skb, struct net_device *dev)
+{
+ bool xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
+
+ skb_scrub_packet(skb, xnet);
+ skb->dev = dev;
+
+ return NET_ANOTHER;
+}
+
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
struct net_device *dev;
@@ -1805,42 +1815,79 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
struct redirect_info {
u32 ifindex;
+ u32 peer_id;
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
};
+#define BPF_F_DIRECT (1U << 1)
+
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
-BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+static int bpf_set_redirect_info(u32 ifindex, u32 peer_id, u32 flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
- if (unlikely(flags & ~(BPF_F_INGRESS)))
- return TC_ACT_SHOT;
-
ri->ifindex = ifindex;
+ ri->peer_id = peer_id;
ri->flags = flags;
return TC_ACT_REDIRECT;
}
+BPF_CALL_3(bpf_redirect_peer, u32, ifindex, u32, peer_id, u64, flags)
+{
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+
+ return bpf_set_redirect_info(ifindex, peer_id, BPF_F_DIRECT);
+}
+
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
+{
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return TC_ACT_SHOT;
+
+ return bpf_set_redirect_info(ifindex, 0, flags);
+}
+
int skb_do_redirect(struct sk_buff *skb)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ bool direct = ri->flags & BPF_F_DIRECT;
+ struct net *net = dev_net(skb->dev);
struct net_device *dev;
- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ if (direct) {
+ net = get_net_ns_by_id(net, ri->peer_id);
+ if (unlikely(!net)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ }
+
+ dev = dev_get_by_index_rcu(net, ri->ifindex);
ri->ifindex = 0;
if (unlikely(!dev)) {
kfree_skb(skb);
return -EINVAL;
}
- return __bpf_redirect(skb, dev, ri->flags);
+ return direct ? __bpf_switch_dev(skb, dev) :
+ __bpf_redirect(skb, dev, ri->flags);
}
+static const struct bpf_func_proto bpf_redirect_peer_proto = {
+ .func = bpf_redirect_peer,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
@@ -3501,6 +3548,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
+ case BPF_FUNC_redirect_peer:
+ return &bpf_redirect_peer_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
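
To exercise the helper end to end, the object built from the classifier shown after the diffstat still needs to be attached at a clsact hook on the host-side device. The sketch below uses today's libbpf tc APIs, which post-date this patch (with a contemporary toolchain the object would simply be attached through iproute2's tc); the object path, program name, interface name and libbpf 1.0 error conventions are assumptions carried over from the earlier example.

  #include <errno.h>
  #include <net/if.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
          struct bpf_object *obj;
          struct bpf_program *prog;
          int ifindex, err;

          ifindex = if_nametoindex("veth-host");          /* host-side device, placeholder */
          if (!ifindex)
                  return 1;

          obj = bpf_object__open_file("redirect_peer.bpf.o", NULL);
          if (!obj || bpf_object__load(obj))
                  return 1;

          prog = bpf_object__find_program_by_name(obj, "redirect_to_peer");
          if (!prog)
                  return 1;

          /* clsact egress, so that sch_handle_egress() runs the program */
          LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
                      .attach_point = BPF_TC_EGRESS);
          LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = bpf_program__fd(prog));

          err = bpf_tc_hook_create(&hook);
          if (err && err != -EEXIST)                      /* clsact may already exist */
                  return 1;

          return bpf_tc_attach(&hook, &opts) ? 1 : 0;
  }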