diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-29 12:06:38 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-29 13:59:31 +0200 |
commit | dfabe04389e6d06a6e011a5b3b73e95a27dde43d (patch) | |
tree | 8305d3508b4bd47214e0e367721d20ae78eba303 | |
parent | 1b28a539c558af6c45c473930e9c9272e37c4f22 (diff) | |
download | bpf/redirect-peer.tar.gz |
bpf: add redirect peer helper [branch: bpf/redirect-peer]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | include/linux/netdevice.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 3 | ||||
-rw-r--r-- | net/core/dev.c | 33 | ||||
-rw-r--r-- | net/core/filter.c | 61 |
4 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 913b1cc882cf0d..0336c556400630 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -98,6 +98,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_XMIT_CN 0x02 /* congestion notification */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ +#define NET_ANOTHER 0x04 + /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops * some packets of the same priority; prompting us to send less aggressively. */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a66769e58753f..106f32b314876e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -757,7 +757,8 @@ union bpf_attr { FN(perf_prog_read_value), \ FN(getsockopt), \ FN(override_return), \ - FN(sock_ops_cb_flags_set), + FN(sock_ops_cb_flags_set), \ + FN(redirect_peer), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/dev.c b/net/core/dev.c index d8887cc38e7bb1..03950c610f87b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3316,7 +3316,8 @@ EXPORT_SYMBOL(dev_loopback_xmit); #ifdef CONFIG_NET_EGRESS static struct sk_buff * -sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev, + bool *another) { struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; @@ -3345,7 +3346,10 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return NULL; case TC_ACT_REDIRECT: /* No need to push/pop skb's mac_header here on egress! 
*/ - skb_do_redirect(skb); + if (skb_do_redirect(skb) == NET_ANOTHER) { + *another = true; + break; + } *ret = NET_XMIT_SUCCESS; return NULL; default: @@ -3494,10 +3498,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) #ifdef CONFIG_NET_CLS_ACT skb->tc_at_ingress = 0; # ifdef CONFIG_NET_EGRESS +another_round: if (static_key_false(&egress_needed)) { - skb = sch_handle_egress(skb, &rc, dev); + bool another = false; + + skb = sch_handle_egress(skb, &rc, dev, &another); if (!skb) goto out; + if (another) { + dev = skb->dev; + goto another_round; + } } # endif #endif @@ -4262,7 +4273,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); static inline struct sk_buff * sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) + struct net_device *orig_dev, bool *another) { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); @@ -4305,7 +4316,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * redirecting to another netdev */ __skb_push(skb, skb->mac_len); - skb_do_redirect(skb); + if (skb_do_redirect(skb) == NET_ANOTHER) { + __skb_pull(skb, skb->mac_len); + *another = true; + break; + } return NULL; default: break; @@ -4478,10 +4493,14 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { - skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); + bool another = false; + + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, + &another); if (!skb) goto out; - + if (another) + goto another_round; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; } diff --git a/net/core/filter.c b/net/core/filter.c index 50c6fe28938784..647ec110887204 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1763,6 +1763,16 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, return __bpf_redirect_no_mac(skb, dev, flags); } +static int __bpf_switch_dev(struct 
sk_buff *skb, struct net_device *dev) +{ + bool xnet = !net_eq(dev_net(skb->dev), dev_net(dev)); + + skb_scrub_packet(skb, xnet); + skb->dev = dev; + + return NET_ANOTHER; +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1805,42 +1815,79 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { struct redirect_info { u32 ifindex; + u32 peer_id; u32 flags; struct bpf_map *map; struct bpf_map *map_to_flush; unsigned long map_owner; }; +#define BPF_F_DIRECT (1U << 1) + static DEFINE_PER_CPU(struct redirect_info, redirect_info); -BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) +static int bpf_set_redirect_info(u32 ifindex, u32 peer_id, u32 flags) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); - if (unlikely(flags & ~(BPF_F_INGRESS))) - return TC_ACT_SHOT; - ri->ifindex = ifindex; + ri->peer_id = peer_id; ri->flags = flags; return TC_ACT_REDIRECT; } +BPF_CALL_3(bpf_redirect_peer, u32, ifindex, u32, peer_id, u64, flags) +{ + if (unlikely(flags)) + return TC_ACT_SHOT; + + return bpf_set_redirect_info(ifindex, peer_id, BPF_F_DIRECT); +} + +BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) +{ + if (unlikely(flags & ~(BPF_F_INGRESS))) + return TC_ACT_SHOT; + + return bpf_set_redirect_info(ifindex, 0, flags); +} + int skb_do_redirect(struct sk_buff *skb) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + bool direct = ri->flags & BPF_F_DIRECT; + struct net *net = dev_net(skb->dev); struct net_device *dev; - dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + if (direct) { + net = get_net_ns_by_id(net, ri->peer_id); + if (unlikely(!net)) { + kfree_skb(skb); + return -EINVAL; + } + } + + dev = dev_get_by_index_rcu(net, ri->ifindex); ri->ifindex = 0; if (unlikely(!dev)) { kfree_skb(skb); return -EINVAL; } - return __bpf_redirect(skb, dev, ri->flags); + return direct ? 
__bpf_switch_dev(skb, dev) : + __bpf_redirect(skb, dev, ri->flags); } +static const struct bpf_func_proto bpf_redirect_peer_proto = { + .func = bpf_redirect_peer, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto bpf_redirect_proto = { .func = bpf_redirect, .gpl_only = false, @@ -3501,6 +3548,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; + case BPF_FUNC_redirect_peer: + return &bpf_redirect_peer_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: |