diff options
author | Martin KaFai Lau <martin.lau@kernel.org> | 2024-03-21 10:28:04 -0700 |
---|---|---|
committer | Martin KaFai Lau <martin.lau@kernel.org> | 2024-04-16 15:05:23 -0700 |
commit | 1e76506c01368d3b919af2f4edb64a8561bc4781 (patch) | |
tree | 7892d3dd0169b940aa71fa098745dfd774085b47 | |
parent | ad2d22b617b7c0ca2cff4da6dc063183822484bb (diff) | |
download | bpf-next-tw_netbw.tar.gz |
selftests/bpf: Adapt hbm to selftests (branch: tw_netbw)
* Removed the (*128) optimization, easier to configure the limit in Gbps
* Removed the linear ECN behavior. A const configured % mark instead.
* Mostly removed CWR.
* Added the original tw algo but stopped taking the per-flow tcp
  pacing set in skb->tstamp as an input.
  It is in tw_netbw_eg, mostly a copy-and-paste from tw
  but adapted to the statistics collection and variable naming
  of the hbm one.
* Added an hbm.c binary to quickly configure a container.
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r-- | tools/testing/selftests/bpf/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/Makefile | 6 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/hbm.c | 124 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/hbm.h | 10 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/hbm_edt_kern.c | 341 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/hbm_kern.h | 237 |
6 files changed, 718 insertions, 1 deletions
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index f1aebabfb0176..0f87f6963651b 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -52,3 +52,4 @@ xdp_redirect_multi xdp_synproxy xdp_hw_metadata xdp_features +hbm
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index edc73f8f5aefb..34464a70811db 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -132,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ - xdp_features bpf_test_no_cfi.ko + xdp_features bpf_test_no_cfi.ko hbm TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi @@ -712,6 +712,10 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ +$(OUTPUT)/hbm: hbm.c $(OUTPUT)/hbm_edt_kern.skel.h $(BPFOBJ) + $(call msg,CXX,,$@) + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) diff --git a/tools/testing/selftests/bpf/hbm.c b/tools/testing/selftests/bpf/hbm.c new file mode 100644 index 0000000000000..6102b1c161f63 --- /dev/null +++ b/tools/testing/selftests/bpf/hbm.c @@ -0,0 +1,124 @@ +#include "test_progs.h" +#include "hbm_edt_kern.skel.h" + +#define BPFFS "/sys/fs/bpf/kafai" +#define CGRPFS "/sys/fs/cgroup" + +void test__fail() +{ + +} + +static int pin_maps(struct hbm_edt_kern *skel, int taskid) +{ + char path[128]; + int err; + + snprintf(path, sizeof(path), BPFFS "/%d/queue_stats", taskid); + unlink(path); + err = bpf_map__pin(skel->maps.queue_stats, path); + if (!ASSERT_OK(err, "bpf_map__pin(queue_stats)")) + return -1; + + snprintf(path, sizeof(path), BPFFS "/%d/queue_state", taskid); + unlink(path); + err = bpf_map__pin(skel->maps.queue_state, path); + if (!ASSERT_OK(err, "bpf_map__pin(queue_state)")) + return -1; + + snprintf(path, 
sizeof(path), BPFFS "/%d/rodata", taskid); + unlink(path); + err = bpf_map__pin(skel->maps.rodata, path); + if (!ASSERT_OK(err, "bpf_map__pin(rodata)")) + return -1; + + return 0; +} + +int main(int argc, char **argv) +{ + struct hbm_edt_kern *skel = NULL; + int err, taskid, cgrp_fd = -1; + struct bpf_link *link = NULL; + const char *tw_cgrp_path; + struct bpf_program *prog; + uint64_t mark_tenth_pct; + char cgrp_path[1024]; + char link_path[128]; + + if (argc != 11) { + printf("./hbm host_ip tw_cgrp_path svc_ip taskid prog rate_mbps burst_ns mark_ns mark_tenth_pct drop_ns\n"); + return -1; + } + + skel = hbm_edt_kern__open(); + if (!ASSERT_OK_PTR(skel, "hbm_edt_kern__open")) + goto done; + + err = inet_pton(AF_INET6, argv[1], &skel->rodata->host_addr); + if (!ASSERT_EQ(err, 1, "inet_pton(host_addr)")) + goto done; + + tw_cgrp_path = argv[2]; + + err = inet_pton(AF_INET6, argv[3], &skel->rodata->svc_addr); + if (!ASSERT_EQ(err, 1, "inet_pton(host_addr)")) + goto done; + + err = inet_pton(AF_INET6, "::1", &skel->rodata->lo_addr); + if (!ASSERT_EQ(err, 1, "inet_pton(host_addr)")) + goto done; + + taskid = atoi(argv[4]); + snprintf(link_path, sizeof(link_path), BPFFS "/%d/cgroup_link", taskid); + + skel->rodata->rate_bps = strtoull(argv[6], NULL, 10) * 1000 * 1000 ? : UINT64_MAX; + skel->rodata->burst_ns = strtoull(argv[7], NULL, 10) ? : UINT64_MAX; + skel->rodata->mark_ns = strtoull(argv[8], NULL, 10) ? : UINT64_MAX; + mark_tenth_pct = strtoull(argv[9], NULL, 10); + if (!ASSERT_LT(mark_tenth_pct, 1000, "mark_tenth_pct")) + goto done; + skel->rodata->mark_rand = (UINT_MAX / 1000) * (1000 - mark_tenth_pct); + skel->rodata->drop_ns = strtoull(argv[10], NULL, 10) ? 
: UINT64_MAX; + + printf("rate_bps %lu burst_ns %lu mark_ns %lu drop_ns %lu\n", + skel->rodata->rate_bps, skel->rodata->burst_ns, + skel->rodata->mark_ns, skel->rodata->drop_ns); + + err = hbm_edt_kern__load(skel); + if (!ASSERT_OK(err, "hbm_edt_kern__load")) + goto done; + + if (!strcmp(argv[5], "edt")) + prog = skel->progs.hbm_edt; + else + prog = skel->progs.tw_netbw_eg; + + link = bpf_link__open(link_path); + if (link) { + err = bpf_link__update_program(link, prog); + ASSERT_OK(err, "bpf_link__update_program"); + } else { + snprintf(cgrp_path, sizeof(cgrp_path), CGRPFS "%s", tw_cgrp_path); + cgrp_fd = open(cgrp_path, O_RDONLY); + if (!ASSERT_GE(cgrp_fd, 0, "open(cgroup)")) + goto done; + link = bpf_program__attach_cgroup(prog, cgrp_fd); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_cgroup")) + goto done; + err = bpf_link__pin(link, link_path); + if (!ASSERT_OK(err, "bpf_link__pin")) + goto done; + } + err = pin_maps(skel, taskid); + if (err) + goto done; + bpf_link__disconnect(link); + +done: + bpf_link__destroy(link); + hbm_edt_kern__destroy(skel); + if (cgrp_fd != -1) + close(cgrp_fd); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/hbm.h b/tools/testing/selftests/bpf/progs/hbm.h new file mode 100644 index 0000000000000..af4e6dea8c99d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/hbm.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Include file for Host Bandwidth Management (HBM) programs + */ diff --git a/tools/testing/selftests/bpf/progs/hbm_edt_kern.c b/tools/testing/selftests/bpf/progs/hbm_edt_kern.c new file mode 100644 index 0000000000000..0de11ae05fb57 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/hbm_edt_kern.c @@ -0,0 +1,341 @@ +#include "hbm_kern.h" +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +#define U32_MAX 4294967295U +#define U64_MAX 18446744073709551615ULL + +volatile const struct in6_addr svc_addr; +volatile const struct in6_addr host_addr; +volatile const struct in6_addr lo_addr; + +volatile const s64 burst_ns = 10 * NSEC_PER_MSEC; /* 10ms */ +volatile const u64 mark_ns = 100 * NSEC_PER_MSEC; /* 100ms */ +volatile const u64 drop_ns = 2 * NSEC_PER_SEC; /* 2s */ +volatile const u64 rate_bps = 12500UL * 1000 * 1000; +volatile const u32 mark_rand = (U32_MAX / 100) * 99; /* 1% ECN */ + +#define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1] && \ + a.s6_addr32[2] == b.s6_addr32[2] && \ + a.s6_addr32[3] == b.s6_addr32[3]) + +static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) +{ + struct bpf_sock *sk; + struct bpf_tcp_sock *tp; + + sk = skb->sk; + if (sk) { + sk = bpf_sk_fullsock(sk); + if (sk) { + if (sk->protocol == IPPROTO_TCP) { + tp = bpf_tcp_sock(sk); + if (tp) { + pkti->cwnd = tp->snd_cwnd; + pkti->rtt = tp->srtt_us >> 3; + pkti->packets_out = tp->packets_out; + return 0; + } + } + } + } + pkti->cwnd = 0; + pkti->rtt = 0; + pkti->packets_out = 0; + return 1; +} + +static void hbm_get_pkt_info(struct __sk_buff *skb, + struct hbm_pkt_info *pkti) +{ + struct iphdr *iph = &pkti->iph; + struct ipv6hdr *ip6h = &pkti->ip6h; + struct tcphdr th; + __u32 ip_len; + + if (skb->protocol == bpf_htons(ETH_P_IPV6)) { + bpf_skb_load_bytes(skb, 0, ip6h, sizeof(*ip6h)); + pkti->ip_protocol = ip6h->nexthdr; + pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; + ip_len = sizeof(*ip6h); + } else 
if (skb->protocol == bpf_htons(ETH_P_IP)) { + bpf_skb_load_bytes(skb, 0, iph, sizeof(*iph)); + pkti->ip_protocol = iph->protocol; + pkti->ecn = iph->tos & INET_ECN_MASK; + ip_len = sizeof(*iph); + } + if (pkti->ip_protocol == IPPROTO_TCP) { + bpf_skb_load_bytes(skb, ip_len, &th, sizeof(th)); + if (skb->len <= ip_len + th.doff * 4) + pkti->tcp_ack = true; + get_tcp_info(skb, pkti); + } +} + +static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, struct __sk_buff *skb, + int local_port, + u64 now, u64 added_delay, u64 skb_delay, + int ret, struct hbm_pkt_info *pkti) +{ + qsp->burst_bytes += skb->len; + qsp->bytes += skb->len; + qsp->pkts += 1; + qsp->total_pkts += 1; + + if (!qsp->avg_tstamp) { + qsp->avg_tstamp = now; + } else if (now > qsp->avg_tstamp && now - qsp->avg_tstamp > 10 * NSEC_PER_SEC) { + qsp->avg_pkt_sz = qsp->bytes / qsp->pkts; + /* devide by ns => Gbps. (* 1000) to get Mbps. */ + qsp->avg_Mbps = BYTES_TO_BITS(qsp->bytes) * 1000 / (now - qsp->avg_tstamp); + qsp->avg_delay_ns = qsp->delay_ns / qsp->pkts_delayed; + qsp->avg_skb_delay_ns = qsp->skb_delay_ns / qsp->pkts_skb_delayed; + qsp->avg_cwnd = qsp->cwnd / qsp->pkts; + qsp->avg_rtt = qsp->rtt / qsp->pkts; + + qsp->delayed_pct = qsp->pkts_delayed * 100 / qsp->pkts; + qsp->ecn_pct = qsp->pkts_ecn * 100 / qsp->pkts; + qsp->ecn_err_pct = qsp->pkts_ecn_err * 100 / qsp->pkts; + + qsp->pkts = 0; + qsp->bytes = 0; + + qsp->max_burst_Mbps = 0; + qsp->max_burst_port = 0; + + qsp->max_pkt_sz = 0; + qsp->max_pkt_sz_port = 0; + + qsp->delay_ns = 0; + qsp->skb_delay_ns = 0; + qsp->pkts_delayed = 0; + qsp->pkts_skb_delayed = 0; + qsp->pkts_nodelay = 0; + qsp->pkts_ecn = 0; + qsp->pkts_ecn_err = 0; + qsp->pkts_dropped = 0; + + qsp->cwnd = 0; + qsp->rtt = 0; + + qsp->cg_ret[0] = qsp->cg_ret[1] = qsp->cg_ret[2] = qsp->cg_ret[3] = 0; + + qsp->avg_tstamp = now; + } + + if (skb->len > qsp->max_pkt_sz) { + qsp->max_pkt_sz = skb->len; + qsp->max_pkt_sz_port = local_port; + } + + if (now > 
qsp->burst_tstamp && now - qsp->burst_tstamp > NSEC_PER_MSEC) { + qsp->burst_Mbps = BYTES_TO_BITS(qsp->burst_bytes) * 1000 / (now - qsp->burst_tstamp); + qsp->burst_bytes = 0; + qsp->burst_tstamp = now; + if (qsp->burst_Mbps > qsp->max_burst_Mbps) { + qsp->max_burst_Mbps = qsp->burst_Mbps; + qsp->max_burst_port = local_port; + } + } + + if (added_delay) { + qsp->total_delay_ns += added_delay; + qsp->total_delay_pkts += 1; + qsp->delay_ns += added_delay; + qsp->pkts_delayed += 1; + } else { + qsp->pkts_nodelay += 1; + } + + if (skb_delay) { + qsp->skb_delay_ns += skb_delay; + qsp->pkts_skb_delayed += 1; + qsp->total_skb_delay_pkts += 1; + } + + if (ret == DROP || ret == DROP_CWR) + qsp->pkts_dropped += 1; + + qsp->cwnd += pkti->cwnd; + qsp->rtt += pkti->rtt; + + if (ret <= 4) + qsp->cg_ret[ret] += 1; +} + + +SEC("cgroup_skb/egress") +int hbm_edt(struct __sk_buff *skb) +{ + s64 added_delay = 0, delta = 0, delay_ns; + u64 now, send_tstamp, skb_tstamp; + struct hbm_queue_stats *qsp; + unsigned int zero = 0; + struct hbm_pkt_info pkti = {}; + struct hbm_vqueue *qdp; + int len = skb->len; + int ret = ALLOW; + struct sock *sk = (void *)skb->sk; + int local_port; + + if (!sk) + return ALLOW; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + local_port = sk->sk_num; + + if (skb->ifindex == 1) + return ALLOW; + + hbm_get_pkt_info(skb, &pkti); + if (pkti.ip_protocol != IPPROTO_TCP || pkti.tcp_ack || + skb->protocol != bpf_htons(ETH_P_IPV6) || + !v6_equal(pkti.ip6h.saddr, svc_addr) || + v6_equal(pkti.ip6h.daddr, lo_addr) || + v6_equal(pkti.ip6h.daddr, host_addr)) + return ALLOW; + + qdp = bpf_map_lookup_elem(&queue_state, &zero); + if (!qdp) + return ALLOW; + + now = bpf_ktime_get_ns(); + + qsp = bpf_map_lookup_elem(&queue_stats, &zero); + if (!qsp) + return ALLOW; + + bpf_spin_lock(&qdp->lock); + if (!qdp->lasttime) + qdp->lasttime = now - burst_ns; + + delta = qdp->lasttime - now; + qsp->delta = delta; + if (delta <= -burst_ns) { + qdp->lasttime = now - 
burst_ns; + delta = -burst_ns; + } + send_tstamp = qdp->lasttime; + delay_ns = BYTES_TO_NS(len); + + skb_tstamp = skb->tstamp; + if (send_tstamp > now && send_tstamp > skb_tstamp) { + added_delay = send_tstamp - skb_tstamp; + skb->tstamp = send_tstamp; + } + + if (delta > 0 && delta > drop_ns) + ret = DROP; + + if (ret == ALLOW || ret == ALLOW_CWR) + qdp->lasttime += delay_ns; + + hbm_update_stats(qsp, skb, local_port, now, added_delay, 0, ret, &pkti); + + bpf_spin_unlock(&qdp->lock); + + if (delta > 0 && delta > mark_ns && bpf_get_prandom_u32() > mark_rand) { + if (bpf_skb_ecn_set_ce(skb)) { + __sync_add_and_fetch(&qsp->pkts_ecn, 1); + __sync_add_and_fetch(&qsp->total_ecn, 1); + } else { + __sync_add_and_fetch(&qsp->pkts_ecn_err, 1); + } + } + + return ret; +} + +SEC("cgroup_skb/egress") +int tw_netbw_eg(struct __sk_buff* skb) +{ + u64 delay_ns, now, skb_tstamp, send_tstamp = 0; + u64 added_delay = 0, skb_delay = 0; + struct hbm_pkt_info pkti = {}; + struct hbm_queue_stats *qsp; + struct hbm_vqueue *qdp; + int len = skb->len; + int ret = ALLOW; + int zero = 0; + struct sock *sk = (void *)skb->sk; + int local_port; + bool ecn = false; + + if (!sk) + return ALLOW; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + local_port = sk->sk_num; + + qdp = bpf_map_lookup_elem(&queue_state, &zero); + if (!qdp) + return ALLOW; + + qsp = bpf_map_lookup_elem(&queue_stats, &zero); + if (!qsp) + return ALLOW; + + hbm_get_pkt_info(skb, &pkti); + if (pkti.ip_protocol != IPPROTO_TCP || pkti.tcp_ack || + skb->protocol != bpf_htons(ETH_P_IPV6) || + !v6_equal(pkti.ip6h.saddr, svc_addr) || + v6_equal(pkti.ip6h.daddr, lo_addr) || + v6_equal(pkti.ip6h.daddr, host_addr) || + skb->ifindex == 1) + return ALLOW; + + if (!qdp) + return ALLOW; + + skb_tstamp = skb->tstamp; + now = bpf_ktime_get_ns(); + if (skb_tstamp < now) + skb_tstamp = now; + + bpf_spin_lock(&qdp->lock); + + delay_ns = BYTES_TO_NS(len); + if (qdp->lasttime) + send_tstamp = qdp->lasttime + delay_ns; + else 
+ send_tstamp = now; + + if (send_tstamp <= skb_tstamp) { + skb_delay = skb_tstamp - send_tstamp; + /* Do take the per flow tcp pacing as + * an input to decide the qdp->lasttime. + */ + /* send_tstamp = skb_tstamp; */ + goto unlock; + } + + added_delay = send_tstamp - skb_tstamp; + skb->tstamp = send_tstamp; + + if (send_tstamp - now > mark_ns) + ecn = true; + + if (send_tstamp - now > drop_ns) + ret = DROP; + +unlock: + hbm_update_stats(qsp, skb, local_port, now, added_delay, skb_delay, ret, &pkti); + if (ret == ALLOW || ret == ALLOW_CWR) + qdp->lasttime = send_tstamp; + + bpf_spin_unlock(&qdp->lock); + + (void)ecn; +/* + if (ecn) { + if (bpf_skb_ecn_set_ce(skb)) + __sync_add_and_fetch(&qsp->pkts_ecn, 1); + else + __sync_add_and_fetch(&qsp->pkts_ecn_err, 1); + } +*/ + return ALLOW; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/hbm_kern.h b/tools/testing/selftests/bpf/progs/hbm_kern.h new file mode 100644 index 0000000000000..467834cf4812d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/hbm_kern.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Include file for sample Host Bandwidth Manager (HBM) BPF programs + */ +#define KBUILD_MODNAME "foo" +#define iphdr iphdr_unused +#define ipv6hdr ipv6hdr_unused +#define tcphdr tcphdr_unused +#include "vmlinux.h" +#undef tcphdr +#undef ipv6hdr +#undef iphdr +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tracing_net.h" + +#define DROP (0U) +#define ALLOW (1U << 0) +#define CWR (1U << 1) +#define DROP_CWR (DROP | CWR) +#define ALLOW_CWR (ALLOW | CWR) +#define TCP_ECN_OK 1 + +#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging +#undef bpf_printk +#define bpf_printk(fmt, ...) 
+#endif + +#define INITIAL_CREDIT_PACKETS 100 +#define MAX_BYTES_PER_PACKET 1500 +#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET) +#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET) +#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET)) +#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH) +#define LARGE_PKT_THRESH 120 +#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) +#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) + +#define MSEC_PER_SEC 1000UL +#define NSEC_PER_USEC 1000UL +#define NSEC_PER_MSEC 1000000UL +#define NSEC_PER_SEC 1000000000UL + +#define BYTES_TO_BITS(bytes) (((u64)bytes) << 3) +#define BYTES_TO_NS(bytes) (BYTES_TO_BITS(bytes) * NSEC_PER_SEC / rate_bps); + +// Reserve 20us of queuing for small packets (less than 120 bytes) +#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000) +#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS) + +struct hbm_vqueue { + struct bpf_spin_lock lock; + /* 4 byte hole */ + __u64 lasttime; /* In ns */ + __u64 now; + int credit; /* In bytes */ + __u32 rate; /* In bytes per NS << 20 */ +}; + +struct hbm_queue_stats { + __u64 avg_Mbps; + __u64 avg_pkt_sz; + __u64 avg_delay_ns; + __u64 avg_skb_delay_ns; + __u64 burst_Mbps; + __u64 max_burst_Mbps; + __u64 max_pkt_sz; + __u64 avg_cwnd; + __u64 avg_rtt; + + __u64 delayed_pct; + __u64 ecn_pct; + __u64 ecn_err_pct; + __u64 nodelay_pct; + + __u64 max_burst_port; + __u64 max_pkt_sz_port; + + __u64 avg_tstamp; + __u64 burst_tstamp; + + __u64 burst_bytes; + __u64 burst_pkts; + + __s64 delta; + + __u64 bytes; + __u64 pkts; + + __u64 delay_ns; + __u64 pkts_delayed; + __u64 skb_delay_ns; + __u64 pkts_skb_delayed; + __u64 pkts_nodelay; + + __u64 pkts_dropped; + __u64 pkts_ecn; + __u64 pkts_ecn_err; + + __u64 total_delay_ns; + __u64 total_delay_pkts; + __u64 total_skb_delay_pkts; + __u64 total_pkts; + __u64 total_ecn; + + __u64 cwnd; + __u64 rtt; + + __u64 cg_ret[4]; +}; + +static __always_inline __u64 
div64_u64(__u64 dividend, __u64 divisor) +{ + return dividend / divisor; +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct hbm_vqueue); +} queue_state SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct hbm_queue_stats); +} queue_stats SEC(".maps"); + +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +#define __LITTLE_ENDIAN_BITFIELD +struct iphdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ihl:4, + version:4; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:4, + ihl:4; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + __u8 ttl; + __u8 protocol; + __sum16 check; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); + /*The options start here. 
*/ +}; + +struct ipv6hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 priority:4, + version:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:4, + priority:4; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 flow_lbl[3]; + + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; + + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); +}; + +struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __be16 window; + __sum16 check; + __be16 urg_ptr; +}; +#undef __LITTLE_ENDIAN_BITFIELD + +struct hbm_pkt_info { + int cwnd; + int rtt; + int packets_out; + union { + struct iphdr iph; + struct ipv6hdr ip6h; + }; + short ecn; + u16 ip_protocol; + bool tcp_ack; +}; |