author		Martin KaFai Lau <martin.lau@kernel.org>	2024-03-21 10:28:04 -0700
committer	Martin KaFai Lau <martin.lau@kernel.org>	2024-04-16 15:05:23 -0700
commit		1e76506c01368d3b919af2f4edb64a8561bc4781 (patch)
tree		7892d3dd0169b940aa71fa098745dfd774085b47
parent		ad2d22b617b7c0ca2cff4da6dc063183822484bb (diff)
selftests/bpf: Adapt hbm to selftests (tw_netbw)
* Removed the (*128) optimization; it is easier to configure the limit in Gbps.
* Removed the linear ECN behavior. A constant, configured marking percentage is used instead.
* Mostly removed CWR.
* Added the tw original algorithm, but stopped taking the per-flow tcp pacing set in skb->tstamp as an input. It is in tw_netbw_eg, mostly a copy-and-paste from tw but adapted to the same statistics collection and variable naming as the hbm one.
* Added a hbm.c binary to quickly configure a container.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
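
For intuition, a minimal standalone sketch (not part of the patch) of the EDT virtual-queue arithmetic the programs below implement; the constants mirror the defaults in hbm_edt_kern.c:

	/* Each packet advances the virtual queue by its wire time at
	 * rate_bps; how far the queue runs ahead of real time (the
	 * standing queue) drives ECN marking and, eventually, drops.
	 */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t rate_bps = 12500ULL * 1000 * 1000;	/* 12.5 Gbps */
		uint64_t mark_ns = 100 * 1000000ULL;		/* 100 ms */
		uint64_t drop_ns = 2 * 1000000000ULL;		/* 2 s */
		uint64_t len = 1500;				/* bytes */

		/* BYTES_TO_NS(): bits * NSEC_PER_SEC / rate_bps */
		uint64_t delay_ns = (len << 3) * 1000000000ULL / rate_bps;
		printf("a %lluB pkt costs %llu ns of queue time\n",
		       (unsigned long long)len,
		       (unsigned long long)delay_ns);	/* 960 ns */

		/* a hypothetical standing queue of 150 ms */
		uint64_t standing = 150 * 1000000ULL;
		printf("mark candidate: %d, drop: %d\n",
		       standing > mark_ns, standing > drop_ns);	/* 1, 0 */
		return 0;
	}
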
-rw-r--r--	tools/testing/selftests/bpf/.gitignore	1
-rw-r--r--	tools/testing/selftests/bpf/Makefile	6
-rw-r--r--	tools/testing/selftests/bpf/hbm.c	124
-rw-r--r--	tools/testing/selftests/bpf/progs/hbm.h	10
-rw-r--r--	tools/testing/selftests/bpf/progs/hbm_edt_kern.c	341
-rw-r--r--	tools/testing/selftests/bpf/progs/hbm_kern.h	237
6 files changed, 718 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index f1aebabfb0176..0f87f6963651b 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -52,3 +52,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+hbm
\ No newline at end of file
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index edc73f8f5aefb..34464a70811db 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -132,7 +132,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
- xdp_features bpf_test_no_cfi.ko
+ xdp_features bpf_test_no_cfi.ko hbm
TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
@@ -712,6 +712,10 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
$(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
+$(OUTPUT)/hbm: hbm.c $(OUTPUT)/hbm_edt_kern.skel.h $(BPFOBJ)
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
$(call msg,CC,,$@)
diff --git a/tools/testing/selftests/bpf/hbm.c b/tools/testing/selftests/bpf/hbm.c
new file mode 100644
index 0000000000000..6102b1c161f63
--- /dev/null
+++ b/tools/testing/selftests/bpf/hbm.c
@@ -0,0 +1,124 @@
+#include "test_progs.h"
+#include "hbm_edt_kern.skel.h"
+
+#define BPFFS "/sys/fs/bpf/kafai"
+#define CGRPFS "/sys/fs/cgroup"
+
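+/* test_progs.h's ASSERT_* macros call test__fail() on failure; provide a
+ * no-op stub since this runs as a standalone binary, not under test_progs
+ */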
+void test__fail(void)
+{
+}
+
+static int pin_maps(struct hbm_edt_kern *skel, int taskid)
+{
+ char path[128];
+ int err;
+
+ snprintf(path, sizeof(path), BPFFS "/%d/queue_stats", taskid);
+ unlink(path);
+ err = bpf_map__pin(skel->maps.queue_stats, path);
+ if (!ASSERT_OK(err, "bpf_map__pin(queue_stats)"))
+ return -1;
+
+ snprintf(path, sizeof(path), BPFFS "/%d/queue_state", taskid);
+ unlink(path);
+ err = bpf_map__pin(skel->maps.queue_state, path);
+ if (!ASSERT_OK(err, "bpf_map__pin(queue_state)"))
+ return -1;
+
+ snprintf(path, sizeof(path), BPFFS "/%d/rodata", taskid);
+ unlink(path);
+ err = bpf_map__pin(skel->maps.rodata, path);
+ if (!ASSERT_OK(err, "bpf_map__pin(rodata)"))
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct hbm_edt_kern *skel = NULL;
+ int err, taskid, cgrp_fd = -1;
+ struct bpf_link *link = NULL;
+ const char *tw_cgrp_path;
+ struct bpf_program *prog;
+ uint64_t mark_tenth_pct;
+ char cgrp_path[1024];
+ char link_path[128];
+
+ if (argc != 11) {
+ printf("./hbm host_ip tw_cgrp_path svc_ip taskid prog rate_mbps burst_ns mark_ns mark_tenth_pct drop_ns\n");
+ return -1;
+ }
+
+ skel = hbm_edt_kern__open();
+ if (!ASSERT_OK_PTR(skel, "hbm_edt_kern__open"))
+ goto done;
+
+ err = inet_pton(AF_INET6, argv[1], &skel->rodata->host_addr);
+ if (!ASSERT_EQ(err, 1, "inet_pton(host_addr)"))
+ goto done;
+
+ tw_cgrp_path = argv[2];
+
+ err = inet_pton(AF_INET6, argv[3], &skel->rodata->svc_addr);
+ if (!ASSERT_EQ(err, 1, "inet_pton(svc_addr)"))
+ goto done;
+
+ err = inet_pton(AF_INET6, "::1", &skel->rodata->lo_addr);
+ if (!ASSERT_EQ(err, 1, "inet_pton(lo_addr)"))
+ goto done;
+
+ taskid = atoi(argv[4]);
+ snprintf(link_path, sizeof(link_path), BPFFS "/%d/cgroup_link", taskid);
+
+ /* a 0 value means no limit: fall back to UINT64_MAX */
+ skel->rodata->rate_bps = strtoull(argv[6], NULL, 10) * 1000 * 1000 ? : UINT64_MAX;
+ skel->rodata->burst_ns = strtoull(argv[7], NULL, 10) ? : UINT64_MAX;
+ skel->rodata->mark_ns = strtoull(argv[8], NULL, 10) ? : UINT64_MAX;
+ mark_tenth_pct = strtoull(argv[9], NULL, 10);
+ if (!ASSERT_LT(mark_tenth_pct, 1000, "mark_tenth_pct"))
+ goto done;
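+ /* mark_rand is chosen so bpf_get_prandom_u32() > mark_rand holds with
+  * probability mark_tenth_pct / 1000 (e.g. 5 => mark ~0.5% of packets)
+  */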
+ skel->rodata->mark_rand = (UINT_MAX / 1000) * (1000 - mark_tenth_pct);
+ skel->rodata->drop_ns = strtoull(argv[10], NULL, 10) ? : UINT64_MAX;
+
+ printf("rate_bps %lu burst_ns %lu mark_ns %lu drop_ns %lu\n",
+ skel->rodata->rate_bps, skel->rodata->burst_ns,
+ skel->rodata->mark_ns, skel->rodata->drop_ns);
+
+ err = hbm_edt_kern__load(skel);
+ if (!ASSERT_OK(err, "hbm_edt_kern__load"))
+ goto done;
+
+ if (!strcmp(argv[5], "edt"))
+ prog = skel->progs.hbm_edt;
+ else
+ prog = skel->progs.tw_netbw_eg;
+
+ link = bpf_link__open(link_path);
+ if (link) {
+ err = bpf_link__update_program(link, prog);
+ ASSERT_OK(err, "bpf_link__update_program");
+ } else {
+ snprintf(cgrp_path, sizeof(cgrp_path), CGRPFS "%s", tw_cgrp_path);
+ cgrp_fd = open(cgrp_path, O_RDONLY);
+ if (!ASSERT_GE(cgrp_fd, 0, "open(cgroup)"))
+ goto done;
+ link = bpf_program__attach_cgroup(prog, cgrp_fd);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_cgroup"))
+ goto done;
+ err = bpf_link__pin(link, link_path);
+ if (!ASSERT_OK(err, "bpf_link__pin"))
+ goto done;
+ }
+ err = pin_maps(skel, taskid);
+ if (err)
+ goto done;
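+ /* disconnect so the destroy below only frees memory and does not
+  * detach the link, which stays pinned in bpffs
+  */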
+ bpf_link__disconnect(link);
+
+done:
+ bpf_link__destroy(link);
+ hbm_edt_kern__destroy(skel);
+ if (cgrp_fd != -1)
+ close(cgrp_fd);
+ return 0;
+}
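
For reference, a hypothetical invocation (the addresses, cgroup path, and taskid are made-up examples; the argument order follows the usage string above): ./hbm fd00::1 /tw/task42 fd00::2 42 edt 10000 10000000 100000000 5 2000000000, which caps egress at 10 Gbps with a 10ms burst allowance, CE-marks about 0.5% of packets while the standing queue exceeds 100ms, and drops once it exceeds 2s.
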
diff --git a/tools/testing/selftests/bpf/progs/hbm.h b/tools/testing/selftests/bpf/progs/hbm.h
new file mode 100644
index 0000000000000..af4e6dea8c99d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/hbm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for Host Bandwidth Management (HBM) programs
+ */
diff --git a/tools/testing/selftests/bpf/progs/hbm_edt_kern.c b/tools/testing/selftests/bpf/progs/hbm_edt_kern.c
new file mode 100644
index 0000000000000..0de11ae05fb57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/hbm_edt_kern.c
@@ -0,0 +1,341 @@
+#include "hbm_kern.h"
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#define U32_MAX 4294967295U
+#define U64_MAX 18446744073709551615ULL
+
+volatile const struct in6_addr svc_addr;
+volatile const struct in6_addr host_addr;
+volatile const struct in6_addr lo_addr;
+
+volatile const s64 burst_ns = 10 * NSEC_PER_MSEC; /* 10ms */
+volatile const u64 mark_ns = 100 * NSEC_PER_MSEC; /* 100ms */
+volatile const u64 drop_ns = 2 * NSEC_PER_SEC; /* 2s */
+volatile const u64 rate_bps = 12500UL * 1000 * 1000;
+volatile const u32 mark_rand = (U32_MAX / 100) * 99; /* 1% ECN */
+
+#define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+
+static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
+{
+ struct bpf_sock *sk;
+ struct bpf_tcp_sock *tp;
+
+ sk = skb->sk;
+ if (sk) {
+ sk = bpf_sk_fullsock(sk);
+ if (sk) {
+ if (sk->protocol == IPPROTO_TCP) {
+ tp = bpf_tcp_sock(sk);
+ if (tp) {
+ pkti->cwnd = tp->snd_cwnd;
+ pkti->rtt = tp->srtt_us >> 3;
+ pkti->packets_out = tp->packets_out;
+ return 0;
+ }
+ }
+ }
+ }
+ pkti->cwnd = 0;
+ pkti->rtt = 0;
+ pkti->packets_out = 0;
+ return 1;
+}
+
+static void hbm_get_pkt_info(struct __sk_buff *skb,
+ struct hbm_pkt_info *pkti)
+{
+ struct iphdr *iph = &pkti->iph;
+ struct ipv6hdr *ip6h = &pkti->ip6h;
+ struct tcphdr th;
+ __u32 ip_len = 0;
+
+ if (skb->protocol == bpf_htons(ETH_P_IPV6)) {
+ bpf_skb_load_bytes(skb, 0, ip6h, sizeof(*ip6h));
+ pkti->ip_protocol = ip6h->nexthdr;
+ pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
+ ip_len = sizeof(*ip6h);
+ } else if (skb->protocol == bpf_htons(ETH_P_IP)) {
+ bpf_skb_load_bytes(skb, 0, iph, sizeof(*iph));
+ pkti->ip_protocol = iph->protocol;
+ pkti->ecn = iph->tos & INET_ECN_MASK;
+ ip_len = sizeof(*iph);
+ }
+ if (pkti->ip_protocol == IPPROTO_TCP) {
+ bpf_skb_load_bytes(skb, ip_len, &th, sizeof(th));
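+ /* no bytes past the TCP header: treat as a pure ACK */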
+ if (skb->len <= ip_len + th.doff * 4)
+ pkti->tcp_ack = true;
+ get_tcp_info(skb, pkti);
+ }
+}
+
+static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, struct __sk_buff *skb,
+ int local_port,
+ u64 now, u64 added_delay, u64 skb_delay,
+ int ret, struct hbm_pkt_info *pkti)
+{
+ qsp->burst_bytes += skb->len;
+ qsp->bytes += skb->len;
+ qsp->pkts += 1;
+ qsp->total_pkts += 1;
+
+ if (!qsp->avg_tstamp) {
+ qsp->avg_tstamp = now;
+ } else if (now > qsp->avg_tstamp && now - qsp->avg_tstamp > 10 * NSEC_PER_SEC) {
+ qsp->avg_pkt_sz = qsp->bytes / qsp->pkts;
+ /* divide bits by ns => Gbps; (* 1000) to get Mbps */
+ qsp->avg_Mbps = BYTES_TO_BITS(qsp->bytes) * 1000 / (now - qsp->avg_tstamp);
+ qsp->avg_delay_ns = qsp->delay_ns / qsp->pkts_delayed;
+ qsp->avg_skb_delay_ns = qsp->skb_delay_ns / qsp->pkts_skb_delayed;
+ qsp->avg_cwnd = qsp->cwnd / qsp->pkts;
+ qsp->avg_rtt = qsp->rtt / qsp->pkts;
+
+ qsp->delayed_pct = qsp->pkts_delayed * 100 / qsp->pkts;
+ qsp->ecn_pct = qsp->pkts_ecn * 100 / qsp->pkts;
+ qsp->ecn_err_pct = qsp->pkts_ecn_err * 100 / qsp->pkts;
+
+ qsp->pkts = 0;
+ qsp->bytes = 0;
+
+ qsp->max_burst_Mbps = 0;
+ qsp->max_burst_port = 0;
+
+ qsp->max_pkt_sz = 0;
+ qsp->max_pkt_sz_port = 0;
+
+ qsp->delay_ns = 0;
+ qsp->skb_delay_ns = 0;
+ qsp->pkts_delayed = 0;
+ qsp->pkts_skb_delayed = 0;
+ qsp->pkts_nodelay = 0;
+ qsp->pkts_ecn = 0;
+ qsp->pkts_ecn_err = 0;
+ qsp->pkts_dropped = 0;
+
+ qsp->cwnd = 0;
+ qsp->rtt = 0;
+
+ qsp->cg_ret[0] = qsp->cg_ret[1] = qsp->cg_ret[2] = qsp->cg_ret[3] = 0;
+
+ qsp->avg_tstamp = now;
+ }
+
+ if (skb->len > qsp->max_pkt_sz) {
+ qsp->max_pkt_sz = skb->len;
+ qsp->max_pkt_sz_port = local_port;
+ }
+
+ if (now > qsp->burst_tstamp && now - qsp->burst_tstamp > NSEC_PER_MSEC) {
+ qsp->burst_Mbps = BYTES_TO_BITS(qsp->burst_bytes) * 1000 / (now - qsp->burst_tstamp);
+ qsp->burst_bytes = 0;
+ qsp->burst_tstamp = now;
+ if (qsp->burst_Mbps > qsp->max_burst_Mbps) {
+ qsp->max_burst_Mbps = qsp->burst_Mbps;
+ qsp->max_burst_port = local_port;
+ }
+ }
+
+ if (added_delay) {
+ qsp->total_delay_ns += added_delay;
+ qsp->total_delay_pkts += 1;
+ qsp->delay_ns += added_delay;
+ qsp->pkts_delayed += 1;
+ } else {
+ qsp->pkts_nodelay += 1;
+ }
+
+ if (skb_delay) {
+ qsp->skb_delay_ns += skb_delay;
+ qsp->pkts_skb_delayed += 1;
+ qsp->total_skb_delay_pkts += 1;
+ }
+
+ if (ret == DROP || ret == DROP_CWR)
+ qsp->pkts_dropped += 1;
+
+ qsp->cwnd += pkti->cwnd;
+ qsp->rtt += pkti->rtt;
+
+ if (ret < 4)
+ qsp->cg_ret[ret] += 1;
+}
+
+
+SEC("cgroup_skb/egress")
+int hbm_edt(struct __sk_buff *skb)
+{
+ s64 added_delay = 0, delta = 0, delay_ns;
+ u64 now, send_tstamp, skb_tstamp;
+ struct hbm_queue_stats *qsp;
+ unsigned int zero = 0;
+ struct hbm_pkt_info pkti = {};
+ struct hbm_vqueue *qdp;
+ int len = skb->len;
+ int ret = ALLOW;
+ struct sock *sk = (void *)skb->sk;
+ int local_port;
+
+ if (!sk)
+ return ALLOW;
+
+ sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ local_port = sk->sk_num;
+
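+ /* ifindex 1 is loopback; nothing to manage there */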
+ if (skb->ifindex == 1)
+ return ALLOW;
+
+ hbm_get_pkt_info(skb, &pkti);
+ if (pkti.ip_protocol != IPPROTO_TCP || pkti.tcp_ack ||
+ skb->protocol != bpf_htons(ETH_P_IPV6) ||
+ !v6_equal(pkti.ip6h.saddr, svc_addr) ||
+ v6_equal(pkti.ip6h.daddr, lo_addr) ||
+ v6_equal(pkti.ip6h.daddr, host_addr))
+ return ALLOW;
+
+ qdp = bpf_map_lookup_elem(&queue_state, &zero);
+ if (!qdp)
+ return ALLOW;
+
+ now = bpf_ktime_get_ns();
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &zero);
+ if (!qsp)
+ return ALLOW;
+
+ bpf_spin_lock(&qdp->lock);
+ if (!qdp->lasttime)
+ qdp->lasttime = now - burst_ns;
+
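+ /* delta > 0: the virtual queue runs ahead of real time (a standing
+  * queue has built up); delta < 0: idle credit, capped at burst_ns
+  */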
+ delta = qdp->lasttime - now;
+ qsp->delta = delta;
+ if (delta <= -burst_ns) {
+ qdp->lasttime = now - burst_ns;
+ delta = -burst_ns;
+ }
+ send_tstamp = qdp->lasttime;
+ delay_ns = BYTES_TO_NS(len);
+
+ skb_tstamp = skb->tstamp;
+ if (send_tstamp > now && send_tstamp > skb_tstamp) {
+ added_delay = send_tstamp - skb_tstamp;
+ skb->tstamp = send_tstamp;
+ }
+
+ if (delta > 0 && delta > drop_ns)
+ ret = DROP;
+
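+ /* only packets actually sent consume virtual-queue time */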
+ if (ret == ALLOW || ret == ALLOW_CWR)
+ qdp->lasttime += delay_ns;
+
+ hbm_update_stats(qsp, skb, local_port, now, added_delay, 0, ret, &pkti);
+
+ bpf_spin_unlock(&qdp->lock);
+
+ if (delta > 0 && delta > mark_ns && bpf_get_prandom_u32() > mark_rand) {
+ if (bpf_skb_ecn_set_ce(skb)) {
+ __sync_add_and_fetch(&qsp->pkts_ecn, 1);
+ __sync_add_and_fetch(&qsp->total_ecn, 1);
+ } else {
+ __sync_add_and_fetch(&qsp->pkts_ecn_err, 1);
+ }
+ }
+
+ return ret;
+}
+
+SEC("cgroup_skb/egress")
+int tw_netbw_eg(struct __sk_buff* skb)
+{
+ u64 delay_ns, now, skb_tstamp, send_tstamp = 0;
+ u64 added_delay = 0, skb_delay = 0;
+ struct hbm_pkt_info pkti = {};
+ struct hbm_queue_stats *qsp;
+ struct hbm_vqueue *qdp;
+ int len = skb->len;
+ int ret = ALLOW;
+ int zero = 0;
+ struct sock *sk = (void *)skb->sk;
+ int local_port;
+ bool ecn = false;
+
+ if (!sk)
+ return ALLOW;
+
+ sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock));
+ local_port = sk->sk_num;
+
+ qdp = bpf_map_lookup_elem(&queue_state, &zero);
+ if (!qdp)
+ return ALLOW;
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &zero);
+ if (!qsp)
+ return ALLOW;
+
+ hbm_get_pkt_info(skb, &pkti);
+ if (pkti.ip_protocol != IPPROTO_TCP || pkti.tcp_ack ||
+ skb->protocol != bpf_htons(ETH_P_IPV6) ||
+ !v6_equal(pkti.ip6h.saddr, svc_addr) ||
+ v6_equal(pkti.ip6h.daddr, lo_addr) ||
+ v6_equal(pkti.ip6h.daddr, host_addr) ||
+ skb->ifindex == 1)
+ return ALLOW;
+
+ skb_tstamp = skb->tstamp;
+ now = bpf_ktime_get_ns();
+ if (skb_tstamp < now)
+ skb_tstamp = now;
+
+ bpf_spin_lock(&qdp->lock);
+
+ delay_ns = BYTES_TO_NS(len);
+ if (qdp->lasttime)
+ send_tstamp = qdp->lasttime + delay_ns;
+ else
+ send_tstamp = now;
+
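+ /* the virtual queue is idle enough that this packet needs no extra
+  * delay beyond its own (pacing) tstamp
+  */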
+ if (send_tstamp <= skb_tstamp) {
+ skb_delay = skb_tstamp - send_tstamp;
+ /* To take the per flow tcp pacing (skb->tstamp) as an input when
+  * deciding qdp->lasttime, uncomment the next line; this algorithm
+  * intentionally does not.
+  */
+ /* send_tstamp = skb_tstamp; */
+ goto unlock;
+ }
+
+ added_delay = send_tstamp - skb_tstamp;
+ skb->tstamp = send_tstamp;
+
+ if (send_tstamp - now > mark_ns)
+ ecn = true;
+
+ if (send_tstamp - now > drop_ns)
+ ret = DROP;
+
+unlock:
+ hbm_update_stats(qsp, skb, local_port, now, added_delay, skb_delay, ret, &pkti);
+ if (ret == ALLOW || ret == ALLOW_CWR)
+ qdp->lasttime = send_tstamp;
+
+ bpf_spin_unlock(&qdp->lock);
+
+ /* ecn was computed above but CE marking is currently disabled: */
+ (void)ecn;
+/*
+ if (ecn) {
+ if (bpf_skb_ecn_set_ce(skb))
+ __sync_add_and_fetch(&qsp->pkts_ecn, 1);
+ else
+ __sync_add_and_fetch(&qsp->pkts_ecn_err, 1);
+ }
+*/
+ return ALLOW;
+}
+
+char _license[] SEC("license") = "GPL";
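
The maps pinned by hbm.c can be inspected later from any process. A minimal sketch of a reader (the pin path and taskid 42 are made-up examples, and it assumes avg_Mbps is the first field of struct hbm_queue_stats, per hbm_kern.h below):

	/* hypothetical reader for a pinned queue_stats map */
	#include <bpf/bpf.h>
	#include <stdio.h>

	int main(void)
	{
		__u64 buf[64] = {};	/* >= sizeof(struct hbm_queue_stats) */
		__u32 zero = 0;
		int fd;

		fd = bpf_obj_get("/sys/fs/bpf/kafai/42/queue_stats");
		if (fd < 0)
			return 1;
		if (bpf_map_lookup_elem(fd, &zero, buf))
			return 1;
		/* buf[0] is avg_Mbps, the first field in hbm_kern.h */
		printf("avg_Mbps %llu\n", (unsigned long long)buf[0]);
		return 0;
	}
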
diff --git a/tools/testing/selftests/bpf/progs/hbm_kern.h b/tools/testing/selftests/bpf/progs/hbm_kern.h
new file mode 100644
index 0000000000000..467834cf4812d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/hbm_kern.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Include file for sample Host Bandwidth Manager (HBM) BPF programs
+ */
+#define KBUILD_MODNAME "foo"
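+/* vmlinux.h carries BTF copies of iphdr/ipv6hdr/tcphdr; rename them away
+ * so the local definitions below (with usable bitfield layout) apply
+ */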
+#define iphdr iphdr_unused
+#define ipv6hdr ipv6hdr_unused
+#define tcphdr tcphdr_unused
+#include "vmlinux.h"
+#undef tcphdr
+#undef ipv6hdr
+#undef iphdr
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tracing_net.h"
+
+#define DROP (0U)
+#define ALLOW (1U << 0)
+#define CWR (1U << 1)
+#define DROP_CWR (DROP | CWR)
+#define ALLOW_CWR (ALLOW | CWR)
+#define TCP_ECN_OK 1
+
+#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
+#endif
+
+#define INITIAL_CREDIT_PACKETS 100
+#define MAX_BYTES_PER_PACKET 1500
+#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET)
+#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET)
+#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
+#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH)
+#define LARGE_PKT_THRESH 120
+#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
+#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
+
+#define MSEC_PER_SEC 1000UL
+#define NSEC_PER_USEC 1000UL
+#define NSEC_PER_MSEC 1000000UL
+#define NSEC_PER_SEC 1000000000UL
+
+#define BYTES_TO_BITS(bytes) (((u64)bytes) << 3)
+/* wire time of a packet at rate_bps, in ns */
+#define BYTES_TO_NS(bytes) (BYTES_TO_BITS(bytes) * NSEC_PER_SEC / rate_bps)
+
+struct hbm_vqueue {
+ struct bpf_spin_lock lock;
+ /* 4 byte hole */
+ __u64 lasttime; /* In ns */
+ __u64 now;
+ int credit; /* In bytes */
+ __u32 rate; /* In bytes per NS << 20 */
+};
+
+struct hbm_queue_stats {
+ __u64 avg_Mbps;
+ __u64 avg_pkt_sz;
+ __u64 avg_delay_ns;
+ __u64 avg_skb_delay_ns;
+ __u64 burst_Mbps;
+ __u64 max_burst_Mbps;
+ __u64 max_pkt_sz;
+ __u64 avg_cwnd;
+ __u64 avg_rtt;
+
+ __u64 delayed_pct;
+ __u64 ecn_pct;
+ __u64 ecn_err_pct;
+ __u64 nodelay_pct;
+
+ __u64 max_burst_port;
+ __u64 max_pkt_sz_port;
+
+ __u64 avg_tstamp;
+ __u64 burst_tstamp;
+
+ __u64 burst_bytes;
+ __u64 burst_pkts;
+
+ __s64 delta;
+
+ __u64 bytes;
+ __u64 pkts;
+
+ __u64 delay_ns;
+ __u64 pkts_delayed;
+ __u64 skb_delay_ns;
+ __u64 pkts_skb_delayed;
+ __u64 pkts_nodelay;
+
+ __u64 pkts_dropped;
+ __u64 pkts_ecn;
+ __u64 pkts_ecn_err;
+
+ __u64 total_delay_ns;
+ __u64 total_delay_pkts;
+ __u64 total_skb_delay_pkts;
+ __u64 total_pkts;
+ __u64 total_ecn;
+
+ __u64 cwnd;
+ __u64 rtt;
+
+ __u64 cg_ret[4];
+};
+
+static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+ return dividend / divisor;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct hbm_vqueue);
+} queue_state SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct hbm_queue_stats);
+} queue_stats SEC(".maps");
+
+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
+ union { \
+ struct { MEMBERS } ATTRS; \
+ struct TAG { MEMBERS } ATTRS NAME; \
+ }
+
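+/* assume a little-endian target for the bitfield layouts below */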
+#define __LITTLE_ENDIAN_BITFIELD
+struct iphdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 ihl:4,
+ version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:4,
+ ihl:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 tos;
+ __be16 tot_len;
+ __be16 id;
+ __be16 frag_off;
+ __u8 ttl;
+ __u8 protocol;
+ __sum16 check;
+ __struct_group(/* no tag */, addrs, /* no attrs */,
+ __be32 saddr;
+ __be32 daddr;
+ );
+ /*The options start here. */
+};
+
+struct ipv6hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 priority:4,
+ version:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version:4,
+ priority:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 flow_lbl[3];
+
+ __be16 payload_len;
+ __u8 nexthdr;
+ __u8 hop_limit;
+
+ __struct_group(/* no tag */, addrs, /* no attrs */,
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ );
+};
+
+struct tcphdr {
+ __be16 source;
+ __be16 dest;
+ __be32 seq;
+ __be32 ack_seq;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u16 res1:4,
+ doff:4,
+ fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1,
+ urg:1,
+ ece:1,
+ cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u16 doff:4,
+ res1:4,
+ cwr:1,
+ ece:1,
+ urg:1,
+ ack:1,
+ psh:1,
+ rst:1,
+ syn:1,
+ fin:1;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ __be16 window;
+ __sum16 check;
+ __be16 urg_ptr;
+};
+#undef __LITTLE_ENDIAN_BITFIELD
+
+struct hbm_pkt_info {
+ int cwnd;
+ int rtt;
+ int packets_out;
+ union {
+ struct iphdr iph;
+ struct ipv6hdr ip6h;
+ };
+ short ecn;
+ u16 ip_protocol;
+ bool tcp_ack;
+};