aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Ahern <dsahern@kernel.org>2024-02-27 04:08:34 +0000
committerDavid Ahern <dsahern@kernel.org>2024-02-27 04:08:34 +0000
commit7c05edd3eceee354f0a2fca7c90f72971a0daccf (patch)
treec928cfb1135a821f74b1e0ec5e2f41d24e6e0796
parentd2f1c3c9a8a38493cdec9fb93534ccec76c48fe2 (diff)
parent50763d53310c1f95bd846ce9961d40e0e0e66376 (diff)
downloadiproute2-7c05edd3eceee354f0a2fca7c90f72971a0daccf.tar.gz
Merge branch 'ss-socket-local-storage' into next
Quentin Deslandes says: ==================== BPF allows programs to store socket-specific data using BPF_MAP_TYPE_SK_STORAGE maps. The data is attached to the socket itself, and Martin added INET_DIAG_REQ_SK_BPF_STORAGES, so it can be fetched using the INET_DIAG mechanism. Currently, ss doesn't request the socket-local data, this patch aims to fix this. The first patch requests the socket-local data for the requested map ID (--bpf-map-id=) or all the maps (--bpf-maps). It then prints the map_id in COL_EXT. Patch #2 uses libbpf and BTF to pretty print the map's content, like `bpftool map dump` would do. Patch #3 updates ss' man page to explain new options. While I think it makes sense for ss to provide the socket-local storage content for the sockets, it's difficult to conciliate the column-based output of ss and having readable socket-local data. Hence, the socket-local data is printed in a readable fashion over multiple lines under its socket statistics, independently of the column-based approach. Here is an example of ss' output with --bpf-maps: [...] ESTAB 340116 0 [...] map_id: 114 [ (struct my_sk_storage){ .field_hh = (char)3, (union){ .a = (int)17, .b = (int)17, }, } ] Changed this series to an RFC as the merging window for net-next is closed. Changes from v8: * Remove usage of libbpf_bpf_map_type_str() which requires libbpf-1.0+ and provide very little added value (David). * Use ENABLE_BPF_SKSTORAGE_SUPPORT to gate the BPF socket-local storage support, instead of HAVE_LIBBPF. iproute2 depends on libbpf-0.1, but this change needs libbpf-0.5+. If the requirements are not met, ss can still be compiled and used without BPF socket-local storage support, but a warning will be printed at compile time. Changes from v7: * Fix comment format and checkpatch warnings (Stephen, David). * Replaced Co-authored-by with Co-developed-by + Signed-off-by for Martin's contribution on patch #1 to follow checkpatch requirements, with Martin's approval. 
Changes from v6: * Remove column dedicated to BPF socket-local storage (COL_SKSTOR), use COL_EXT instead (Matthieu). Changes from v5: * Add support for --oneline when printing socket-local data. * Use \t to indent instead of " " to be consistent with other columns. * Removed Martin's ack on patch #2 due to the amount of lines changed. Changes from v4: * Fix return code for 2 calls. * Fix issue when inet_show_netlink() retries a request. * BPF dump object is created in bpf_map_opts_load_info(). Changes from v3: * Minor refactoring to reduce the number of HAVE_LIBBPF usages. * Update ss' man page. * btf_dump structure created to print the socket-local data is cached in bpf_map_opts. Creation of the btf_dump structure is performed if needed, before printing the data. * If a map can't be pretty-printed, print its ID and a message instead of skipping it. * If show_all=true, send an empty message to the kernel to retrieve all the maps (as Martin suggested). Changes from v2: * bpf_map_opts_is_enabled is not inline anymore. * Add more #ifdef HAVE_LIBBPF to prevent compilation error if libbpf support is disabled. * Fix erroneous usage of args instead of _args in vout(). * Add missing btf__free() and close(fd). Changes from v1: * Remove the first patch from the series (fix) and submit it separately. * Remove double allocation of struct rtattr. * Close BPF map FDs on exit. * If bpf_map_get_fd_by_id() fails with ENOENT, print an error message and continue to the next map ID. * Fix typo in new command line option documentation. * Only use bpf_map_info.btf_value_type_id and ignore bpf_map_info.btf_vmlinux_value_type_id (unused for socket-local storage). * Use btf_dump__dump_type_data() instead of manually using BTF to pretty-print socket-local storage data. This change alone divides the size of the patch series by 2. ==================== Signed-off-by: David Ahern <dsahern@kernel.org>
-rw-r--r--man/man8/ss.86
-rw-r--r--misc/ss.c410
2 files changed, 407 insertions, 9 deletions
diff --git a/man/man8/ss.8 b/man/man8/ss.8
index b014cde1d..e23af826f 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -426,6 +426,12 @@ to FILE after applying filters. If FILE is - stdout is used.
Read filter information from FILE. Each line of FILE is interpreted
like single command line option. If FILE is - stdin is used.
.TP
+.B \-\-bpf-maps
+Pretty-print all the BPF socket-local data entries for each socket.
+.TP
+.B \-\-bpf-map-id=MAP_ID
+Pretty-print the BPF socket-local data entries for the requested map ID. Can be used more than once.
+.TP
.B FILTER := [ state STATE-FILTER ] [ EXPRESSION ]
Please take a look at the official documentation for details regarding filters.
diff --git a/misc/ss.c b/misc/ss.c
index 72a841be8..3ebac1323 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -51,6 +51,26 @@
#include <linux/tls.h>
#include <linux/mptcp.h>
+#ifdef HAVE_LIBBPF
+/* If libbpf is new enough (0.5+), support for pretty-printing BPF socket-local
+ * storage is enabled, otherwise we emit a warning and disable it.
+ * ENABLE_BPF_SKSTORAGE_SUPPORT is only used to gate the socket-local storage
+ * feature, so this wouldn't prevent any feature relying on HAVE_LIBBPF to be
+ * usable.
+ */
+#define ENABLE_BPF_SKSTORAGE_SUPPORT
+
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+
+#if (LIBBPF_MAJOR_VERSION == 0) && (LIBBPF_MINOR_VERSION < 5)
+#warning "libbpf version 0.5 or later is required, disabling BPF socket-local storage support"
+#undef ENABLE_BPF_SKSTORAGE_SUPPORT
+#endif
+#endif
+
#if HAVE_RPC
#include <rpc/rpc.h>
#include <rpc/xdr.h>
@@ -1035,11 +1055,10 @@ static int buf_update(int len)
}
/* Append content to buffer as part of the current field */
-__attribute__((format(printf, 1, 2)))
-static void out(const char *fmt, ...)
+static void vout(const char *fmt, va_list args)
{
struct column *f = current_field;
- va_list args;
+ va_list _args;
char *pos;
int len;
@@ -1050,18 +1069,27 @@ static void out(const char *fmt, ...)
buffer.head = buf_chunk_new();
again: /* Append to buffer: if we have a new chunk, print again */
+ va_copy(_args, args);
pos = buffer.cur->data + buffer.cur->len;
- va_start(args, fmt);
/* Limit to tail room. If we hit the limit, buf_update() will tell us */
- len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, args);
- va_end(args);
+ len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, _args);
if (buf_update(len))
goto again;
}
+/* printf-style front end for vout(): collect the variadic arguments into a
+ * va_list and hand them over together with the format string.
+ */
+__attribute__((format(printf, 1, 2)))
+static void out(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vout(fmt, ap);
+	va_end(ap);
+}
+
static int print_left_spacing(struct column *f, int stored, int printed)
{
int s;
@@ -3384,6 +3412,318 @@ static void parse_diag_msg(struct nlmsghdr *nlh, struct sockstat *s)
memcpy(s->remote.data, r->id.idiag_dst, s->local.bytelen);
}
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+
+#define MAX_NR_BPF_MAP_ID_OPTS 32
+
+struct btf;
+
+/* State cached for one BPF socket-local storage map. */
+struct bpf_sk_storage_map_info {
+	unsigned int id;		/* BPF map ID */
+	int fd;				/* file descriptor opened for the map */
+	struct bpf_map_info info;	/* map info fetched through fd */
+	struct btf *btf;		/* BTF object loaded for the map */
+	struct btf_dump *dump;		/* dumper used to print map values */
+};
+
+/* Global state behind --bpf-maps and --bpf-map-id: the maps loaded so far
+ * and whether every map was requested.
+ */
+static struct bpf_map_opts {
+	unsigned int nr_maps;	/* number of used entries in maps[] */
+	struct bpf_sk_storage_map_info maps[MAX_NR_BPF_MAP_ID_OPTS];
+	bool show_all;		/* true once --bpf-maps was given */
+} bpf_map_opts;
+
+/* Tell the user that --bpf-maps and --bpf-map-id are mutually exclusive. */
+static void bpf_map_opts_mixed_error(void)
+{
+	fputs("ss: --bpf-maps and --bpf-map-id cannot be used together\n",
+	      stderr);
+}
+
+/* Load the BTF object referenced by @info into *@btf.
+ *
+ * When @info has no BTF value type ID, *@btf is set to NULL and this is not
+ * treated as an error. Returns 0 on success, or -1 after printing a message
+ * to stderr when the BTF object cannot be loaded.
+ */
+static int bpf_maps_opts_load_btf(struct bpf_map_info *info, struct btf **btf)
+{
+	struct btf *loaded;
+
+	if (!info->btf_value_type_id) {
+		*btf = NULL;
+		return 0;
+	}
+
+	loaded = btf__load_from_kernel_by_id(info->btf_id);
+	*btf = loaded;
+	if (loaded)
+		return 0;
+
+	fprintf(stderr, "ss: failed to load BTF for map ID %u\n", info->id);
+	return -1;
+}
+
+/* Print callback handed to btf_dump__new(): forwards the formatted text to
+ * vout(). @ctx is not used.
+ */
+static void out_bpf_sk_storage_print_fn(void *ctx, const char *fmt, va_list args)
+{
+	(void)ctx;
+
+	vout(fmt, args);
+}
+
+/* Open BPF map @map_id and cache what is needed to request and pretty-print
+ * its socket-local storage: the fd, the map info, the BTF object and a
+ * btf_dump instance.
+ *
+ * Returns 0 when the map was appended to bpf_map_opts.maps[] and also when it
+ * was skipped (table already full, or no map with that ID exists); a caller
+ * that needs the new entry must check whether nr_maps grew. Returns -1 on any
+ * other failure. A message is printed to stderr whenever no entry is added.
+ */
+static int bpf_map_opts_load_info(unsigned int map_id)
+{
+	struct btf_dump_opts dopts = {
+		.sz = sizeof(struct btf_dump_opts)
+	};
+	struct bpf_sk_storage_map_info *slot;
+	struct bpf_map_info info = {};
+	uint32_t len = sizeof(info);
+	struct btf_dump *dump;
+	struct btf *btf;
+	int fd;
+
+	if (bpf_map_opts.nr_maps == MAX_NR_BPF_MAP_ID_OPTS) {
+		fprintf(stderr,
+			"ss: too many (> %u) BPF socket-local storage maps found, skipping map ID %u\n",
+			MAX_NR_BPF_MAP_ID_OPTS, map_id);
+		return 0;
+	}
+
+	fd = bpf_map_get_fd_by_id(map_id);
+	if (fd < 0) {
+		/* errno holds a positive error code. A map that does not
+		 * exist (anymore) is skipped rather than treated as fatal.
+		 */
+		if (errno == ENOENT) {
+			fprintf(stderr,
+				"ss: BPF map ID %u not found, skipping\n",
+				map_id);
+			return 0;
+		}
+
+		fprintf(stderr, "ss: cannot get fd for BPF map ID %u%s\n",
+			map_id, errno == EPERM ?
+			": missing root permissions, CAP_BPF, or CAP_SYS_ADMIN" : "");
+		return -1;
+	}
+
+	if (bpf_obj_get_info_by_fd(fd, &info, &len)) {
+		fprintf(stderr, "ss: failed to get info for BPF map ID %u\n",
+			map_id);
+		goto err_close;
+	}
+
+	if (info.type != BPF_MAP_TYPE_SK_STORAGE) {
+		/* Report the ID we were asked to load, not the getopt global
+		 * optarg, which may be NULL or stale when this runs while
+		 * printing sockets.
+		 */
+		fprintf(stderr,
+			"ss: BPF map with ID %u has type ID %u, expecting %d ('sk_storage')\n",
+			map_id, info.type, BPF_MAP_TYPE_SK_STORAGE);
+		goto err_close;
+	}
+
+	if (bpf_maps_opts_load_btf(&info, &btf))
+		goto err_close;
+
+	dump = btf_dump__new(btf, out_bpf_sk_storage_print_fn, NULL, &dopts);
+	if (!dump) {
+		fprintf(stderr, "Failed to create btf_dump object\n");
+		btf__free(btf);
+		goto err_close;
+	}
+
+	slot = &bpf_map_opts.maps[bpf_map_opts.nr_maps++];
+	slot->id = map_id;
+	slot->fd = fd;
+	slot->info = info;
+	slot->btf = btf;
+	slot->dump = dump;
+
+	return 0;
+
+err_close:
+	close(fd);
+	return -1;
+}
+
+/* Return the cached entry for @map_id, loading the map first if it is not
+ * cached yet. Returns NULL when the map could not be loaded or was skipped.
+ */
+static struct bpf_sk_storage_map_info *bpf_map_opts_get_info(
+	unsigned int map_id)
+{
+	unsigned int nr_before = bpf_map_opts.nr_maps;
+	unsigned int i;
+
+	for (i = 0; i < nr_before; ++i) {
+		if (bpf_map_opts.maps[i].id == map_id)
+			return &bpf_map_opts.maps[i];
+	}
+
+	if (bpf_map_opts_load_info(map_id))
+		return NULL;
+
+	/* bpf_map_opts_load_info() also returns 0 when it skips a map without
+	 * adding it. maps[nr_maps - 1] would then be an unrelated entry, or
+	 * out of bounds when the table is empty, so report failure instead.
+	 */
+	if (bpf_map_opts.nr_maps == nr_before)
+		return NULL;
+
+	return &bpf_map_opts.maps[bpf_map_opts.nr_maps - 1];
+}
+
+/* Handle one --bpf-map-id=MAP_ID argument: validate the ID and load the map.
+ *
+ * Returns 0 on success, -1 when --bpf-maps was already given, when the ID is
+ * malformed or out of range, or when the map information cannot be obtained.
+ */
+static int bpf_map_opts_add_id(const char *optarg)
+{
+	unsigned long map_id;
+	char *tail;
+
+	if (bpf_map_opts.show_all) {
+		bpf_map_opts_mixed_error();
+		return -1;
+	}
+
+	map_id = strtoul(optarg, &tail, 0);
+	/* Reject trailing characters, zero, and anything not below UINT32_MAX */
+	if (*tail != '\0' || map_id == 0 || map_id >= UINT32_MAX) {
+		fprintf(stderr, "ss: invalid BPF map ID %s\n", optarg);
+		return -1;
+	}
+
+	/* Looking the entry up loads the map's data if it is not cached yet */
+	return bpf_map_opts_get_info(map_id) ? 0 : -1;
+}
+
+/* Release every cached map: free its btf_dump and BTF object, close its fd. */
+static void bpf_map_opts_destroy(void)
+{
+	struct bpf_sk_storage_map_info *map = bpf_map_opts.maps;
+	unsigned int left;
+
+	for (left = bpf_map_opts.nr_maps; left > 0; --left, ++map) {
+		btf_dump__free(map->dump);
+		btf__free(map->btf);
+		close(map->fd);
+	}
+}
+
+/* Build the nested INET_DIAG_REQ_SK_BPF_STORAGES request attribute.
+ *
+ * With show_all set the attribute carries no payload; otherwise it contains
+ * one SK_DIAG_BPF_STORAGE_REQ_MAP_FD attribute per loaded map. Returns a
+ * malloc()ed buffer, or NULL if the allocation fails.
+ */
+static struct rtattr *bpf_map_opts_alloc_rta(void)
+{
+	const size_t fd_rta_len = RTA_LENGTH(sizeof(int));
+	struct rtattr *stgs_rta;
+	size_t payload_len = 0;
+	unsigned int i;
+	char *cursor;
+
+	/* An empty message makes the kernel return all the socket-local data
+	 * attached to a socket, no matter their map ID, so the payload is only
+	 * sized for the map fds when specific maps were requested.
+	 */
+	if (!bpf_map_opts.show_all)
+		payload_len = fd_rta_len * bpf_map_opts.nr_maps;
+
+	stgs_rta = malloc(RTA_LENGTH(payload_len));
+	if (!stgs_rta)
+		return NULL;
+
+	stgs_rta->rta_type = INET_DIAG_REQ_SK_BPF_STORAGES | NLA_F_NESTED;
+	stgs_rta->rta_len = RTA_LENGTH(payload_len);
+
+	/* nr_maps may be non-zero even with show_all (e.g. when
+	 * inet_show_netlink() retries fetching socket data), so bail out
+	 * before inserting specific map fds into the request.
+	 */
+	if (bpf_map_opts.show_all)
+		return stgs_rta;
+
+	cursor = RTA_DATA(stgs_rta);
+	for (i = 0; i < bpf_map_opts.nr_maps; i++, cursor += fd_rta_len) {
+		struct rtattr *fd_rta = (struct rtattr *)cursor;
+
+		fd_rta->rta_type = SK_DIAG_BPF_STORAGE_REQ_MAP_FD;
+		fd_rta->rta_len = fd_rta_len;
+		*(int *)RTA_DATA(fd_rta) = bpf_map_opts.maps[i].fd;
+	}
+
+	return stgs_rta;
+}
+
+/* Print one socket-local storage value on the current line (--oneline).
+ *
+ * @info: cached map entry providing the ID, the btf_dump and the value type
+ * @data: raw value bytes, @len: their size
+ *
+ * Emits " map_id:<id>" followed by the compact BTF dump of the value, or an
+ * error note if the dump fails.
+ */
+static void out_bpf_sk_storage_oneline(struct bpf_sk_storage_map_info *info,
+	const void *data, size_t len)
+{
+	struct btf_dump_type_data_opts opts = {
+		.sz = sizeof(struct btf_dump_type_data_opts),
+		.emit_zeroes = 1,
+		.compact = 1
+	};
+	int r;
+
+	/* info->id is unsigned, so it must be printed with %u */
+	out(" map_id:%u", info->id);
+	r = btf_dump__dump_type_data(info->dump, info->info.btf_value_type_id,
+				     data, len, &opts);
+	if (r < 0)
+		out("failed to dump data: %d", r);
+}
+
+/* Print one socket-local storage value over several lines.
+ *
+ * @info: cached map entry providing the ID, the btf_dump and the value type
+ * @data: raw value bytes, @len: their size
+ *
+ * Emits "map_id:<id> [" on its own tab-indented line, the BTF dump of the
+ * value (or an error note if the dump fails), then a closing "]".
+ */
+static void out_bpf_sk_storage_multiline(struct bpf_sk_storage_map_info *info,
+	const void *data, size_t len)
+{
+	struct btf_dump_type_data_opts opts = {
+		.sz = sizeof(struct btf_dump_type_data_opts),
+		.indent_level = 2,
+		.emit_zeroes = 1
+	};
+	int r;
+
+	/* info->id is unsigned, so it must be printed with %u */
+	out("\n\tmap_id:%u [\n", info->id);
+
+	r = btf_dump__dump_type_data(info->dump, info->info.btf_value_type_id,
+				     data, len, &opts);
+	if (r < 0)
+		out("\t\tfailed to dump data: %d", r);
+
+	out("\n\t]");
+}
+
+/* Print the socket-local storage value @data (@len bytes) belonging to map
+ * @map_id, in one-line or multi-line form depending on the oneline flag.
+ *
+ * The map information is looked up (and loaded on first use). If it cannot
+ * be obtained a note is printed in place of the value. If @len differs from
+ * the map's value size an error goes to stderr and nothing is printed.
+ */
+static void out_bpf_sk_storage(int map_id, const void *data, size_t len)
+{
+	struct bpf_sk_storage_map_info *map_info;
+
+	map_info = bpf_map_opts_get_info(map_id);
+	if (!map_info) {
+		/* The kernel might return a map we can't get info for, skip
+		 * it but print the other ones.
+		 */
+		out("\n\tmap_id: %d failed to fetch info, skipping\n", map_id);
+		return;
+	}
+
+	if (map_info->info.value_size != len) {
+		/* len is a size_t: %zu is the matching conversion */
+		fprintf(stderr,
+			"map_id: %d: invalid value size, expecting %u, got %zu\n",
+			map_id, map_info->info.value_size, len);
+		return;
+	}
+
+	if (oneline)
+		out_bpf_sk_storage_oneline(map_info, data, len);
+	else
+		out_bpf_sk_storage_multiline(map_info, data, len);
+}
+
+/* Walk the nested attributes of @bpf_stgs and print every socket-local
+ * storage entry found.
+ *
+ * Attributes whose type is not SK_DIAG_BPF_STORAGE are ignored, and so are
+ * entries missing either the map ID or the map value attribute.
+ */
+static void show_sk_bpf_storages(struct rtattr *bpf_stgs)
+{
+	struct rtattr *tb[SK_DIAG_BPF_STORAGE_MAX + 1], *bpf_stg;
+	unsigned int rem, map_id;
+	struct rtattr *value;
+
+	for (bpf_stg = RTA_DATA(bpf_stgs), rem = RTA_PAYLOAD(bpf_stgs);
+	     RTA_OK(bpf_stg, rem); bpf_stg = RTA_NEXT(bpf_stg, rem)) {
+
+		if ((bpf_stg->rta_type & NLA_TYPE_MASK) != SK_DIAG_BPF_STORAGE)
+			continue;
+
+		parse_rtattr_nested(tb, SK_DIAG_BPF_STORAGE_MAX, bpf_stg);
+
+		/* RTA_DATA()/RTA_PAYLOAD() must not be applied to a missing
+		 * attribute, so both the ID and the value are required.
+		 */
+		value = tb[SK_DIAG_BPF_STORAGE_MAP_VALUE];
+		if (!tb[SK_DIAG_BPF_STORAGE_MAP_ID] || !value)
+			continue;
+
+		map_id = rta_getattr_u32(tb[SK_DIAG_BPF_STORAGE_MAP_ID]);
+		out_bpf_sk_storage(map_id, RTA_DATA(value),
+				   RTA_PAYLOAD(value));
+	}
+}
+
+/* True when socket-local storage output was requested, i.e. --bpf-maps was
+ * given or at least one map has been loaded.
+ */
+static bool bpf_map_opts_is_enabled(void)
+{
+	if (bpf_map_opts.show_all)
+		return true;
+
+	return bpf_map_opts.nr_maps != 0;
+}
+#endif
+
static int inet_show_sock(struct nlmsghdr *nlh,
struct sockstat *s)
{
@@ -3391,8 +3731,9 @@ static int inet_show_sock(struct nlmsghdr *nlh,
struct inet_diag_msg *r = NLMSG_DATA(nlh);
unsigned char v6only = 0;
- parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
- nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+ parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
+ nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+ NLA_F_NESTED);
if (tb[INET_DIAG_PROTOCOL])
s->type = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]);
@@ -3489,6 +3830,11 @@ static int inet_show_sock(struct nlmsghdr *nlh,
}
sctp_ino = s->ino;
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ if (tb[INET_DIAG_SK_BPF_STORAGES])
+ show_sk_bpf_storages(tb[INET_DIAG_SK_BPF_STORAGES]);
+#endif
+
return 0;
}
@@ -3570,13 +3916,14 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
{
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
DIAG_REQUEST(req, struct inet_diag_req_v2 r);
+ struct rtattr *bpf_rta = NULL;
char *bc = NULL;
int bclen;
__u32 proto;
struct msghdr msg;
struct rtattr rta_bc;
struct rtattr rta_proto;
- struct iovec iov[5];
+ struct iovec iov[6];
int iovlen = 1;
if (family == PF_UNSPEC)
@@ -3629,6 +3976,20 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
iovlen += 2;
}
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ if (bpf_map_opts_is_enabled()) {
+ bpf_rta = bpf_map_opts_alloc_rta();
+ if (!bpf_rta) {
+ fprintf(stderr,
+ "ss: cannot alloc request for --bpf-map\n");
+ return -1;
+ }
+
+ iov[iovlen++] = (struct iovec){ bpf_rta, bpf_rta->rta_len };
+ req.nlh.nlmsg_len += bpf_rta->rta_len;
+ }
+#endif
+
msg = (struct msghdr) {
.msg_name = (void *)&nladdr,
.msg_namelen = sizeof(nladdr),
@@ -3637,10 +3998,13 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
};
if (sendmsg(fd, &msg, 0) < 0) {
+ free(bpf_rta);
close(fd);
return -1;
}
+ free(bpf_rta);
+
return 0;
}
@@ -5361,6 +5725,10 @@ static void _usage(FILE *dest)
" --tos show tos and priority information\n"
" --cgroup show cgroup information\n"
" -b, --bpf show bpf filter socket information\n"
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+" --bpf-maps show all BPF socket-local storage maps\n"
+" --bpf-map-id=MAP-ID show a BPF socket-local storage map\n"
+#endif
" -E, --events continually display sockets as they are destroyed\n"
" -Z, --context display task SELinux security contexts\n"
" -z, --contexts display task and socket SELinux security contexts\n"
@@ -5487,6 +5855,9 @@ wrong_state:
#define OPT_INET_SOCKOPT 262
+#define OPT_BPF_MAPS 263
+#define OPT_BPF_MAP_ID 264
+
static const struct option long_opts[] = {
{ "numeric", 0, 0, 'n' },
{ "resolve", 0, 0, 'r' },
@@ -5533,6 +5904,10 @@ static const struct option long_opts[] = {
{ "mptcp", 0, 0, 'M' },
{ "oneline", 0, 0, 'O' },
{ "inet-sockopt", 0, 0, OPT_INET_SOCKOPT },
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ { "bpf-maps", 0, 0, OPT_BPF_MAPS},
+ { "bpf-map-id", 1, 0, OPT_BPF_MAP_ID},
+#endif
{ 0 }
};
@@ -5741,6 +6116,19 @@ int main(int argc, char *argv[])
case OPT_INET_SOCKOPT:
show_inet_sockopt = 1;
break;
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ case OPT_BPF_MAPS:
+ if (bpf_map_opts.nr_maps) {
+ bpf_map_opts_mixed_error();
+ return -1;
+ }
+ bpf_map_opts.show_all = true;
+ break;
+ case OPT_BPF_MAP_ID:
+ if (bpf_map_opts_add_id(optarg))
+ exit(1);
+ break;
+#endif
case 'h':
help();
case '?':
@@ -5880,6 +6268,10 @@ int main(int argc, char *argv[])
if (show_processes || show_threads || show_proc_ctx || show_sock_ctx)
user_ent_destroy();
+#ifdef ENABLE_BPF_SKSTORAGE_SUPPORT
+ bpf_map_opts_destroy();
+#endif
+
render();
return 0;