From 60433a9d038db006ca2f49e3c5f050dc46aaad3a Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Thu, 23 Nov 2023 10:19:43 +0300 Subject: samples: introduce new samples subdir for cgroup Patch series "samples: introduce cgroup events listeners", v3. To begin with, this patch series relocates the cgroup example code to the samples/cgroup directory, which is the appropriate location for such code snippets. Furthermore, a new memcg events listener is introduced. This listener is a simple yet effective tool for monitoring memory events and managing counter changes during runtime. Additionally, as per Andrew Morton's suggestion, a helpful reminder comment is included in the memcontrol implementation. This comment serves to ensure that the samples code is updated whenever new events are added. This patch (of 3): Move the cgroup_event_listener for cgroup v1 to the samples directory. This suggestion was proposed by Andrew Morton during the discussion [1]. Link: https://lore.kernel.org/all/20231106140934.3f5d4960141562fe8da53906@linux-foundation.org/ [1] Link: https://lkml.kernel.org/r/20231123071945.25811-1-ddrokosov@salutedevices.com Link: https://lkml.kernel.org/r/20231123071945.25811-2-ddrokosov@salutedevices.com Signed-off-by: Dmitry Rokosov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- samples/Kconfig | 6 +++ samples/Makefile | 1 + samples/cgroup/Makefile | 5 ++ samples/cgroup/cgroup_event_listener.c | 83 ++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 samples/cgroup/Makefile create mode 100644 samples/cgroup/cgroup_event_listener.c (limited to 'samples') diff --git a/samples/Kconfig b/samples/Kconfig index b0ddf5f3673887..b288d9991d27ae 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -285,6 +285,12 @@ config SAMPLE_KMEMLEAK Build a sample program which have explicitly leaks memory to test kmemleak +config SAMPLE_CGROUP + bool "Build cgroup sample code" + depends on CGROUPS && CC_CAN_LINK && HEADERS_INSTALL + help + Build samples that demonstrate the usage of the cgroup API. + source "samples/rust/Kconfig" endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 0a551c2b33f430..b85fa64390c515 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,6 +3,7 @@ subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs +subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/ diff --git a/samples/cgroup/Makefile b/samples/cgroup/Makefile new file mode 100644 index 00000000000000..deef4530f5e776 --- /dev/null +++ b/samples/cgroup/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +userprogs-always-y += cgroup_event_listener + +userccflags += -I usr/include diff --git a/samples/cgroup/cgroup_event_listener.c b/samples/cgroup/cgroup_event_listener.c new file mode 100644 index 00000000000000..3d70dc831a7667 --- /dev/null +++ b/samples/cgroup/cgroup_event_listener.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cgroup_event_listener.c - Simple listener of cgroup events + * + * Copyright (C) Kirill A. Shutemov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define USAGE_STR "Usage: cgroup_event_listener " + +int main(int argc, char **argv) +{ + int efd = -1; + int cfd = -1; + int event_control = -1; + char event_control_path[PATH_MAX]; + char line[LINE_MAX]; + int ret; + + if (argc != 3) + errx(1, "%s", USAGE_STR); + + cfd = open(argv[1], O_RDONLY); + if (cfd == -1) + err(1, "Cannot open %s", argv[1]); + + ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", + dirname(argv[1])); + if (ret >= PATH_MAX) + errx(1, "Path to cgroup.event_control is too long"); + + event_control = open(event_control_path, O_WRONLY); + if (event_control == -1) + err(1, "Cannot open %s", event_control_path); + + efd = eventfd(0, 0); + if (efd == -1) + err(1, "eventfd() failed"); + + ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); + if (ret >= LINE_MAX) + errx(1, "Arguments string is too long"); + + ret = write(event_control, line, strlen(line) + 1); + if (ret == -1) + err(1, "Cannot write to cgroup.event_control"); + + while (1) { + uint64_t result; + + ret = read(efd, &result, sizeof(result)); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "Cannot read from eventfd"); + } + assert(ret == sizeof(result)); + + ret = access(event_control_path, W_OK); + if ((ret == -1) && (errno == ENOENT)) { + puts("The cgroup seems to have removed."); + break; + } + + if (ret == -1) + err(1, "cgroup.event_control is not accessible any more"); + + printf("%s %s: crossed\n", argv[1], argv[2]); + } + + return 0; +} -- cgit 1.2.3-korg From becf6529603589c9cb8cfda398a585c0c2e01aae Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Thu, 23 Nov 2023 10:19:44 +0300 Subject: samples/cgroup: introduce memcg memory.events listener This is a simple listener for memory events that handles counter changes in runtime. It can be set up for a specific memory cgroup v2. The output example: ===== $ /tmp/memcg_event_listener test Initialized MEMCG events with counters: MEMCG events: low: 0 high: 0 max: 0 oom: 0 oom_kill: 0 oom_group_kill: 0 Started monitoring memory events from '/sys/fs/cgroup/test/memory.events'... Received event in /sys/fs/cgroup/test/memory.events: *** 1 MEMCG oom_kill event, change counter 0 => 1 Received event in /sys/fs/cgroup/test/memory.events: *** 1 MEMCG oom_kill event, change counter 1 => 2 Received event in /sys/fs/cgroup/test/memory.events: *** 1 MEMCG oom_kill event, change counter 2 => 3 Received event in /sys/fs/cgroup/test/memory.events: *** 1 MEMCG oom_kill event, change counter 3 => 4 Received event in /sys/fs/cgroup/test/memory.events: *** 2 MEMCG max events, change counter 0 => 2 Received event in /sys/fs/cgroup/test/memory.events: *** 8 MEMCG max events, change counter 2 => 10 *** 1 MEMCG oom event, change counter 0 => 1 Received event in /sys/fs/cgroup/test/memory.events: *** 1 MEMCG oom_kill event, change counter 4 => 5 ^CExiting memcg event listener... ===== Link: https://lkml.kernel.org/r/20231123071945.25811-3-ddrokosov@salutedevices.com Signed-off-by: Dmitry Rokosov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- samples/cgroup/Makefile | 2 +- samples/cgroup/memcg_event_listener.c | 330 ++++++++++++++++++++++++++++++++++ 2 files changed, 331 insertions(+), 1 deletion(-) create mode 100644 samples/cgroup/memcg_event_listener.c (limited to 'samples') diff --git a/samples/cgroup/Makefile b/samples/cgroup/Makefile index deef4530f5e776..526c8569707cd4 100644 --- a/samples/cgroup/Makefile +++ b/samples/cgroup/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -userprogs-always-y += cgroup_event_listener +userprogs-always-y += cgroup_event_listener memcg_event_listener userccflags += -I usr/include diff --git a/samples/cgroup/memcg_event_listener.c b/samples/cgroup/memcg_event_listener.c new file mode 100644 index 00000000000000..a1667fe2489a60 --- /dev/null +++ b/samples/cgroup/memcg_event_listener.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * memcg_event_listener.c - Simple listener of memcg memory.events + * + * Copyright (c) 2023, SaluteDevices. All Rights Reserved. + * + * Author: Dmitry Rokosov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MEMCG_EVENTS "memory.events" + +/* Size of buffer to use when reading inotify events */ +#define INOTIFY_BUFFER_SIZE 8192 + +#define INOTIFY_EVENT_NEXT(event, length) ({ \ + (length) -= sizeof(*(event)) + (event)->len; \ + (event)++; \ +}) + +#define INOTIFY_EVENT_OK(event, length) ((length) >= (ssize_t)sizeof(*(event))) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +struct memcg_counters { + long low; + long high; + long max; + long oom; + long oom_kill; + long oom_group_kill; +}; + +struct memcg_events { + struct memcg_counters counters; + char path[PATH_MAX]; + int inotify_fd; + int inotify_wd; +}; + +static void print_memcg_counters(const struct memcg_counters *counters) +{ + printf("MEMCG events:\n"); + printf("\tlow: %ld\n", counters->low); + printf("\thigh: %ld\n", counters->high); + printf("\tmax: %ld\n", counters->max); + printf("\toom: %ld\n", counters->oom); + printf("\toom_kill: %ld\n", counters->oom_kill); + printf("\toom_group_kill: %ld\n", counters->oom_group_kill); +} + +static int get_memcg_counter(char *line, const char *name, long *counter) +{ + size_t len = strlen(name); + char *endptr; + long tmp; + + if (memcmp(line, name, len)) { + warnx("Counter line %s has wrong name, %s is expected", + line, name); + return -EINVAL; + } + + /* skip the whitespace delimiter */ + len += 1; + + errno = 0; + tmp = strtol(&line[len], &endptr, 10); + if (((tmp == LONG_MAX || tmp == LONG_MIN) && errno == ERANGE) || + (errno && !tmp)) { + warnx("Failed to parse: %s", &line[len]); + return -ERANGE; + } + + if (endptr == &line[len]) { + warnx("Not digits were found in line %s", &line[len]); + return -EINVAL; + } + + if (!(*endptr == '\0' || (*endptr == '\n' && *++endptr == '\0'))) { + warnx("Further characters after number: %s", endptr); + return -EINVAL; + } + + *counter = tmp; + + return 0; +} + +static int read_memcg_events(struct memcg_events *events, bool show_diff) +{ + FILE *fp = fopen(events->path, "re"); + size_t i; + int ret = 0; + bool any_new_events = false; + char *line = NULL; + size_t len = 0; + struct memcg_counters new_counters; + struct memcg_counters *counters = &events->counters; + struct { + const char *name; + long *new; + long *old; + } map[] = { + { + .name = "low", + .new = &new_counters.low, + .old = &counters->low, + }, + { + .name = "high", + .new = &new_counters.high, + .old = &counters->high, + }, + { + .name = "max", + .new = &new_counters.max, + .old = &counters->max, + }, + { + .name = "oom", + .new = &new_counters.oom, + .old = &counters->oom, + }, + { + .name = "oom_kill", + .new = &new_counters.oom_kill, + .old = &counters->oom_kill, + }, + { + .name = "oom_group_kill", + .new = &new_counters.oom_group_kill, + .old = &counters->oom_group_kill, + }, + }; + + if (!fp) { + warn("Failed to open memcg events file %s", events->path); + return -EBADF; + } + + /* Read new values for memcg counters */ + for (i = 0; i < ARRAY_SIZE(map); ++i) { + ssize_t nread; + + errno = 0; + nread = getline(&line, &len, fp); + if (nread == -1) { + if (errno) { + warn("Failed to read line for counter %s", + map[i].name); + ret = -EIO; + goto exit; + } + + break; + } + + ret = get_memcg_counter(line, map[i].name, map[i].new); + if (ret) { + warnx("Failed to get counter value from line %s", line); + goto exit; + } + } + + for (i = 0; i < ARRAY_SIZE(map); ++i) { + long diff; + + if (*map[i].new > *map[i].old) { + diff = *map[i].new - *map[i].old; + + if (show_diff) + printf("*** %ld MEMCG %s event%s, " + "change counter %ld => %ld\n", + diff, map[i].name, + (diff == 1) ? "" : "s", + *map[i].old, *map[i].new); + + *map[i].old += diff; + any_new_events = true; + } + } + + if (show_diff && !any_new_events) + printf("*** No new untracked memcg events available\n"); + +exit: + free(line); + fclose(fp); + + return ret; +} + +static void process_memcg_events(struct memcg_events *events, + struct inotify_event *event) +{ + int ret; + + if (events->inotify_wd != event->wd) { + warnx("Unknown inotify event %d, should be %d", event->wd, + events->inotify_wd); + return; + } + + printf("Received event in %s:\n", events->path); + + if (!(event->mask & IN_MODIFY)) { + warnx("No IN_MODIFY event, skip it"); + return; + } + + ret = read_memcg_events(events, /* show_diff = */true); + if (ret) + warnx("Can't read memcg events"); +} + +static void monitor_events(struct memcg_events *events) +{ + struct pollfd fds[1]; + int ret; + + printf("Started monitoring memory events from '%s'...\n", events->path); + + fds[0].fd = events->inotify_fd; + fds[0].events = POLLIN; + + for (;;) { + ret = poll(fds, ARRAY_SIZE(fds), -1); + if (ret < 0 && errno != EAGAIN) + err(EXIT_FAILURE, "Can't poll memcg events (%d)", ret); + + if (fds[0].revents & POLLERR) + err(EXIT_FAILURE, "Got POLLERR during monitor events"); + + if (fds[0].revents & POLLIN) { + struct inotify_event *event; + char buffer[INOTIFY_BUFFER_SIZE]; + ssize_t length; + + length = read(fds[0].fd, buffer, INOTIFY_BUFFER_SIZE); + if (length <= 0) + continue; + + event = (struct inotify_event *)buffer; + while (INOTIFY_EVENT_OK(event, length)) { + process_memcg_events(events, event); + event = INOTIFY_EVENT_NEXT(event, length); + } + } + } +} + +static int initialize_memcg_events(struct memcg_events *events, + const char *cgroup) +{ + int ret; + + memset(events, 0, sizeof(struct memcg_events)); + + ret = snprintf(events->path, PATH_MAX, + "/sys/fs/cgroup/%s/memory.events", cgroup); + if (ret >= PATH_MAX) { + warnx("Path to cgroup memory.events is too long"); + return -EMSGSIZE; + } else if (ret < 0) { + warn("Can't generate cgroup event full name"); + return ret; + } + + ret = read_memcg_events(events, /* show_diff = */false); + if (ret) { + warnx("Failed to read initial memcg events state (%d)", ret); + return ret; + } + + events->inotify_fd = inotify_init(); + if (events->inotify_fd < 0) { + warn("Failed to setup new inotify device"); + return -EMFILE; + } + + events->inotify_wd = inotify_add_watch(events->inotify_fd, + events->path, IN_MODIFY); + if (events->inotify_wd < 0) { + warn("Couldn't add monitor in dir %s", events->path); + return -EIO; + } + + printf("Initialized MEMCG events with counters:\n"); + print_memcg_counters(&events->counters); + + return 0; +} + +static void cleanup_memcg_events(struct memcg_events *events) +{ + inotify_rm_watch(events->inotify_fd, events->inotify_wd); + close(events->inotify_fd); +} + +int main(int argc, const char **argv) +{ + struct memcg_events events; + ssize_t ret; + + if (argc != 2) + errx(EXIT_FAILURE, "Usage: %s ", argv[0]); + + ret = initialize_memcg_events(&events, argv[1]); + if (ret) + errx(EXIT_FAILURE, "Can't initialize memcg events (%zd)", ret); + + monitor_events(&events); + + cleanup_memcg_events(&events); + + printf("Exiting memcg event listener...\n"); + + return EXIT_SUCCESS; +} -- cgit 1.2.3-korg