aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile13
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/hv/hv_kvp_daemon.c213
-rw-r--r--tools/include/linux/btf_ids.h2
-rw-r--r--tools/include/linux/kernel.h1
-rw-r--r--tools/include/linux/mm.h5
-rw-r--r--tools/include/linux/panic.h19
-rw-r--r--tools/lib/bpf/libbpf.c10
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py7
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.818
-rw-r--r--tools/power/x86/turbostat/turbostat.c2017
-rw-r--r--tools/testing/cxl/test/cxl.c10
-rw-r--r--tools/testing/kunit/configs/all_tests.config3
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c68
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/settings1
-rw-r--r--tools/testing/selftests/exec/Makefile4
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script.py10
-rw-r--r--tools/testing/selftests/exec/execveat.c12
-rw-r--r--tools/testing/selftests/exec/load_address.c34
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c53
-rw-r--r--tools/testing/selftests/kselftest.h33
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c4
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c39
-rw-r--r--tools/testing/selftests/mm/gup_test.c2
-rw-r--r--tools/testing/selftests/mm/protection_keys.c6
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.c3
-rw-r--r--tools/testing/selftests/mm/uffd-common.h2
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c13
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c783
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh9
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c2
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh205
-rw-r--r--tools/testing/selftests/net/tls.c34
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh10
-rw-r--r--tools/testing/selftests/timers/posix_timers.c105
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c73
-rwxr-xr-xtools/testing/selftests/turbostat/defcolumns.py60
52 files changed, 3197 insertions, 760 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 37e9f680483264..276f5d0d53a447 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -11,7 +11,6 @@ help:
@echo ''
@echo ' acpi - ACPI tools'
@echo ' bpf - misc BPF tools'
- @echo ' cgroup - cgroup tools'
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
@@ -69,7 +68,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -116,7 +115,7 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
-all: acpi cgroup counter cpupower gpio hv firewire \
+all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
@@ -128,7 +127,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -155,7 +154,7 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
-install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
@@ -169,7 +168,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -209,7 +208,7 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
-clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4fa4ade1ce7445..540c0f2c4fda07 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
int i, n;
/* recognize hard coded LLVM section name */
- if (strcmp(sec_name, ".arena.1") == 0) {
+ if (strcmp(sec_name, ".addr_space.1") == 0) {
/* this is the name to use in skeleton */
snprintf(buf, buf_sz, "arena");
return true;
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 318e2dad27e048..ae57bf69ad4af3 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,6 +76,12 @@ enum {
DNS
};
+enum {
+ IPV4 = 1,
+ IPV6,
+ IP_TYPE_MAX
+};
+
static int in_hand_shake;
static char *os_name = "";
@@ -102,6 +108,11 @@ static struct utsname uts_buf;
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
struct kvp_record {
char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
return 0;
}
+int ip_version_check(const char *input_addr)
+{
+ struct in6_addr addr;
+
+ if (inet_pton(AF_INET, input_addr, &addr))
+ return IPV4;
+ else if (inet_pton(AF_INET6, input_addr, &addr))
+ return IPV6;
+
+ return -EINVAL;
+}
+
/*
* Only IPv4 subnet strings needs to be converted to plen
* For IPv6 the subnet is already privided in plen format
@@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
return plen;
}
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+ int ip_sec)
+{
+ char addr[INET6_ADDRSTRLEN], *output_str;
+ int ip_offset = 0, error = 0, ip_ver;
+ char *param_name;
+
+ if (type == DNS)
+ param_name = "dns";
+ else if (type == GATEWAY)
+ param_name = "gateway";
+ else
+ return -EINVAL;
+
+ output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+ if (!output_str)
+ return -ENOMEM;
+
+ while (1) {
+ memset(addr, 0, sizeof(addr));
+
+ if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+ (MAX_IP_ADDR_SIZE * 2)))
+ break;
+
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+ (ip_ver == IPV6 && ip_sec == IPV6)) {
+ /*
+ * do a bound check to avoid out-of bound writes
+ */
+ if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+ (strlen(addr) + 1)) {
+ strncat(output_str, addr,
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ strncat(output_str, ",",
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (strlen(output_str)) {
+ /*
+ * This is to get rid of that extra comma character
+ * in the end of the string
+ */
+ output_str[strlen(output_str) - 1] = '\0';
+ error = fprintf(f, "%s=%s\n", param_name, output_str);
+ }
+
+ free(output_str);
+ return error;
+}
+
static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
- int is_ipv6)
+ int ip_sec)
{
char addr[INET6_ADDRSTRLEN];
char subnet_addr[INET6_ADDRSTRLEN];
- int error, i = 0;
+ int error = 0, i = 0;
int ip_offset = 0, subnet_offset = 0;
- int plen;
+ int plen, ip_ver;
memset(addr, 0, sizeof(addr));
memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
subnet_addr,
(MAX_IP_ADDR_SIZE *
2))) {
- if (!is_ipv6)
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if (ip_ver == IPV4 && ip_sec == IPV4)
plen = kvp_subnet_to_plen((char *)subnet_addr);
- else
+ else if (ip_ver == IPV6 && ip_sec == IPV6)
plen = atoi(subnet_addr);
+ else
+ continue;
if (plen < 0)
return plen;
@@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
memset(subnet_addr, 0, sizeof(subnet_addr));
}
- return 0;
+ return error;
}
static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
{
- int error = 0;
+ int error = 0, ip_ver;
char if_filename[PATH_MAX];
char nm_filename[PATH_MAX];
FILE *ifcfg_file, *nmfile;
char cmd[PATH_MAX];
- int is_ipv6 = 0;
char *mac_addr;
int str_len;
@@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error)
goto setval_error;
- if (new_val->addr_family & ADDR_FAMILY_IPV6) {
- error = fprintf(nmfile, "\n[ipv6]\n");
- if (error < 0)
- goto setval_error;
- is_ipv6 = 1;
- } else {
- error = fprintf(nmfile, "\n[ipv4]\n");
- if (error < 0)
- goto setval_error;
- }
-
/*
* Now we populate the keyfile format
+ *
+ * The keyfile format expects the IPv6 and IPv4 configuration in
+ * different sections. Therefore we iterate through the list twice,
+ * once to populate the IPv4 section and the next time for IPv6
*/
+ ip_ver = IPV4;
+ do {
+ if (ip_ver == IPV4) {
+ error = fprintf(nmfile, "\n[ipv4]\n");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = fprintf(nmfile, "\n[ipv6]\n");
+ if (error < 0)
+ goto setval_error;
+ }
- if (new_val->dhcp_enabled) {
- error = kvp_write_file(nmfile, "method", "", "auto");
- if (error < 0)
- goto setval_error;
- } else {
- error = kvp_write_file(nmfile, "method", "", "manual");
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name services
+ */
+ error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+ (char *)new_val->sub_net,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
- /*
- * Write the configuration for ipaddress, netmask, gateway and
- * name services
- */
- error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
- (char *)new_val->sub_net, is_ipv6);
- if (error < 0)
- goto setval_error;
+ /*
+ * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+ * methods for ipv6 based on dhcp_enabled flag.
+ *
+ * For ipv4, set method to manual only when dhcp_enabled is
+ * false and specific ipv4 addresses are configured. If neither
+ * dhcp_enabled is true and no ipv4 addresses are configured,
+ * set method to 'disabled'.
+ *
+ * For ipv6, set method to manual when we configure ipv6
+ * addresses. Otherwise set method to 'auto' so that SLAAC from
+ * RA may be used.
+ */
+ if (ip_ver == IPV4) {
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ } else if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "disabled");
+ if (error < 0)
+ goto setval_error;
+ }
+ } else if (ip_ver == IPV6) {
+ if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ }
+ }
- /* we do not want ipv4 addresses in ipv6 section and vice versa */
- if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->gate_way,
+ GATEWAY, ip_ver);
if (error < 0)
goto setval_error;
- }
- if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->dns_addr, DNS,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
+
+ ip_ver++;
+ } while (ip_ver < IP_TYPE_MAX);
+
fclose(nmfile);
fclose(ifcfg_file);
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 72535f00572f6e..72ea363d434db0 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#include <linux/types.h> /* for u32 */
+
struct btf_id_set {
u32 cnt;
u32 ids[];
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 4b0673bf52c2e6..07cfad817d5390 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index f3c82ab5b14cd7..7d73da0980473f 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644
index 00000000000000..9c8f17a41ce8ed
--- /dev/null
+++ b/tools/include/linux/panic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index efab29b8935bd9..a2061fcd612d7f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -498,7 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
-#define ARENA_SEC ".arena.1"
+#define ARENA_SEC ".addr_space.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags)
return syscall(__NR_memfd_create, name, flags);
}
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
static int create_placeholder_fd(void)
{
int fd;
@@ -5352,8 +5356,8 @@ retry:
goto err_out;
}
if (map->def.type == BPF_MAP_TYPE_ARENA) {
- map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
- PROT_READ | PROT_WRITE,
+ map->mmaped = mmap((void *)(long)map->map_extra,
+ bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
map->fd, 0);
if (map->mmaped == MAP_FAILED) {
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 6b7eb2d2aaf188..a451cbfbd781d9 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -228,8 +228,11 @@ class Type(SpecAttr):
presence = ''
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
- if self.presence_type() == 'bit':
- code.append(presence + ' = 1;')
+ # Every layer below last is a nest, so we know it uses bit presence
+ # last layer is "self" and may be a complex type
+ if i == len(ref) - 1 and self.presence_type() != 'bit':
+ continue
+ code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0b10ad00866867..0a33d9195b7a91 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -585,7 +585,7 @@ static int add_dead_ends(struct objtool_file *file)
struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
- unsigned long offset;
+ uint64_t offset;
/*
* Check for manually annotated dead ends.
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 8f08c3fd498d5b..0d3672e5d9ed15 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
.PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
Busy% = MPERF_delta/TSC_delta
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
Note that these calculations depend on TSC_delta, so they
are not reliable during intervals when TSC_MHz is not running at the base frequency.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a334377f92b97..98256468e24806 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -36,6 +36,8 @@
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
#define UNUSED(x) (void)(x)
@@ -53,9 +55,13 @@
#define NAME_BYTES 20
#define PATH_BYTES 128
+#define MAX_NOFILE 0x8000
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
struct msr_counter {
unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
+#define BIC_SAM_mc6 (1ULL << 55)
+#define BIC_SAMMHz (1ULL << 56)
+#define BIC_SAMACTMHz (1ULL << 57)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+struct amperf_group_fd;
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+ GFX_rc6,
+ GFX_MHz,
+ GFX_ACTMHz,
+ SAM_mc6,
+ SAM_MHz,
+ SAM_ACTMHz,
+ GFX_MAX
+};
+
+struct gfx_sysfs_info {
+ const char *path;
+ FILE *fp;
+ unsigned int val;
+ unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
+ .has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+ RAPL_RCI_INDEX_ENERGY_PKG = 0,
+ RAPL_RCI_INDEX_ENERGY_CORES = 1,
+ RAPL_RCI_INDEX_DRAM = 2,
+ RAPL_RCI_INDEX_GFX = 3,
+ RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+ RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+ RAPL_RCI_INDEX_CORE_ENERGY = 6,
+ NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+ RAPL_UNIT_INVALID,
+ RAPL_UNIT_JOULES,
+ RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+ unsigned long long data[NUM_RAPL_COUNTERS];
+ enum rapl_source source[NUM_RAPL_COUNTERS];
+ unsigned long long flags[NUM_RAPL_COUNTERS];
+ double scale[NUM_RAPL_COUNTERS];
+ enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+ union {
+ /* Active when source == RAPL_SOURCE_MSR */
+ struct {
+ unsigned long long msr[NUM_RAPL_COUNTERS];
+ unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+ int msr_shift[NUM_RAPL_COUNTERS];
+ };
+ };
+
+ int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+ int feature_mask; /* Mask for testing if the counter is supported on host */
+ const char *perf_subsys;
+ const char *perf_name;
+ unsigned long long msr;
+ unsigned long long msr_mask;
+ int msr_shift; /* Positive mean shift right, negative mean shift left */
+ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+ unsigned int rci_index; /* Maps data from perf counters to global variables */
+ unsigned long long bic;
+ double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */
+ unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+ {
+ .feature_mask = RAPL_PKG,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_CORE_ENERGY_STATUS,
+ .perf_subsys = "power",
+ .perf_name = "energy-cores",
+ .msr = MSR_PP0_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM,
+ .perf_subsys = "power",
+ .perf_name = "energy-ram",
+ .msr = MSR_DRAM_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_dram_energy_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM,
+ .bic = BIC_RAMWatt | BIC_RAM_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_GFX,
+ .perf_subsys = "power",
+ .perf_name = "energy-gpu",
+ .msr = MSR_PP1_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_GFX,
+ .bic = BIC_GFXWatt | BIC_GFX_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_PKG_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_PKG_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+ .bic = BIC_PKG__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_DRAM_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+ .bic = BIC_RAM__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_CORE_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = 0,
+ },
+};
+
+struct rapl_counter {
+ unsigned long long raw_value;
+ enum rapl_unit unit;
+ double scale;
+};
+
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
- unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
+ struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
- unsigned long long cpu_lpi;
- unsigned long long sys_lpi;
+ long long cpu_lpi;
+ long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
+ long long sam_mc6_ms;
+ unsigned int sam_mhz;
+ unsigned int sam_act_mhz;
unsigned int package_id;
- unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
struct msr_counter *pp;
} sys;
+void free_sys_counters(void)
+{
+ struct msr_counter *p = sys.tp, *pnext = NULL;
+
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.cp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.pp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ sys.added_thread_counters = 0;
+ sys.added_core_counters = 0;
+ sys.added_package_counters = 0;
+ sys.tp = NULL;
+ sys.cp = NULL;
+ sys.pp = NULL;
+}
+
struct system_summary {
struct thread_data threads;
struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+ "or run with --no-msr, or run as root", pathname);
fd_percpu[cpu] = fd;
return fd;
}
+static void bic_disable_msr_access(void)
+{
+ const unsigned long bic_msrs =
+ BIC_SMI |
+ BIC_CPU_c1 |
+ BIC_CPU_c3 |
+ BIC_CPU_c6 |
+ BIC_CPU_c7 |
+ BIC_Mod_c6 |
+ BIC_CoreTmp |
+ BIC_Totl_c0 |
+ BIC_Any_c0 |
+ BIC_GFX_c0 |
+ BIC_CPUGFX |
+ BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+ bic_enabled &= ~bic_msrs;
+
+ free_sys_counters();
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
+ assert(!no_perf);
+
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
- struct perf_event_attr pea;
- int fd;
+ struct perf_event_attr attr;
+ const pid_t pid = -1;
+ const unsigned long flags = 0;
- memset(&pea, 0, sizeof(struct perf_event_attr));
- pea.type = PERF_TYPE_HARDWARE;
- pea.size = sizeof(struct perf_event_attr);
- pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+ assert(!no_perf);
- /* counter for cpu_num, including user + kernel and all processes */
- fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1) {
- warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
- BIC_NOT_PRESENT(BIC_IPC);
- }
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 0;
+ attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+ attr.read_format = read_format;
+
+ const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
return fd;
}
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
if (fd_instr_count_percpu[cpu])
return fd_instr_count_percpu[cpu];
- fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+ fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
return fd_instr_count_percpu[cpu];
}
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
+ assert(!no_msr);
+
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
+int probe_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long dummy;
+
+ assert(!no_msr);
+
+ retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+ if (retval != sizeof(dummy))
+ return 1;
+
+ return 0;
+}
+
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
+ " -M, --no-msr Disable all uses of the MSR driver\n"
+ " -P, --no-perf Disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SAM_mc6))
+ outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+ outp +=
+ sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ t->counter[i], mp->path);
}
}
- if (c) {
+ if (c && is_cpu_first_thread_in_core(t, c, p)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
- outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+ const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+ const double energy_scale = c->core_energy.scale;
+
+ if (c->core_energy.unit == RAPL_UNIT_JOULES)
+ outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+ outp +=
+ sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ c->counter[i], mp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p) {
+ if (p && is_cpu_first_core_in_package(t, c, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+ outp +=
+ sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ p->counter[i], mp->path);
}
}
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
}
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+ assert(desired_unit != RAPL_UNIT_INVALID);
+
+ /*
+ * For now we don't expect anything other than joules,
+ * so just simplify the logic.
+ */
+ assert(c->unit == RAPL_UNIT_JOULES);
+
+ const double scaled = c->raw_value * c->scale;
+
+ if (desired_unit == RAPL_UNIT_WATTS)
+ return scaled / interval;
+ return scaled;
+}
+
/*
* column formatting convention & formats
*/
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+ /* SAMmc6 */
+ if (DO_BIC(BIC_SAM_mc6)) {
+ if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->sam_mc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* SAMMHz */
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+ /* SAMACTMHz */
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
- if (DO_BIC(BIC_CPU_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
- if (DO_BIC(BIC_SYS_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_CPU_LPI)) {
+ if (p->cpu_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_SYS_LPI)) {
+ if (p->sys_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_PkgWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- p->energy_dram * rapl_dram_energy_units / interval_float);
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
- old->energy_pkg = new->energy_pkg - old->energy_pkg;
- old->energy_cores = new->energy_cores - old->energy_cores;
- old->energy_gfx = new->energy_gfx - old->energy_gfx;
- old->energy_dram = new->energy_dram - old->energy_dram;
- old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
- old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+ /* flag an error when mc6 counter resets/wraps */
+ if (old->sam_mc6_ms > new->sam_mc6_ms)
+ old->sam_mc6_ms = -1;
+ else
+ old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+ old->sam_mhz = new->sam_mhz;
+ old->sam_act_mhz = new->sam_act_mhz;
+
+ old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+ old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+ old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+ old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+ old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value =
+ new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
- DELTA_WRAP32(new->core_energy, old->core_energy);
+ DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
return retval;
}
+void rapl_counter_clear(struct rapl_counter *c)
+{
+ c->raw_value = 0;
+ c->scale = 0.0;
+ c->unit = RAPL_UNIT_INVALID;
+}
+
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
- c->core_energy = 0;
+ rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->cpu_lpi = 0;
p->sys_lpi = 0;
- p->energy_pkg = 0;
- p->energy_dram = 0;
- p->energy_cores = 0;
- p->energy_gfx = 0;
- p->rapl_pkg_perf_status = 0;
- p->rapl_dram_perf_status = 0;
+ rapl_counter_clear(&p->energy_pkg);
+ rapl_counter_clear(&p->energy_dram);
+ rapl_counter_clear(&p->energy_cores);
+ rapl_counter_clear(&p->energy_gfx);
+ rapl_counter_clear(&p->rapl_pkg_perf_status);
+ rapl_counter_clear(&p->rapl_dram_perf_status);
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
+ p->sam_mc6_ms = 0;
+ p->sam_mhz = 0;
+ p->sam_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->counter[i] = 0;
}
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+ /* Copy unit and scale from src if dst is not initialized */
+ if (dst->unit == RAPL_UNIT_INVALID) {
+ dst->unit = src->unit;
+ dst->scale = src->scale;
+ }
+
+ assert(dst->unit == src->unit);
+ assert(dst->scale == src->scale);
+
+ dst->raw_value += src->raw_value;
+}
+
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
- average.cores.core_energy += c->core_energy;
+ rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.cpu_lpi = p->cpu_lpi;
average.packages.sys_lpi = p->sys_lpi;
- average.packages.energy_pkg += p->energy_pkg;
- average.packages.energy_dram += p->energy_dram;
- average.packages.energy_cores += p->energy_cores;
- average.packages.energy_gfx += p->energy_gfx;
+ rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+ rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+ rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+ rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
+ average.packages.sam_mc6_ms = p->sam_mc6_ms;
+ average.packages.sam_mhz = p->sam_mhz;
+ average.packages.sam_act_mhz = p->sam_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
- average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
- average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+ rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+ rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW)
- continue;
- average.packages.counter[i] += p->counter[i];
+ if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+ average.packages.counter[i] = p->counter[i];
+ else
+ average.packages.counter[i] += p->counter[i];
}
return 0;
}
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
if (mp->msr_num != 0) {
+ assert(!no_msr);
if (get_msr(cpu, mp->msr_num, counterp))
return -1;
} else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
{
char path[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
die);
return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
return epb;
msr_fallback:
+ if (no_msr)
+ return -1;
+
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
return msr & 0xf;
@@ -2700,6 +3099,351 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
return 0;
}
+struct amperf_group_fd {
+ int aperf; /* Also the group descriptor */
+ int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
+{
+ int fdmt;
+ int bytes_read;
+ char buf[64];
+ int ret = -1;
+
+ fdmt = open(path, O_RDONLY, 0);
+ if (fdmt == -1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+ if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ buf[bytes_read] = '\0';
+
+ if (sscanf(buf, parse_format, value_ptr) != 1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ ret = 0;
+
+cleanup_and_exit:
+ close(fdmt);
+ return ret;
+}
+
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+ unsigned int v;
+ int status;
+
+ status = read_perf_counter_info(path, parse_format, &v);
+ if (status)
+ v = -1;
+
+ return v;
+}
+
+static unsigned int read_msr_type(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/type";
+ const char *const format = "%u";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_aperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_mperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_type(const char *subsys)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+ const char *const format = "%u";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+ const char *const format = "event=%x";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+ const char *const format = "%s";
+ char path[128];
+ char unit_buffer[16];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ read_perf_counter_info(path, format, &unit_buffer);
+ if (strcmp("Joules", unit_buffer) == 0)
+ return RAPL_UNIT_JOULES;
+
+ return RAPL_UNIT_INVALID;
+}
+
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+ const char *const format = "%lf";
+ char path[128];
+ double scale;
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ if (read_perf_counter_info(path, format, &scale))
+ return 0.0;
+
+ return scale;
+}
+
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+ const unsigned int msr_type = read_msr_type();
+ const unsigned int aperf_config = read_aperf_config();
+ const unsigned int mperf_config = read_mperf_config();
+ struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+ fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+ fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+ return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+ assert(fd_amperf_percpu);
+
+ if (fd_amperf_percpu[cpu].aperf)
+ return fd_amperf_percpu[cpu].aperf;
+
+ fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+ return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+ union {
+ struct {
+ unsigned long nr_entries;
+ unsigned long aperf;
+ unsigned long mperf;
+ };
+
+ unsigned long as_array[3];
+ } cnt;
+
+ const int fd_amperf = get_amperf_fd(cpu);
+
+ /*
+ * Read the TSC with rdtsc, because we want the absolute value and not
+ * the offset from the start of the counter.
+ */
+ t->tsc = rdtsc();
+
+ const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+ if (n != sizeof(cnt.as_array))
+ return -2;
+
+ t->aperf = cnt.aperf * aperf_mperf_multiplier;
+ t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ int aperf_mperf_retry_count = 0;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+retry:
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+ size_t ret = 0;
+
+ for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+ if (rci->source[i] == RAPL_SOURCE_PERF)
+ ++ret;
+
+ return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+ rc->raw_value = rci->data[idx];
+ rc->unit = rci->unit[idx];
+ rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+ if (debug)
+ fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+ assert(rapl_counter_info_perdomain);
+
+ /*
+ * If we have any perf counters to read, read them all now, in bulk
+ */
+ if (rci->fd_perf != -1) {
+ size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+ const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+ const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+ if (actual_read_size != expected_read_size)
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+ actual_read_size);
+ }
+
+ for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+ switch (rci->source[i]) {
+ case RAPL_SOURCE_NONE:
+ break;
+
+ case RAPL_SOURCE_PERF:
+ assert(pi < ARRAY_SIZE(perf_data));
+ assert(rci->fd_perf != -1);
+
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+ i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+ rci->data[i] = perf_data[pi];
+
+ ++pi;
+ break;
+
+ case RAPL_SOURCE_MSR:
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+ assert(!no_msr);
+ if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+ if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ } else {
+ if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ }
+
+ rci->data[i] &= rci->msr_mask[i];
+ if (rci->msr_shift[i] >= 0)
+ rci->data[i] >>= abs(rci->msr_shift[i]);
+ else
+ rci->data[i] <<= abs(rci->msr_shift[i]);
+
+ break;
+ }
+ }
+
+ _Static_assert(NUM_RAPL_COUNTERS == 7);
+ write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+ write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+ write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+ write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+ write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+ write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+ write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2709,12 +3453,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int cpu = t->cpu_id;
unsigned long long msr;
- int aperf_mperf_retry_count = 0;
struct msr_counter *mp;
int i;
+ int status;
if (cpu_migrate(cpu)) {
- fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
return -1;
}
@@ -2722,63 +3466,26 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (first_counter_read)
get_apic_id(t);
-retry:
+
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
|| soft_c1_residency_display(BIC_Avg_MHz)) {
- unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
-
- /*
- * The TSC, APERF and MPERF must be read together for
- * APERF/MPERF and MPERF/TSC to give accurate results.
- *
- * Unfortunately, APERF and MPERF are read by
- * individual system call, so delays may occur
- * between them. If the time to read them
- * varies by a large amount, we re-read them.
- */
-
- /*
- * This initial dummy APERF read has been seen to
- * reduce jitter in the subsequent reads.
- */
-
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
-
- t->tsc = rdtsc(); /* re-read close to APERF */
-
- tsc_before = t->tsc;
+ int status = -1;
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+ assert(!no_perf || !no_msr);
- tsc_between = rdtsc();
-
- if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
- return -4;
-
- tsc_after = rdtsc();
-
- aperf_time = tsc_between - tsc_before;
- mperf_time = tsc_after - tsc_between;
-
- /*
- * If the system call latency to read APERF and MPERF
- * differ by more than 2x, then try again.
- */
- if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
- aperf_mperf_retry_count++;
- if (aperf_mperf_retry_count < 5)
- goto retry;
- else
- warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ switch (amperf_source) {
+ case AMPERF_SOURCE_PERF:
+ status = read_aperf_mperf_tsc_perf(t, cpu);
+ break;
+ case AMPERF_SOURCE_MSR:
+ status = read_aperf_mperf_tsc_msr(t, cpu);
+ break;
}
- aperf_mperf_retry_count = 0;
- t->aperf = t->aperf * aperf_mperf_multiplier;
- t->mperf = t->mperf * aperf_mperf_multiplier;
+ if (status != 0)
+ return status;
}
if (DO_BIC(BIC_IPC))
@@ -2806,6 +3513,12 @@ retry:
if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
+ if (platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, c->core_id, c, p);
+ if (status != 0)
+ return status;
+ }
+
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
@@ -2846,12 +3559,6 @@ retry:
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
- return -14;
- c->core_energy = msr & 0xFFFFFFFF;
- }
-
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &c->counter[i]))
return -10;
@@ -2911,59 +3618,39 @@ retry:
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (platform->rapl_msrs & RAPL_PKG) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
- return -13;
- p->energy_pkg = msr;
- }
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
- if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
- return -14;
- p->energy_cores = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM) {
- if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
- return -15;
- p->energy_dram = msr;
- }
- if (platform->rapl_msrs & RAPL_GFX) {
- if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
- return -16;
- p->energy_gfx = msr;
- }
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
- return -16;
- p->rapl_pkg_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
- return -16;
- p->rapl_dram_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
- return -13;
- p->energy_pkg = msr;
+ if (!platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, p->package_id, c, p);
+ if (status != 0)
+ return status;
}
+
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
- if (DO_BIC(BIC_GFX_rc6))
- p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
/* n.b. assume die0 uncore frequency applies to whole package */
if (DO_BIC(BIC_UNCORE_MHZ))
p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
if (DO_BIC(BIC_GFXMHz))
- p->gfx_mhz = gfx_cur_mhz;
+ p->gfx_mhz = gfx_info[GFX_MHz].val;
if (DO_BIC(BIC_GFXACTMHz))
- p->gfx_act_mhz = gfx_act_mhz;
+ p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+ if (DO_BIC(BIC_SAM_mc6))
+ p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+ if (DO_BIC(BIC_SAMMHz))
+ p->sam_mhz = gfx_info[SAM_MHz].val;
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
unsigned long long msr;
int *pkg_cstate_limits;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
{
unsigned long long msr;
- if (!platform->has_irtl_msrs)
+ if (!platform->has_irtl_msrs || no_msr)
return;
if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
{
int i;
+ if (!fd_percpu)
+ return;
+
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
if (fd_percpu[i] != 0)
close(fd_percpu[i]);
}
free(fd_percpu);
+ fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+ int i;
+
+ if (!fd_amperf_percpu)
+ return;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_amperf_percpu[i].mperf != 0)
+ close(fd_amperf_percpu[i].mperf);
+
+ if (fd_amperf_percpu[i].aperf != 0)
+ close(fd_amperf_percpu[i].aperf);
+ }
+
+ free(fd_amperf_percpu);
+ fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+ if (!fd_instr_count_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_instr_count_percpu[i] != 0)
+ close(fd_instr_count_percpu[i]);
+ }
+
+ free(fd_instr_count_percpu);
+ fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+ if (!rapl_counter_info_perdomain)
+ return;
+
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+ close(rapl_counter_info_perdomain[domain_id].fd_perf);
+ }
+
+ free(rapl_counter_info_perdomain);
}
void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
outp = NULL;
free_fd_percpu();
+ free_fd_instr_count_percpu();
+ free_fd_amperf_percpu();
+ free_fd_rapl_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
void re_initialize(void)
{
free_all_buffers();
setup_all_buffers(false);
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+ linux_perf_init();
+ rapl_perf_init();
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+ topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
}
/*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
*
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
*
* return 1 if config change requires a restart, else return 0
*/
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
- retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
- if (retval != 1)
- err(1, "GFX rc6");
-
- fclose(fp);
-
- return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
- }
-
- retval = fscanf(fp, "%d", &gfx_cur_mhz);
- if (retval != 1)
- err(1, "GFX MHz");
-
- return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
+ switch (idx) {
+ case GFX_rc6:
+ case SAM_mc6:
+ fp = fopen_or_die(gfx_info[idx].path, "r");
+ retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+ if (retval != 1)
+ err(1, "rc6");
+ fclose(fp);
+ return 0;
+ case GFX_MHz:
+ case GFX_ACTMHz:
+ case SAM_MHz:
+ case SAM_ACTMHz:
+ if (gfx_info[idx].fp == NULL) {
+ gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+ } else {
+ rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
+ }
+ retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+ if (retval != 1)
+ err(1, "MHz");
+ return 0;
+ default:
+ return -EINVAL;
}
-
- retval = fscanf(fp, "%d", &gfx_act_mhz);
- if (retval != 1)
- err(1, "GFX ACT MHz");
-
- return 0;
}
/*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
return 1;
if (DO_BIC(BIC_GFX_rc6))
- snapshot_gfx_rc6_ms();
+ snapshot_graphics(GFX_rc6);
if (DO_BIC(BIC_GFXMHz))
- snapshot_gfx_mhz();
+ snapshot_graphics(GFX_MHz);
if (DO_BIC(BIC_GFXACTMHz))
- snapshot_gfx_act_mhz();
+ snapshot_graphics(GFX_ACTMHz);
+
+ if (DO_BIC(BIC_SAM_mc6))
+ snapshot_graphics(SAM_mc6);
+
+ if (DO_BIC(BIC_SAMMHz))
+ snapshot_graphics(SAM_MHz);
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ snapshot_graphics(SAM_ACTMHz);
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
int ret, idx;
unsigned long long msr_cur, msr_last;
+ assert(!no_msr);
+
if (!per_cpu_msr_sum)
return 1;
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
UNUSED(c);
UNUSED(p);
+ assert(!no_msr);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
/*
* set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
*/
int set_my_sched_priority(int priority)
{
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
errno = 0;
original_priority = getpriority(PRIO_PROCESS, 0);
if (errno && (original_priority == -1))
- err(errno, "getpriority");
+ return -21;
retval = setpriority(PRIO_PROCESS, 0, priority);
if (retval)
- errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+ return -21;
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+ return -21;
return original_priority;
}
@@ -4314,6 +5034,9 @@ void turbostat_loop()
/*
* elevate own priority for interval mode
+ *
+ * ignore on error - we probably don't have permission to set it, but
+ * it's not a big deal
*/
set_my_sched_priority(-20);
@@ -4399,10 +5122,13 @@ void check_dev_msr()
struct stat sb;
char pathname[32];
+ if (no_msr)
+ return;
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ no_msr = 1;
}
/*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
{
cap_t caps;
cap_flag_value_t cap_flag_value;
+ int ret = 0;
caps = cap_get_proc();
if (caps == NULL)
- err(-6, "cap_get_proc\n");
+ return 1;
- if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
- err(-6, "cap_get\n");
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+ ret = 1;
+ goto free_and_exit;
+ }
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- return 1;
+ ret = 1;
+ goto free_and_exit;
}
+free_and_exit:
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
- return 0;
+ return ret;
}
-void check_permissions(void)
+void check_msr_permission(void)
{
- int do_exit = 0;
+ int failed = 0;
char pathname[32];
+ if (no_msr)
+ return;
+
/* check for CAP_SYS_RAWIO */
- do_exit += check_for_cap_sys_rawio();
+ failed += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
- do_exit++;
- warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ failed++;
}
- /* if all else fails, thell them to be root */
- if (do_exit)
- if (getuid() != 0)
- warnx("... or simply run as root");
-
- if (do_exit)
- exit(-6);
+ if (failed) {
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+ no_msr = 1;
+ }
}
void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
unsigned long long msr;
unsigned int base_ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
if (!has_turbo)
return;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
static void probe_intel_uncore_frequency(void)
{
int i, j;
- char path[128];
+ char path[256];
if (!genuine_intel)
return;
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- /* Cluster level sysfs not supported yet. */
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
- return;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ goto probe_cluster;
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
+ BIC_PRESENT(BIC_UNCORE_MHZ);
if (quiet)
return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
int k, l;
+ char path_base[128];
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+ j);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
- i, j);
+ sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
- i, j);
+ sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
}
}
+ return;
+
+probe_cluster:
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+ return;
+
+ if (quiet)
+ return;
+
+ for (i = 0;; ++i) {
+ int k, l;
+ char path_base[128];
+ int package_id, domain_id, cluster_id;
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+ if (access(path_base, R_OK))
+ break;
+
+ sprintf(path, "%s/package_id", path_base);
+ package_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/domain_id", path_base);
+ domain_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/fabric_cluster_id", path_base);
+ cluster_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+ cluster_id, k / 1000, l / 1000);
+
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
+ }
}
static void probe_graphics(void)
{
+ /* Xe graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+ FILE *fp;
+ char buf[8];
+ bool gt0_is_gt;
+ int idx;
+
+ fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+ if (!fp)
+ goto next;
+
+ if (!fread(buf, sizeof(char), 7, fp)) {
+ fclose(fp);
+ goto next;
+ }
+ fclose(fp);
+
+ if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+ gt0_is_gt = true;
+ else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+ gt0_is_gt = false;
+ else
+ goto next;
+
+ idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+ idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+ idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+ idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+ goto end;
+ }
+
+next:
+ /* New i915 graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+ gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+ gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+ gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+ goto end;
+ }
+
+ /* Fall back to traditional i915 graphics sysfs knobs */
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
- if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
- if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+ if (gfx_info[GFX_rc6].path)
+ BIC_PRESENT(BIC_GFX_rc6);
+ if (gfx_info[GFX_MHz].path)
+ BIC_PRESENT(BIC_GFXMHz);
+ if (gfx_info[GFX_ACTMHz].path)
BIC_PRESENT(BIC_GFXACTMHz);
+ if (gfx_info[SAM_mc6].path)
+ BIC_PRESENT(BIC_SAM_mc6);
+ if (gfx_info[SAM_MHz].path)
+ BIC_PRESENT(BIC_SAMMHz);
+ if (gfx_info[SAM_ACTMHz].path)
+ BIC_PRESENT(BIC_SAMACTMHz);
}
static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!has_hwp)
return 0;
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
cpu = t->cpu_id;
/* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
unsigned long long msr;
unsigned int time_unit;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
- if (rapl_joules) {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_Pkg_J);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_Cor_J);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAM_J);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFX_J);
- } else {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_PkgWatt);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_CorWatt);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAMWatt);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFXWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
- BIC_PRESENT(BIC_PKG__);
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
- BIC_PRESENT(BIC_RAM__);
+ if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ bic_enabled &= ~BIC_PKG__;
+ if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ bic_enabled &= ~BIC_RAM__;
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
{
unsigned long long msr;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs)
+ if (!platform->rapl_msrs || no_msr)
return;
if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!(do_dts || do_ptm))
return 0;
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!genuine_intel)
return;
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_feature_control)
return;
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_pwr_mgmt)
return;
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_c6_demotion_policy_config)
return;
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
fd = open(path, O_RDONLY);
if (fd < 0) {
- warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+ if (debug)
+ warnx("Read %s failed", path);
return;
}
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
close(fd);
}
+static int has_instr_count_access(void)
+{
+ int fd;
+ int has_access;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+ has_access = fd != -1;
+
+ if (fd != -1)
+ close(fd);
+
+ if (!has_access)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "instructions retired perf counter", "--no-perf");
+
+ return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+ return BIC_IS_ENABLED(BIC_Avg_MHz)
+ || BIC_IS_ENABLED(BIC_Busy)
+ || BIC_IS_ENABLED(BIC_Bzy_MHz)
+ || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
+{
+ if (no_perf)
+ return -1;
+
+ const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+ if (scale == 0.0)
+ return -1;
+
+ const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+ if (unit == RAPL_UNIT_INVALID)
+ return -1;
+
+ const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+ const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+ const int fd_counter =
+ open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (fd_counter == -1)
+ return -1;
+
+ /* If it's the first counter opened, make it a group descriptor */
+ if (rci->fd_perf == -1)
+ rci->fd_perf = fd_counter;
+
+ *scale_ = scale;
+ *unit_ = unit;
+ return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale, enum rapl_unit *unit)
+{
+ int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+ if (debug)
+ fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+ return ret;
+}
+
/*
* Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
{
- if (!BIC_IS_ENABLED(BIC_IPC))
- return;
-
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
- if (fd_instr_count_percpu == NULL)
- err(-1, "calloc fd_instr_count_percpu");
+ if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+ }
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+ fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+ if (fd_amperf_percpu == NULL)
+ err(-1, "calloc fd_amperf_percpu");
+ }
+}
+
+void rapl_perf_init(void)
+{
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+ bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+ rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+ if (rapl_counter_info_perdomain == NULL)
+ err(-1, "calloc rapl_counter_info_percpu");
+
+ /*
+ * Initialize rapl_counter_info_percpu
+ */
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+ rci->fd_perf = -1;
+ for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+ rci->data[i] = 0;
+ rci->source[i] = RAPL_SOURCE_NONE;
+ }
+ }
- BIC_PRESENT(BIC_IPC);
+ /*
+ * Open/probe the counters
+ * If can't get it via perf, fallback to MSR
+ */
+ for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+ const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+ bool has_counter = 0;
+ double scale;
+ enum rapl_unit unit;
+ int next_domain;
+
+ memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+ for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ /* Skip already seen and handled RAPL domains */
+ next_domain =
+ platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+ if (domain_visited[next_domain])
+ continue;
+
+ domain_visited[next_domain] = 1;
+
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+ /* Check if the counter is enabled and accessible */
+ if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+ /* Use perf API for this counter */
+ if (!no_perf && cai->perf_name
+ && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
+ }
+ }
+
+ if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+ has_counter = 1;
+ }
+
+ /* If any CPU has access to the counter, make it present */
+ if (has_counter)
+ BIC_PRESENT(cai->bic);
+ }
+
+ free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+ if (no_msr)
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_APERF))
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_MPERF))
+ return 0;
+
+ return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+ struct amperf_group_fd fds;
+
+ /*
+ * Cache the last result, so we don't warn the user multiple times
+ *
+ * Negative means cached, no access
+ * Zero means not cached
+ * Positive means cached, has access
+ */
+ static int has_access_cached;
+
+ if (no_perf)
+ return 0;
+
+ if (has_access_cached != 0)
+ return has_access_cached > 0;
+
+ fds = open_amperf_fd(base_cpu);
+ has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+ if (fds.aperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "APERF perf counter", "--no-perf");
+ else
+ close(fds.aperf);
+
+ if (fds.mperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "MPERF perf counter", "--no-perf");
+ else
+ close(fds.mperf);
+
+ if (has_access_cached == 0)
+ has_access_cached = -1;
+
+ return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+ if (!is_aperf_access_required())
+ return 0;
+
+ if (!no_msr && has_amperf_access_via_msr())
+ return 1;
+
+ if (!no_perf && has_amperf_access_via_perf())
+ return 1;
+
+ return 0;
}
void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
if (platform->has_msr_module_c6_res_ms)
BIC_PRESENT(BIC_Mod_c6);
- if (platform->has_ext_cst_msrs) {
+ if (platform->has_ext_cst_msrs && !no_msr) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned long long ucode_patch = 0;
+ bool ucode_patch_valid = false;
eax = ebx = ecx = edx = 0;
@@ -5650,8 +6762,12 @@ void process_cpuid()
ecx_flags = ecx;
edx_flags = edx;
- if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
- warnx("get_msr(UCODE)");
+ if (!no_msr) {
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)");
+ else
+ ucode_patch_valid = true;
+ }
/*
* check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
- family, model, stepping, family, model, stepping,
- (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+ family, model, stepping, family, model, stepping);
+ if (ucode_patch_valid)
+ fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fputc('\n', outf);
+
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
- if (has_aperf) {
+ if (has_aperf && has_amperf_access()) {
BIC_PRESENT(BIC_Avg_MHz);
BIC_PRESENT(BIC_Busy);
BIC_PRESENT(BIC_Bzy_MHz);
+ BIC_PRESENT(BIC_IPC);
}
do_dts = eax & (1 << 0);
if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+ bclk = bus_mhz;
+
+ base_hz = base_mhz * 1000000;
+ has_base_hz = 1;
+
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+
if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
probe_thermal();
- if (platform->has_nhm_msrs)
+ if (platform->has_nhm_msrs && !no_msr)
BIC_PRESENT(BIC_SMI);
if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
topo.allowed_packages = 0;
for_all_cpus(update_topo, ODD_COUNTERS);
}
+
void setup_all_buffers(bool startup)
{
topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
}
+static void set_amperf_source(void)
+{
+ amperf_source = AMPERF_SOURCE_PERF;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+ amperf_source = AMPERF_SOURCE_MSR;
+
+ if (quiet || !debug)
+ return;
+
+ fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+ /*
+ * It only makes sense to call this after the command line is parsed,
+ * otherwise sys structure is not populated.
+ */
+
+ return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+ if (no_msr)
+ return false;
+
+ if (has_added_counters())
+ return true;
+
+ return BIC_IS_ENABLED(BIC_SMI)
+ || BIC_IS_ENABLED(BIC_CPU_c1)
+ || BIC_IS_ENABLED(BIC_CPU_c3)
+ || BIC_IS_ENABLED(BIC_CPU_c6)
+ || BIC_IS_ENABLED(BIC_CPU_c7)
+ || BIC_IS_ENABLED(BIC_Mod_c6)
+ || BIC_IS_ENABLED(BIC_CoreTmp)
+ || BIC_IS_ENABLED(BIC_Totl_c0)
+ || BIC_IS_ENABLED(BIC_Any_c0)
+ || BIC_IS_ENABLED(BIC_GFX_c0)
+ || BIC_IS_ENABLED(BIC_CPUGFX)
+ || BIC_IS_ENABLED(BIC_Pkgpc3)
+ || BIC_IS_ENABLED(BIC_Pkgpc6)
+ || BIC_IS_ENABLED(BIC_Pkgpc2)
+ || BIC_IS_ENABLED(BIC_Pkgpc7)
+ || BIC_IS_ENABLED(BIC_Pkgpc8)
+ || BIC_IS_ENABLED(BIC_Pkgpc9)
+ || BIC_IS_ENABLED(BIC_Pkgpc10)
+ /* TODO: Multiplex access with perf */
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_PkgWatt)
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_GFXWatt)
+ || BIC_IS_ENABLED(BIC_RAMWatt)
+ || BIC_IS_ENABLED(BIC_Pkg_J)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_GFX_J)
+ || BIC_IS_ENABLED(BIC_RAM_J)
+ || BIC_IS_ENABLED(BIC_PKG__)
+ || BIC_IS_ENABLED(BIC_RAM__)
+ || BIC_IS_ENABLED(BIC_PkgTmp)
+ || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+ if (!is_msr_access_required())
+ no_msr = 1;
+
+ check_dev_msr();
+ check_msr_permission();
+
+ if (no_msr)
+ bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+ const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+ if (no_perf || !intrcount_required || !has_instr_count_access())
+ bic_enabled &= ~BIC_IPC;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (!aperf_required || !has_amperf_access()) {
+ bic_enabled &= ~BIC_Avg_MHz;
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ bic_enabled &= ~BIC_IPC;
+ }
+}
+
void turbostat_init()
{
setup_all_buffers(true);
set_base_cpu();
- check_dev_msr();
- check_permissions();
+ check_msr_access();
+ check_perf_access();
process_cpuid();
probe_pm_features();
+ set_amperf_source();
linux_perf_init();
+ rapl_perf_init();
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
+
+ /*
+ * If TSC tweak is needed, but couldn't get it,
+ * disable more BICs, since it can't be reported accurately.
+ */
+ if (platform->enable_tsc_tweak && !has_base_hz) {
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ }
}
int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
{
struct msr_counter *msrp;
+ if (no_msr && msr_num)
+ errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
msrp = calloc(1, sizeof(struct msr_counter));
if (msrp == NULL) {
perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
{ "list", no_argument, 0, 'l' },
{ "out", required_argument, 0, 'o' },
{ "quiet", no_argument, 0, 'q' },
+ { "no-msr", no_argument, 0, 'M' },
+ { "no-perf", no_argument, 0, 'P' },
{ "show", required_argument, 0, 's' },
{ "Summary", no_argument, 0, 'S' },
{ "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+ /*
+ * Parse some options early, because they may make other options invalid,
+ * like adding the MSR counter with --add and at the same time using --no-msr.
+ */
+ while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'M':
+ no_msr = 1;
+ break;
+ case 'P':
+ no_perf = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 0;
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+ case 'M':
+ case 'P':
+ /* Parsed earlier */
+ break;
case 'n':
num_iterations = strtod(optarg, NULL);
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
}
}
+void set_rlimit(void)
+{
+ struct rlimit limit;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to get rlimit");
+
+ if (limit.rlim_max < MAX_NOFILE)
+ limit.rlim_max = MAX_NOFILE;
+ if (limit.rlim_cur < MAX_NOFILE)
+ limit.rlim_cur = MAX_NOFILE;
+
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to set rlimit");
+}
+
int main(int argc, char **argv)
{
int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
probe_sysfs();
+ if (!getuid())
+ set_rlimit();
+
turbostat_init();
- msr_sum_record();
+ if (!no_msr)
+ msr_sum_record();
/* dump counters and exit */
if (dump_only)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 908e0d0839369c..61c69297e7978f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
- dpa_perf->coord.read_latency = 500;
- dpa_perf->coord.write_latency = 500;
- dpa_perf->coord.read_bandwidth = 1000;
- dpa_perf->coord.write_bandwidth = 1000;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index aa5ec149f96c16..b3b00269a52aac 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -28,6 +28,8 @@ CONFIG_MCTP_FLOWS=y
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_NETDEVICES=y
+CONFIG_WLAN=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_WLAN_VENDOR_INTEL=y
@@ -38,6 +40,7 @@ CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
CONFIG_DEBUG_FS=y
CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_DBGFS_DEPRECATED=y
CONFIG_REGMAP_BUILD=y
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index bcf195c64a45c1..567491f3e1b51b 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -32,7 +32,7 @@
*/
#endif
-#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
index 0766702de84657..d69fd2465f5367 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_htab.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -3,12 +3,14 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "arena_htab_asm.skel.h"
#include "arena_htab.skel.h"
-#define PAGE_SIZE 4096
-
#include "bpf_arena_htab.h"
static void test_arena_htab_common(struct htab *htab)
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
index e61886debab127..d15867cddde06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -3,8 +3,11 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
-#define PAGE_SIZE 4096
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "bpf_arena_list.h"
#include "arena_list.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index 053f4d6da77a48..cc184e4420f6e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <sys/syscall.h>
+#include <limits.h>
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
@@ -21,6 +22,11 @@ static void test_fail_cases(void)
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
close(fd);
+ /* Invalid value size: too big */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large"))
+ close(fd);
+
/* Invalid max entries size */
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 985273832f891c..c4f9f306646ed3 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -5,6 +5,7 @@
#include "cap_helpers.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
+#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@@ -120,6 +121,7 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
index b7bb712cacfdcc..1e6ac187a6a0ce 100644
--- a/tools/testing/selftests/bpf/progs/arena_htab.c
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -22,7 +22,7 @@ int zero = 0;
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
__u64 i;
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index cd35b844843560..c0422c58cee2c5 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -30,13 +30,13 @@ int list_sum;
int cnt;
bool skip = false;
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
int __arena test_val = 1;
struct arena_list_head __arena global_head;
#else
-long arena_sum SEC(".arena.1");
-int test_val SEC(".arena.1");
+long arena_sum SEC(".addr_space.1");
+int test_val SEC(".addr_space.1");
#endif
int zero;
@@ -44,7 +44,7 @@ int zero;
SEC("syscall")
int arena_list_add(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
__u64 i;
list_head = &global_head;
@@ -66,7 +66,7 @@ int arena_list_add(void *ctx)
SEC("syscall")
int arena_list_del(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
struct elem __arena *n;
int sum = 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 5540b05ff9ee13..93144ae6df7412 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -12,14 +12,18 @@ struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
- __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#endif
} arena SEC(".maps");
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile int __arena *page1, *page2, *no_page, *page3;
page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
@@ -58,7 +62,7 @@ SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *page3, *page4;
page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
new file mode 100644
index 00000000000000..ef66ea460264c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_SIZE (1ull << 32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_SIZE / PAGE_SIZE);
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *no_page, *page3;
+ void __arena *base;
+
+ page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 1);
+ if (*page2 != 2)
+ return 6;
+ if (*page1 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page1 != page3)
+ return 9;
+ if (*page2 != 2)
+ return 10;
+ if (*(page1 + PAGE_SIZE) != 0)
+ return 11;
+ if (*(page1 - PAGE_SIZE) != 0)
+ return 12;
+ if (*(page2 + PAGE_SIZE) != 0)
+ return 13;
+ if (*(page2 - PAGE_SIZE) != 0)
+ return 14;
+#endif
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings
new file mode 100644
index 00000000000000..a62d2fa1275c6b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index a0b8688b083694..fb4472ddffd81b 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -19,8 +19,8 @@ include ../lib.mk
$(OUTPUT)/subdir:
mkdir -p $@
-$(OUTPUT)/script:
- echo '#!/bin/sh' > $@
+$(OUTPUT)/script: Makefile
+ echo '#!/bin/bash' > $@
echo 'exit $$*' >> $@
chmod +x $@
$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
diff --git a/tools/testing/selftests/exec/binfmt_script.py b/tools/testing/selftests/exec/binfmt_script.py
index 05f94a741c7aa0..2c575a2c0eab41 100755
--- a/tools/testing/selftests/exec/binfmt_script.py
+++ b/tools/testing/selftests/exec/binfmt_script.py
@@ -16,6 +16,8 @@ SIZE=256
NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
test_num=0
+pass_num=0
+fail_num=0
code='''#!/usr/bin/perl
print "Executed interpreter! Args:\n";
@@ -42,7 +44,7 @@ foreach my $a (@ARGV) {
# ...
def test(name, size, good=True, leading="", root="./", target="/perl",
fill="A", arg="", newline="\n", hashbang="#!"):
- global test_num, tests, NAME_MAX
+ global test_num, pass_num, fail_num, tests, NAME_MAX
test_num += 1
if test_num > tests:
raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
@@ -80,16 +82,20 @@ def test(name, size, good=True, leading="", root="./", target="/perl",
if good:
print("ok %d - binfmt_script %s (successful good exec)"
% (test_num, name))
+ pass_num += 1
else:
print("not ok %d - binfmt_script %s succeeded when it should have failed"
% (test_num, name))
+ fail_num = 1
else:
if good:
print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
% (test_num, name, proc.returncode))
+ fail_num = 1
else:
print("ok %d - binfmt_script %s (correctly failed bad exec)"
% (test_num, name))
+ pass_num += 1
# Clean up crazy binaries
os.unlink(script)
@@ -166,6 +172,8 @@ test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
test(name="two-under-leading", size=int(SIZE/2), leading=" ")
test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num))
+
if test_num != tests:
raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
% (test_num, tests))
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 0546ca24f2b20c..6418ded40bdddc 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -98,10 +98,9 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags,
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+ ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n",
rc, errno, strerror(errno));
- ksft_test_result_fail("%s\n", test_name);
- exit(1); /* should not reach here */
+ exit(errno);
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
@@ -226,11 +225,14 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
* "If the command name is found, but it is not an executable utility,
* the exit status shall be 126."), so allow either.
*/
- if (is_script)
+ if (is_script) {
+ ksft_print_msg("Invoke script via root_dfd and relative filename\n");
fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
127, 126);
- else
+ } else {
+ ksft_print_msg("Invoke exec via root_dfd and relative filename\n");
fail += check_execveat(root_dfd, longpath + 1, 0);
+ }
return fail;
}
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index d487c2f6a61509..17e3207d34ae7e 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -5,6 +5,7 @@
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
+#include "../kselftest.h"
struct Statistics {
unsigned long long load_address;
@@ -41,28 +42,23 @@ int main(int argc, char **argv)
unsigned long long misalign;
int ret;
+ ksft_print_header();
+ ksft_set_plan(1);
+
ret = dl_iterate_phdr(ExtractStatistics, &extracted);
- if (ret != 1) {
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (ret != 1)
+ ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n");
- if (extracted.alignment == 0) {
- fprintf(stderr, "No alignment found\n");
- return 1;
- } else if (extracted.alignment & (extracted.alignment - 1)) {
- fprintf(stderr, "Alignment is not a power of 2\n");
- return 1;
- }
+ if (extracted.alignment == 0)
+ ksft_exit_fail_msg("FAILED: No alignment found\n");
+ else if (extracted.alignment & (extracted.alignment - 1))
+ ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n");
misalign = extracted.load_address & (extracted.alignment - 1);
- if (misalign) {
- printf("alignment = %llu, load_address = %llu\n",
- extracted.alignment, extracted.load_address);
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (misalign)
+ ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
- fprintf(stderr, "PASS\n");
- return 0;
+ ksft_test_result_pass("Completed\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index 2dbd5bc45b3ed0..b2f37d86a5f623 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -23,45 +23,44 @@
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
+#include "../kselftest.h"
int main(void)
{
+ int fd, rv;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (unshare(CLONE_NEWNS) == -1) {
if (errno == ENOSYS || errno == EPERM) {
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 4;
+ ksft_test_result_skip("error: unshare, errno %d\n", errno);
+ ksft_finished();
}
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 1;
- }
- if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
- fprintf(stderr, "error: mount '/', errno %d\n", errno);
- return 1;
+ ksft_exit_fail_msg("error: unshare, errno %d\n", errno);
}
+
+ if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
+ ksft_exit_fail_msg("error: mount '/', errno %d\n", errno);
+
/* Require "exec" filesystem. */
- if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) {
- fprintf(stderr, "error: mount ramfs, errno %d\n", errno);
- return 1;
- }
+ if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
+ ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno);
#define FILENAME "/tmp/1"
- int fd = creat(FILENAME, 0700);
- if (fd == -1) {
- fprintf(stderr, "error: creat, errno %d\n", errno);
- return 1;
- }
+ fd = creat(FILENAME, 0700);
+ if (fd == -1)
+ ksft_exit_fail_msg("error: creat, errno %d\n", errno);
+
#define S "#!" FILENAME "\n"
- if (write(fd, S, strlen(S)) != strlen(S)) {
- fprintf(stderr, "error: write, errno %d\n", errno);
- return 1;
- }
+ if (write(fd, S, strlen(S)) != strlen(S))
+ ksft_exit_fail_msg("error: write, errno %d\n", errno);
+
close(fd);
- int rv = execve(FILENAME, NULL, NULL);
- if (rv == -1 && errno == ELOOP) {
- return 0;
- }
- fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno);
- return 1;
+ rv = execve(FILENAME, NULL, NULL);
+ ksft_test_result(rv == -1 && errno == ELOOP,
+ "execve failed as expected (ret %d, errno %d)\n", rv, errno);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 541bf192e30e6b..7b89362da4bfd3 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -51,6 +51,7 @@
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
#endif
#ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
/* counters */
@@ -299,13 +303,13 @@ void ksft_test_result_code(int exit_code, const char *test_name,
va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -332,7 +336,7 @@ static inline int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -347,19 +351,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -388,4 +392,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+#ifdef NOLIBC
+ ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+ return 0;
+#else
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index ddba2c2fb5deb1..4eaba83cdcf3d2 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
- "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 3bd03b088dda60..81ce37ec407dd1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_feature feature,
bool set);
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index e22848f747c015..0f9cabd99fd451 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data)
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
"config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 9e2879af7c201f..40cc59f4e65013 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu)
}
}
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -151,4 +188,6 @@ int main(void)
enter_guest(vcpu);
kvm_vm_free(vm);
+
+ test_pv_unhalt();
}
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index cbe99594d319b4..18a49c70d4c635 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
ksft_print_header();
ksft_set_plan(nthreads);
- filed = open(file, O_RDWR|O_CREAT);
+ filed = open(file, O_RDWR|O_CREAT, 0664);
if (filed < 0)
ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index f822ae31af22e2..374a308174d2ba 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -1745,9 +1745,12 @@ void pkey_setup_shadow(void)
shadow_pkey_reg = __read_pkey_reg();
}
+pid_t parent_pid;
+
void restore_settings_atexit(void)
{
- cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
+ if (parent_pid == getpid())
+ cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
}
void save_settings(void)
@@ -1773,6 +1776,7 @@ void save_settings(void)
exit(__LINE__);
}
+ parent_pid = getpid();
atexit(restore_settings_atexit);
close(fd);
}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index cc5f144430d4d2..7dbfa53d93a05f 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -137,7 +137,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
if (!map)
ksft_exit_fail_msg("anon mmap failed\n");
} else {
- test_fd = open(fname, O_RDWR | O_CREAT);
+ test_fd = open(fname, O_RDWR | O_CREAT, 0664);
if (test_fd < 0) {
ksft_test_result_skip("Test %s open() file failed\n", __func__);
return;
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 856662d2f87a1b..6c988bd2f33567 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -223,7 +223,7 @@ void split_file_backed_thp(void)
ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
}
- fd = open(testfile, O_CREAT|O_WRONLY);
+ fd = open(testfile, O_CREAT|O_WRONLY, 0664);
if (fd == -1) {
ksft_perror("Cannot open testing file");
goto cleanup;
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index b0ac0ec2356d65..7ad6ba660c7d6f 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -18,6 +18,7 @@ bool test_uffdio_wp = true;
unsigned long long *count_verify;
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
+atomic_bool ready_for_fork;
static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
{
@@ -518,6 +519,8 @@ void *uffd_poll_thread(void *arg)
pollfd[1].fd = pipefd[cpu*2];
pollfd[1].events = POLLIN;
+ ready_for_fork = true;
+
for (;;) {
ret = poll(pollfd, 2, -1);
if (ret <= 0) {
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index cb055282c89c96..cc5629c3d2aa10 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -32,6 +32,7 @@
#include <inttypes.h>
#include <stdint.h>
#include <sys/random.h>
+#include <stdatomic.h>
#include "../kselftest.h"
#include "vm_util.h"
@@ -103,6 +104,7 @@ extern bool map_shared;
extern bool test_uffdio_wp;
extern unsigned long long *count_verify;
extern volatile bool test_uffdio_copy_eexist;
+extern atomic_bool ready_for_fork;
extern uffd_test_ops_t anon_uffd_test_ops;
extern uffd_test_ops_t shmem_uffd_test_ops;
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 2b9f8cc52639d1..21ec23206ab44a 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp)
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
@@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp)
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
@@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp)
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
true, wp, false))
@@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp)
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
@@ -1427,7 +1437,8 @@ uffd_test_case_t uffd_tests[] = {
.uffd_fn = uffd_sigbus_wp_test,
.mem_targets = MEM_ALL,
.uffd_feature_required = UFFD_FEATURE_SIGBUS |
- UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
+ UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
},
{
.name = "events",
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c02990bbd56f4c..9007c420d52c52 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,7 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#define BIT_ULL(nr) (1ULL << (nr))
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index a2662348cdb1a2..b7b54d646b937c 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -6,7 +6,9 @@
#include "../kselftest_harness.h"
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = {
0, 0, 0, 0
}
};
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = {
}
};
+#define NR_SOCKETS 8
+
FIXTURE(bind_wildcard)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
+ int fd[NR_SOCKETS];
+ socklen_t addrlen[NR_SOCKETS];
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ } addr[NR_SOCKETS];
};
FIXTURE_VARIANT(bind_wildcard)
{
- const __u32 addr4_const;
- const struct in6_addr *addr6_const;
- int expected_errno;
+ sa_family_t family[2];
+ const void *addr[2];
+ bool ipv6_only[2];
+
+ /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+ *
+ * 0.0.0.0
+ * 127.0.0.1
+ * ::
+ * ::1
+ * ::ffff:0.0.0.0
+ * ::ffff:127.0.0.1
+ */
+ int expected_errno[NR_SOCKETS];
+ int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
+/* (IPv4, IPv6) */
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+ int family, const void *addr_const)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *addr4 = &self->addr[i].addr4;
+ const __u32 *addr4_const = addr_const;
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(0);
+ addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+ self->addrlen[i] = sizeof(struct sockaddr_in);
+ } else {
+ struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+ const struct in6_addr *addr6_const = addr_const;
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(0);
+ addr6->sin6_addr = *addr6_const;
+
+ self->addrlen[i] = sizeof(struct sockaddr_in6);
+ }
+}
+
FIXTURE_SETUP(bind_wildcard)
{
- self->addr4.sin_family = AF_INET;
- self->addr4.sin_port = htons(0);
- self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+ setup_addr(self, 0, variant->family[0], variant->addr[0]);
+ setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+ setup_addr(self, 2, AF_INET, &in4addr_any);
+ setup_addr(self, 3, AF_INET, &in4addr_loopback);
- self->addr6.sin6_family = AF_INET6;
- self->addr6.sin6_port = htons(0);
- self->addr6.sin6_addr = *variant->addr6_const;
+ setup_addr(self, 4, AF_INET6, &in6addr_any);
+ setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+ setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+ setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
}
FIXTURE_TEARDOWN(bind_wildcard)
{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ close(self->fd[i]);
}
-void bind_sockets(struct __test_metadata *_metadata,
- FIXTURE_DATA(bind_wildcard) *self,
- int expected_errno,
- struct sockaddr *addr1, socklen_t addrlen1,
- struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ const FIXTURE_VARIANT(bind_wildcard) *variant,
+ int i, int reuse)
{
- int fd[2];
int ret;
- fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[0], 0);
+ self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(self->fd[i], 0);
- ret = bind(fd[0], addr1, addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && variant->ipv6_only[i]) {
+ ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ret = getsockname(fd[0], addr1, &addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && reuse) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+ self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
- fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[1], 0);
+ ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
- ret = bind(fd[1], addr2, addrlen2);
- if (expected_errno) {
- ASSERT_EQ(ret, -1);
- ASSERT_EQ(errno, expected_errno);
+ if (reuse) {
+ if (variant->expected_reuse_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
} else {
+ if (variant->expected_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (i == 0) {
+ ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
ASSERT_EQ(ret, 0);
}
+}
- close(fd[1]);
- close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, 0);
}
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr4, sizeof(self->addr4),
- (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
}
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr6, sizeof(self->addr6),
- (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4c424855482629..4131f3263a4826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -383,12 +383,14 @@ do_transfer()
local stat_cookierx_last
local stat_csum_err_s
local stat_csum_err_c
+ local stat_tcpfb_last_l
stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -457,11 +459,13 @@ do_transfer()
local stat_cookietx_now
local stat_cookierx_now
local stat_ooo_now
+ local stat_tcpfb_now_l
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -508,6 +512,11 @@ do_transfer()
fi
fi
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
extra+=" WARN: CookieSent: did not advance"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5e9211e8982568..e4403236f65548 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -729,7 +729,7 @@ pm_nl_check_endpoint()
[ -n "$_flags" ]; flags="flags $_flags"
shift
elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ [ -n "$2" ]; dev="dev $2"
shift
elif [ $1 = "id" ]; then
_id=$2
@@ -3610,6 +3610,8 @@ endpoint_tests()
local tests_pid=$!
wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
index 7c5b12664b03b0..bfb07dc495186d 100644
--- a/tools/testing/selftests/net/reuseaddr_conflict.c
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -109,6 +109,6 @@ int main(void)
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 74ff9fb2a6f0e1..58da5de99ac451 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -1177,6 +1177,7 @@ encap_params_common()
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1195,11 +1196,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"
@@ -1212,20 +1213,20 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"
@@ -1238,11 +1239,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"
@@ -1250,11 +1251,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"
@@ -1272,6 +1273,7 @@ encap_params_ipv4_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1279,7 +1281,7 @@ encap_params_ipv4_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv4()
@@ -1291,6 +1293,7 @@ encap_params_ipv6_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1298,7 +1301,7 @@ encap_params_ipv6_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
encap_params_ipv4_ipv6()
@@ -1310,6 +1313,7 @@ encap_params_ipv4_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1317,7 +1321,7 @@ encap_params_ipv4_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv6()
@@ -1329,6 +1333,7 @@ encap_params_ipv6_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1336,7 +1341,7 @@ encap_params_ipv6_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
starg_exclude_ir_common()
@@ -1347,6 +1352,7 @@ starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1368,14 +1374,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1385,14 +1391,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1407,6 +1413,7 @@ starg_exclude_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1415,7 +1422,7 @@ starg_exclude_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv4()
@@ -1426,6 +1433,7 @@ starg_exclude_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1434,7 +1442,7 @@ starg_exclude_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_ir_ipv4_ipv6()
@@ -1445,6 +1453,7 @@ starg_exclude_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1453,7 +1462,7 @@ starg_exclude_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv6()
@@ -1464,6 +1473,7 @@ starg_exclude_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1472,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_common()
@@ -1483,6 +1493,7 @@ starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1504,14 +1515,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1521,14 +1532,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1543,6 +1554,7 @@ starg_include_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1551,7 +1563,7 @@ starg_include_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv4()
@@ -1562,6 +1574,7 @@ starg_include_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1570,7 +1583,7 @@ starg_include_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_ipv4_ipv6()
@@ -1581,6 +1594,7 @@ starg_include_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1589,7 +1603,7 @@ starg_include_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv6()
@@ -1600,6 +1614,7 @@ starg_include_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1608,7 +1623,7 @@ starg_include_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_common()
@@ -1618,6 +1633,7 @@ starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1635,12 +1651,12 @@ starg_exclude_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1648,7 +1664,7 @@ starg_exclude_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1660,6 +1676,7 @@ starg_exclude_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1667,7 +1684,7 @@ starg_exclude_p2mp_ipv4_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1678,6 +1695,7 @@ starg_exclude_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1685,7 +1703,7 @@ starg_exclude_p2mp_ipv6_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1696,6 +1714,7 @@ starg_exclude_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1703,7 +1722,7 @@ starg_exclude_p2mp_ipv4_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1714,6 +1733,7 @@ starg_exclude_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1721,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1732,6 +1752,7 @@ starg_include_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1749,12 +1770,12 @@ starg_include_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1762,7 +1783,7 @@ starg_include_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1774,6 +1795,7 @@ starg_include_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1781,7 +1803,7 @@ starg_include_p2mp_ipv4_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1792,6 +1814,7 @@ starg_include_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1799,7 +1822,7 @@ starg_include_p2mp_ipv6_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1810,6 +1833,7 @@ starg_include_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1817,7 +1841,7 @@ starg_include_p2mp_ipv4_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1828,6 +1852,7 @@ starg_include_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1835,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1847,6 +1872,7 @@ egress_vni_translation_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1882,20 +1908,20 @@ egress_vni_translation_common()
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"
# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"
# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
@@ -1908,6 +1934,7 @@ egress_vni_translation_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1915,7 +1942,7 @@ egress_vni_translation_ipv4_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv4()
@@ -1926,6 +1953,7 @@ egress_vni_translation_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1933,7 +1961,7 @@ egress_vni_translation_ipv6_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
egress_vni_translation_ipv4_ipv6()
@@ -1944,6 +1972,7 @@ egress_vni_translation_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1951,7 +1980,7 @@ egress_vni_translation_ipv4_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv6()
@@ -1962,6 +1991,7 @@ egress_vni_translation_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1969,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
all_zeros_mdb_common()
@@ -1982,12 +2012,18 @@ all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1
# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
@@ -2023,7 +2059,7 @@ all_zeros_mdb_common()
# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2031,7 +2067,7 @@ all_zeros_mdb_common()
# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2039,7 +2075,7 @@ all_zeros_mdb_common()
# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2074,7 +2110,7 @@ all_zeros_mdb_common()
# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
@@ -2082,7 +2118,7 @@ all_zeros_mdb_common()
# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2090,7 +2126,7 @@ all_zeros_mdb_common()
# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2165,6 +2201,7 @@ mdb_fdb_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -2188,7 +2225,7 @@ mdb_fdb_common()
# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2205,7 +2242,7 @@ mdb_fdb_common()
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
@@ -2221,14 +2258,15 @@ mdb_fdb_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv4()
@@ -2240,14 +2278,15 @@ mdb_fdb_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_fdb_ipv4_ipv6()
@@ -2259,14 +2298,15 @@ mdb_fdb_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv6()
@@ -2278,14 +2318,15 @@ mdb_fdb_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_grp1_loop()
@@ -2320,7 +2361,9 @@ mdb_torture_common()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
@@ -2345,9 +2388,9 @@ mdb_torture_common()
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!
sleep 30
@@ -2363,15 +2406,17 @@ mdb_torture_ipv4_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv4()
@@ -2380,15 +2425,17 @@ mdb_torture_ipv6_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
mdb_torture_ipv4_ipv6()
@@ -2397,15 +2444,17 @@ mdb_torture_ipv4_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv6()
@@ -2414,15 +2463,17 @@ mdb_torture_ipv6_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
################################################################################
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index c6eda21cefb6b8..f27a12d2a2c997 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(tls, recv_efault)
+{
+ char *rec1 = "1111111111";
+ char *rec2 = "2222222222";
+ struct msghdr hdr = {};
+ struct iovec iov[2];
+ char recv_mem[12];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, 10, 0), 10);
+ EXPECT_EQ(send(self->fd, rec2, 10, 0), 10);
+
+ iov[0].iov_base = recv_mem;
+ iov[0].iov_len = sizeof(recv_mem);
+ iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */
+ iov[1].iov_len = 1;
+
+ hdr.msg_iovlen = 2;
+ hdr.msg_iov = iov;
+
+ EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1);
+ EXPECT_EQ(recv_mem[0], rec1[0]);
+
+ ret = recvmsg(self->cfd, &hdr, 0);
+ EXPECT_LE(ret, sizeof(recv_mem));
+ EXPECT_GE(ret, 9);
+ EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0);
+ if (ret > 9)
+ EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 380cb15e942e42..83ed987cff340e 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -244,7 +244,7 @@ for family in 4 6; do
create_vxlan_pair
ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup
# use NAT to circumvent GRO FWD check
@@ -258,13 +258,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index d49dd3ffd0d96a..c001dd79179d5d 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
return 0;
}
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
{
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
- return NULL;
+ if (pthread_self() != ctd_thread)
+ ctd_failed = 1;
+ ctd_count--;
}
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
{
- if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
- __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
- int err, i;
- timer_t id;
- const int nthreads = 10;
- pthread_t threads[nthreads];
struct itimerspec val = {
.it_value.tv_sec = 0,
.it_value.tv_nsec = 1000 * 1000,
.it_interval.tv_sec = 0,
.it_interval.tv_nsec = 1000 * 1000,
};
+ timer_t id;
- remain = nthreads + 1; /* worker threads + this thread */
- signal(SIGALRM, distribution_handler);
- err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ /* 1/10 seconds to ensure the leader sleeps */
+ usleep(10000);
- for (i = 0; i < nthreads; i++) {
- err = pthread_create(&threads[i], NULL, distribution_thread,
- NULL);
- if (err) {
- ksft_print_msg("Can't create thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ ctd_count = 100;
+ if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+ return "Can't create timer\n";
+ if (timer_settime(id, 0, &val, NULL))
+ return "Can't set timer\n";
- /* Wait for all threads to receive the signal. */
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ while (ctd_count > 0 && !ctd_failed)
+ ;
- for (i = 0; i < nthreads; i++) {
- err = pthread_join(threads[i], NULL);
- if (err) {
- ksft_print_msg("Can't join thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ if (timer_delete(id))
+ return "Can't delete timer\n";
- if (timer_delete(id)) {
- ksft_perror("Can't delete timer");
- return -1;
- }
+ return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+ const char *errmsg;
- ksft_test_result_pass("check_timer_distribution\n");
+ signal(SIGALRM, ctd_sighandler);
+
+ errmsg = "Can't create thread\n";
+ if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+ goto err;
+
+ errmsg = "Can't join thread\n";
+ if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+ goto err;
+
+ if (!ctd_failed)
+ ksft_test_result_pass("check signal distribution\n");
+ else if (ksft_min_kernel_version(6, 3))
+ ksft_test_result_fail("check signal distribution\n");
+ else
+ ksft_test_result_skip("check signal distribution (old kernel)\n");
return 0;
+err:
+ ksft_print_msg("%s", errmsg);
+ return -1;
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 48b9a803235a80..d13ebde203221a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -21,9 +21,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-
-
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
#define NUM_FREQ_OUTOFRANGE 4
#define NUM_FREQ_INVALID 2
+#define SHIFTED_PPM (1 << 16)
+
long valid_freq[NUM_FREQ_VALID] = {
- -499<<16,
- -450<<16,
- -400<<16,
- -350<<16,
- -300<<16,
- -250<<16,
- -200<<16,
- -150<<16,
- -100<<16,
- -75<<16,
- -50<<16,
- -25<<16,
- -10<<16,
- -5<<16,
- -1<<16,
+ -499 * SHIFTED_PPM,
+ -450 * SHIFTED_PPM,
+ -400 * SHIFTED_PPM,
+ -350 * SHIFTED_PPM,
+ -300 * SHIFTED_PPM,
+ -250 * SHIFTED_PPM,
+ -200 * SHIFTED_PPM,
+ -150 * SHIFTED_PPM,
+ -100 * SHIFTED_PPM,
+ -75 * SHIFTED_PPM,
+ -50 * SHIFTED_PPM,
+ -25 * SHIFTED_PPM,
+ -10 * SHIFTED_PPM,
+ -5 * SHIFTED_PPM,
+ -1 * SHIFTED_PPM,
-1000,
- 1<<16,
- 5<<16,
- 10<<16,
- 25<<16,
- 50<<16,
- 75<<16,
- 100<<16,
- 150<<16,
- 200<<16,
- 250<<16,
- 300<<16,
- 350<<16,
- 400<<16,
- 450<<16,
- 499<<16,
+ 1 * SHIFTED_PPM,
+ 5 * SHIFTED_PPM,
+ 10 * SHIFTED_PPM,
+ 25 * SHIFTED_PPM,
+ 50 * SHIFTED_PPM,
+ 75 * SHIFTED_PPM,
+ 100 * SHIFTED_PPM,
+ 150 * SHIFTED_PPM,
+ 200 * SHIFTED_PPM,
+ 250 * SHIFTED_PPM,
+ 300 * SHIFTED_PPM,
+ 350 * SHIFTED_PPM,
+ 400 * SHIFTED_PPM,
+ 450 * SHIFTED_PPM,
+ 499 * SHIFTED_PPM,
};
long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
- -1000<<16,
- -550<<16,
- 550<<16,
- 1000<<16,
+ -1000 * SHIFTED_PPM,
+ -550 * SHIFTED_PPM,
+ 550 * SHIFTED_PPM,
+ 1000 * SHIFTED_PPM,
};
#define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755
index 00000000000000..d9b042097da7ad
--- /dev/null
+++ b/tools/testing/selftests/turbostat/defcolumns.py
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+print('OK')