aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2023-10-13 20:10:33 -0700
committerTony Luck <tony.luck@intel.com>2023-10-16 10:07:50 -0700
commit8de349ad7259c6a1a5a4f4999f4afa6ff097b171 (patch)
tree35c74235ce9fae742cabd1935c76c442f9d1e178
parent77fcf12d16c55254e518923342c0fa7ecead3361 (diff)
downloadlinux-resctrl2_patches_v6.6-rc5.tar.gz
resctrl2: Intel Sub-NUMA Cluster (SNC) support.resctrl2_patches_v6.6-rc5
Detect SNC mode and when enabled make adjustments:
 1) Monitor functions are per-node instead of per-L3 cache
 2) RMID counters divided up by nodes, adjust count available
 3) Reading RMIDs requires a per-node offset loaded into QM_EVTSEL MSR
 4) Adjust upscale factor for values from QM_CTR
 5) Effective sizes of L3 caches are divided by SNC nodes
 6) Allow scoping of domains by "NODE".
 7) Need to update MSR_RMID_SNC_CONFIG to reconfigure RMIDs
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/resctrl.h1
-rw-r--r--fs/resctrl2/arch/x86/rdt_llc_occupancy.c3
-rw-r--r--fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c3
-rw-r--r--fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c3
-rw-r--r--fs/resctrl2/arch/x86/rdt_monitor.c105
-rw-r--r--fs/resctrl2/domain.c1
-rw-r--r--fs/resctrl2/size.c11
-rw-r--r--include/linux/resctrl.h1
9 files changed, 118 insertions, 11 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 7aec906c4a528c..b5ae4a3421aa94 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1101,6 +1101,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 1bf79a583ec6eb..3eba7d88d10d37 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -101,6 +101,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#include <linux/bitfield.h>
extern resctrl_ids_t arch_resctrl_default_ids;
+extern int arch_snc_nodes_per_l3_cache;
int arch_init_alloc_ids(struct resctrl_resource *r);
void arch_reset_alloc_ids(void);
diff --git a/fs/resctrl2/arch/x86/rdt_llc_occupancy.c b/fs/resctrl2/arch/x86/rdt_llc_occupancy.c
index 3087248b43c1a9..231643772e7cae 100644
--- a/fs/resctrl2/arch/x86/rdt_llc_occupancy.c
+++ b/fs/resctrl2/arch/x86/rdt_llc_occupancy.c
@@ -47,6 +47,9 @@ static int rdt_monitor_init(void)
if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature))
return -ENODEV;
+ if (arch_snc_nodes_per_l3_cache > 1)
+ mon.scope = RESCTRL_NODE;
+
resctrl_register_resource(&mon);
return 0;
diff --git a/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c b/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c
index f19cc7beb8711f..fda31b0ec8404a 100644
--- a/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c
+++ b/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c
@@ -47,6 +47,9 @@ static int rdt_monitor_init(void)
if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature))
return -ENODEV;
+ if (arch_snc_nodes_per_l3_cache > 1)
+ mon.scope = RESCTRL_NODE;
+
resctrl_register_resource(&mon);
return 0;
diff --git a/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c b/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c
index 898a2a32a58c2e..210a66c0aef05d 100644
--- a/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c
+++ b/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c
@@ -47,6 +47,9 @@ static int rdt_monitor_init(void)
if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature))
return -ENODEV;
+ if (arch_snc_nodes_per_l3_cache > 1)
+ mon.scope = RESCTRL_NODE;
+
resctrl_register_resource(&mon);
return 0;
diff --git a/fs/resctrl2/arch/x86/rdt_monitor.c b/fs/resctrl2/arch/x86/rdt_monitor.c
index 36e375673a0084..e3411bc1909066 100644
--- a/fs/resctrl2/arch/x86/rdt_monitor.c
+++ b/fs/resctrl2/arch/x86/rdt_monitor.c
@@ -1,15 +1,23 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2023 Intel Corporation. */
-#include <asm/cpufeatures.h>
+#include <linux/cacheinfo.h>
#include <linux/kthread.h>
#include <linux/delay.h>
+#include <linux/mod_devicetable.h>
+
+#include <asm/cpufeatures.h>
+#include <asm/cpu_device_id.h>
#include "../../internal.h"
#include "rdt.h"
#define MBM_POLL_DELAY 1000 // milliseconds
+
+int arch_snc_nodes_per_l3_cache = 1;
+EXPORT_SYMBOL_GPL(arch_snc_nodes_per_l3_cache);
+
static int max_threshold_occupancy;
static int mbm_width = 24;
static char *mon_features;
@@ -114,6 +122,17 @@ struct rrmid_info {
u64 chunks;
};
+/*
+ * Convert a logical RMID into the hardware RMID for the SNC node on
+ * which this code executes.  With SNC enabled the RMID space of an L3
+ * cache is partitioned between its nodes, so the node's index within
+ * the cache (node % arch_snc_nodes_per_l3_cache) selects a block of
+ * "num_rmids" hardware RMIDs.  No-op when SNC is disabled (ratio == 1).
+ * NOTE(review): correctness relies on being called on a CPU inside the
+ * domain being read (raw_smp_processor_id()); callers appear to arrange
+ * this via cross-CPU calls/kthread placement - confirm for new callers.
+ */
+static u64 snc_adjust_rmid(u64 rmid)
+{
+	if (arch_snc_nodes_per_l3_cache > 1) {
+		int node = cpu_to_node(raw_smp_processor_id());
+
+		rmid += (node % arch_snc_nodes_per_l3_cache) * num_rmids;
+	}
+
+	return rmid;
+}
+
static void __rdt_rmid_read(void *info)
{
struct rrmid_info *rr = info;
@@ -124,7 +143,7 @@ static void __rdt_rmid_read(void *info)
m = rr->domain;
if (rr->event <= EV_LOC) {
- wrmsrl(MSR_IA32_QM_EVTSEL, (rr->rmid << 32) | rr->event);
+ wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rr->rmid) << 32) | rr->event);
rdmsrl(MSR_IA32_QM_CTR, chunks);
} else {
chunks = 0;
@@ -138,7 +157,7 @@ static void __rdt_rmid_read(void *info)
u64 crmid = cr - rmid_array;
if (rr->event <= EV_LOC) {
- wrmsrl(MSR_IA32_QM_EVTSEL, (crmid << 32) | rr->event);
+ wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(crmid) << 32) | rr->event);
rdmsrl(MSR_IA32_QM_CTR, chunks);
} else {
chunks = 0;
@@ -190,7 +209,7 @@ static void update_rmids(void *info)
s = &ri->mydomain->state[rmid].state[0];
else
s = &ri->mydomain->state[rmid].state[1];
- wrmsrl(MSR_IA32_QM_EVTSEL, (rmid << 32) | event);
+ wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rmid) << 32) | event);
rdmsrl(MSR_IA32_QM_CTR, msr);
now = jiffies;
addchunks = wrap(s->prev_msr, msr);
@@ -219,7 +238,7 @@ static void check_limbo(struct mydomain *m)
if (!(r->llc_busy_domains & BIT(m->id)))
continue;
- wrmsrl(MSR_IA32_QM_EVTSEL, (rmid << 32) | EV_LLC);
+ wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rmid) << 32) | EV_LLC);
rdmsrl(MSR_IA32_QM_CTR, chunks);
if (chunks <= llc_busy_threshold) {
@@ -390,6 +409,18 @@ void rmid_reparent(int rmid, int prmid)
list_move(&r->child_list, &pr->child_list);
}
+/*
+ * Reconfigure RMID mapping via MSR_RMID_SNC_CONFIG on the current CPU:
+ * bit 0 is cleared when a monitor domain comes online (online == true)
+ * and set again when the domain goes offline.
+ * NOTE(review): the semantics of bit 0 are assumed here - setting it
+ * presumably restores the legacy "RMIDs shared across the whole L3"
+ * mode while clearing it distributes RMIDs per SNC node; confirm
+ * against the SDM description of this MSR.  Also assumes the caller
+ * runs on a CPU within the affected domain - verify at call sites.
+ */
+static void snc_remap_rmids(bool online)
+{
+	u64 val;
+
+	rdmsrl(MSR_RMID_SNC_CONFIG, val);
+	if (online)
+		val &= ~BIT_ULL(0);
+	else
+		val |= BIT_ULL(0);
+	wrmsrl(MSR_RMID_SNC_CONFIG, val);
+}
+
static void domain_update(struct resctrl_resource *r, int what, int cpu, void *domain)
{
struct mydomain *m = domain;
@@ -397,6 +428,7 @@ static void domain_update(struct resctrl_resource *r, int what, int cpu, void *d
if (what == RESCTRL_DOMAIN_DELETE) {
/* Last CPU in domain going offline, stop polling */
m->kthread = NULL;
+ snc_remap_rmids(false);
} else if (what == RESCTRL_DOMAIN_DELETE_CPU && cpu == m->cpu) {
/* Polling CPU for this domain going offline, pick another */
m->kthread = NULL;
@@ -405,6 +437,7 @@ static void domain_update(struct resctrl_resource *r, int what, int cpu, void *d
} else if (what == RESCTRL_DOMAIN_ADD) {
/* New domain online, start polling */
m->cpu = -1;
+ snc_remap_rmids(true);
init_poll_one_domain(m);
}
}
@@ -480,6 +513,62 @@ static void add_feature(char *feature)
mon_features = tmp;
}
+/*
+ * CPU models that support MSR_RMID_SNC_CONFIG.
+ * snc_get_config() bails out early (SNC treated as disabled) on any
+ * model not listed here, so new SNC-capable server parts must be added
+ * to this table for SNC detection to run at all.
+ */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 0),
+	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, 0),
+	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, 0),
+	{}
+};
+
+/*
+ * There isn't a simple h/w bit that indicates whether a CPU is running
+ * in Sub NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * ratio of NUMA nodes to L3 cache instances.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
+ *
+ * Returns the number of SNC nodes per L3 cache (1 == SNC disabled or
+ * undetectable).  As a side effect, switches the monitor resource to
+ * per-node scope when SNC is active.
+ */
+static __init int snc_get_config(void)
+{
+	unsigned long *node_caches;
+	int mem_only_nodes = 0;
+	int cpu, node, ret;
+	int num_l3_caches;
+
+	if (!x86_match_cpu(snc_cpu_ids))
+		return 1;
+
+	node_caches = bitmap_zalloc(nr_node_ids, GFP_KERNEL);
+	if (!node_caches)
+		return 1;
+
+	/*
+	 * Count distinct L3 cache instances by recording the cache id of
+	 * one CPU per node.  CPU-less nodes are memory-only and must be
+	 * excluded from the node/cache ratio.
+	 * NOTE(review): get_cpu_cacheinfo_id() can return -1 if cacheinfo
+	 * is not populated for the CPU - confirm that cannot happen this
+	 * late in boot, else set_bit() would be handed a negative index.
+	 */
+	cpus_read_lock();
+	for_each_node(node) {
+		cpu = cpumask_first(cpumask_of_node(node));
+		if (cpu < nr_cpu_ids)
+			set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches);
+		else
+			mem_only_nodes++;
+	}
+	cpus_read_unlock();
+
+	num_l3_caches = bitmap_weight(node_caches, nr_node_ids);
+	/* Pair bitmap_zalloc() with bitmap_free(), not kfree() */
+	bitmap_free(node_caches);
+
+	if (!num_l3_caches)
+		return 1;
+
+	ret = (nr_node_ids - mem_only_nodes) / num_l3_caches;
+
+	if (ret > 1)
+		monitor.scope = RESCTRL_NODE;
+
+	return ret;
+}
+
static int __init rdt_monitor_init(void)
{
u32 eax, ebx, ecx, edx;
@@ -487,6 +576,8 @@ static int __init rdt_monitor_init(void)
if (!boot_cpu_has(X86_FEATURE_CQM) || !boot_cpu_has(X86_FEATURE_CQM_LLC))
return -ENODEV;
+ arch_snc_nodes_per_l3_cache = snc_get_config();
+
if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
add_feature("llc_occupancy");
if (boot_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
@@ -501,8 +592,8 @@ static int __init rdt_monitor_init(void)
} else {
mbm_width += eax & 0xff;
}
- upscale = ebx;
- num_rmids = ecx + 1;
+ upscale = ebx / arch_snc_nodes_per_l3_cache;
+ num_rmids = (ecx + 1) / arch_snc_nodes_per_l3_cache;
rdt_mbm_apply_quirk(num_rmids);
monitor.domain_size += num_rmids * sizeof(struct arch_mbm_state);
diff --git a/fs/resctrl2/domain.c b/fs/resctrl2/domain.c
index 79a7b8ad623446..8d0fc481abf9aa 100644
--- a/fs/resctrl2/domain.c
+++ b/fs/resctrl2/domain.c
@@ -42,6 +42,7 @@ static int get_domain_id(unsigned int cpu, enum resctrl_scope scope)
switch (scope) {
case RESCTRL_L2CACHE: return get_cpu_cacheinfo_id(cpu, 2);
case RESCTRL_L3CACHE: return get_cpu_cacheinfo_id(cpu, 3);
+ case RESCTRL_NODE: return cpu_to_node(cpu);
case RESCTRL_SOCKET: return topology_physical_package_id(cpu);
}
return -1;
diff --git a/fs/resctrl2/size.c b/fs/resctrl2/size.c
index 61cd56e7597947..e45bb0bbb3a1cb 100644
--- a/fs/resctrl2/size.c
+++ b/fs/resctrl2/size.c
@@ -6,7 +6,10 @@
static void show_val(struct seq_file *m, struct resctrl_resource *r, struct resctrl_domain *d,
int ctrl_indx)
{
+ int size = BITS_TO_LONGS(d->param);
unsigned long *curval = d->ctrls;
+ u32 cache_slice_size;
+ int nbits;
switch (r->schemata_fmt) {
default:
@@ -14,11 +17,11 @@ static void show_val(struct seq_file *m, struct resctrl_resource *r, struct resc
seq_printf(m, "%lu", curval[ctrl_indx]);
break;
case RESCTRL_BITMASK:
- int size = BITS_TO_LONGS(d->param);
- int nbits;
-
nbits = bitmap_weight(&curval[ctrl_indx * size], d->param);
- seq_printf(m, "%u", d->cache_size / d->param * nbits);
+ cache_slice_size = d->cache_size / d->param * nbits;
+ if (r->scope == RESCTRL_L3CACHE)
+ cache_slice_size /= arch_snc_nodes_per_l3_cache;
+ seq_printf(m, "%u", cache_slice_size);
break;
}
}
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8b5694fa6eac7e..9210cd6c644a08 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -299,6 +299,7 @@ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
enum resctrl_scope {
RESCTRL_L2CACHE,
RESCTRL_L3CACHE,
+ RESCTRL_NODE,
RESCTRL_SOCKET,
};