diff options
author | Tony Luck <tony.luck@intel.com> | 2023-10-13 20:10:33 -0700 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2023-10-16 10:07:50 -0700 |
commit | 8de349ad7259c6a1a5a4f4999f4afa6ff097b171 (patch) | |
tree | 35c74235ce9fae742cabd1935c76c442f9d1e178 | |
parent | 77fcf12d16c55254e518923342c0fa7ecead3361 (diff) | |
download | linux-resctrl2_patches_v6.6-rc5.tar.gz |
resctrl2: Intel Sub-NUMA Cluster (SNC) support. (tag: resctrl2_patches_v6.6-rc5)
Detect SNC mode and, when it is enabled, make the following adjustments:
1) Monitor functions are per-node instead of per-L3 cache
2) RMID counters are divided up among the nodes; adjust the count available
3) Reading RMIDs requires a per-node offset loaded into the QM_EVTSEL MSR
4) Adjust the upscale factor for values read from QM_CTR
5) Effective sizes of L3 caches are divided by the number of SNC nodes per L3 cache
6) Allow scoping of domains by "NODE"
7) MSR_RMID_SNC_CONFIG needs to be updated to reconfigure RMIDs
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/resctrl.h | 1 | ||||
-rw-r--r-- | fs/resctrl2/arch/x86/rdt_llc_occupancy.c | 3 | ||||
-rw-r--r-- | fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c | 3 | ||||
-rw-r--r-- | fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c | 3 | ||||
-rw-r--r-- | fs/resctrl2/arch/x86/rdt_monitor.c | 105 | ||||
-rw-r--r-- | fs/resctrl2/domain.c | 1 | ||||
-rw-r--r-- | fs/resctrl2/size.c | 11 | ||||
-rw-r--r-- | include/linux/resctrl.h | 1 |
9 files changed, 118 insertions, 11 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 7aec906c4a528c..b5ae4a3421aa94 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1101,6 +1101,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 1bf79a583ec6eb..3eba7d88d10d37 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -101,6 +101,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c); #include <linux/bitfield.h> extern resctrl_ids_t arch_resctrl_default_ids; +extern int arch_snc_nodes_per_l3_cache; int arch_init_alloc_ids(struct resctrl_resource *r); void arch_reset_alloc_ids(void); diff --git a/fs/resctrl2/arch/x86/rdt_llc_occupancy.c b/fs/resctrl2/arch/x86/rdt_llc_occupancy.c index 3087248b43c1a9..231643772e7cae 100644 --- a/fs/resctrl2/arch/x86/rdt_llc_occupancy.c +++ b/fs/resctrl2/arch/x86/rdt_llc_occupancy.c @@ -47,6 +47,9 @@ static int rdt_monitor_init(void) if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature)) return -ENODEV; + if (arch_snc_nodes_per_l3_cache > 1) + mon.scope = RESCTRL_NODE; + resctrl_register_resource(&mon); return 0; diff --git a/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c b/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c index f19cc7beb8711f..fda31b0ec8404a 100644 --- a/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c +++ b/fs/resctrl2/arch/x86/rdt_mbm_local_bytes.c @@ -47,6 +47,9 @@ static int rdt_monitor_init(void) if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature)) return -ENODEV; + if (arch_snc_nodes_per_l3_cache > 1) + mon.scope = RESCTRL_NODE; + resctrl_register_resource(&mon); return 0; diff --git a/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c b/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c index 
898a2a32a58c2e..210a66c0aef05d 100644 --- a/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c +++ b/fs/resctrl2/arch/x86/rdt_mbm_total_bytes.c @@ -47,6 +47,9 @@ static int rdt_monitor_init(void) if (!boot_cpu_has(X86_FEATURE_CQM) || !x86_match_cpu(mon_feature)) return -ENODEV; + if (arch_snc_nodes_per_l3_cache > 1) + mon.scope = RESCTRL_NODE; + resctrl_register_resource(&mon); return 0; diff --git a/fs/resctrl2/arch/x86/rdt_monitor.c b/fs/resctrl2/arch/x86/rdt_monitor.c index 36e375673a0084..e3411bc1909066 100644 --- a/fs/resctrl2/arch/x86/rdt_monitor.c +++ b/fs/resctrl2/arch/x86/rdt_monitor.c @@ -1,15 +1,23 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2023 Intel Corporation. */ -#include <asm/cpufeatures.h> +#include <linux/cacheinfo.h> #include <linux/kthread.h> #include <linux/delay.h> +#include <linux/mod_devicetable.h> + +#include <asm/cpufeatures.h> +#include <asm/cpu_device_id.h> #include "../../internal.h" #include "rdt.h" #define MBM_POLL_DELAY 1000 // milliseconds + +int arch_snc_nodes_per_l3_cache = 1; +EXPORT_SYMBOL_GPL(arch_snc_nodes_per_l3_cache); + static int max_threshold_occupancy; static int mbm_width = 24; static char *mon_features; @@ -114,6 +122,17 @@ struct rrmid_info { u64 chunks; }; +static u64 snc_adjust_rmid(u64 rmid) +{ + if (arch_snc_nodes_per_l3_cache > 1) { + int node = cpu_to_node(raw_smp_processor_id()); + + rmid += (node % arch_snc_nodes_per_l3_cache) * num_rmids; + } + + return rmid; +} + static void __rdt_rmid_read(void *info) { struct rrmid_info *rr = info; @@ -124,7 +143,7 @@ static void __rdt_rmid_read(void *info) m = rr->domain; if (rr->event <= EV_LOC) { - wrmsrl(MSR_IA32_QM_EVTSEL, (rr->rmid << 32) | rr->event); + wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rr->rmid) << 32) | rr->event); rdmsrl(MSR_IA32_QM_CTR, chunks); } else { chunks = 0; @@ -138,7 +157,7 @@ static void __rdt_rmid_read(void *info) u64 crmid = cr - rmid_array; if (rr->event <= EV_LOC) { - wrmsrl(MSR_IA32_QM_EVTSEL, (crmid << 32) | rr->event); + 
wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(crmid) << 32) | rr->event); rdmsrl(MSR_IA32_QM_CTR, chunks); } else { chunks = 0; @@ -190,7 +209,7 @@ static void update_rmids(void *info) s = &ri->mydomain->state[rmid].state[0]; else s = &ri->mydomain->state[rmid].state[1]; - wrmsrl(MSR_IA32_QM_EVTSEL, (rmid << 32) | event); + wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rmid) << 32) | event); rdmsrl(MSR_IA32_QM_CTR, msr); now = jiffies; addchunks = wrap(s->prev_msr, msr); @@ -219,7 +238,7 @@ static void check_limbo(struct mydomain *m) if (!(r->llc_busy_domains & BIT(m->id))) continue; - wrmsrl(MSR_IA32_QM_EVTSEL, (rmid << 32) | EV_LLC); + wrmsrl(MSR_IA32_QM_EVTSEL, (snc_adjust_rmid(rmid) << 32) | EV_LLC); rdmsrl(MSR_IA32_QM_CTR, chunks); if (chunks <= llc_busy_threshold) { @@ -390,6 +409,18 @@ void rmid_reparent(int rmid, int prmid) list_move(&r->child_list, &pr->child_list); } +static void snc_remap_rmids(bool online) +{ + u64 val; + + rdmsrl(MSR_RMID_SNC_CONFIG, val); + if (online) + val &= ~BIT_ULL(0); + else + val |= BIT_ULL(0); + wrmsrl(MSR_RMID_SNC_CONFIG, val); +} + static void domain_update(struct resctrl_resource *r, int what, int cpu, void *domain) { struct mydomain *m = domain; @@ -397,6 +428,7 @@ static void domain_update(struct resctrl_resource *r, int what, int cpu, void *d if (what == RESCTRL_DOMAIN_DELETE) { /* Last CPU in domain going offline, stop polling */ m->kthread = NULL; + snc_remap_rmids(false); } else if (what == RESCTRL_DOMAIN_DELETE_CPU && cpu == m->cpu) { /* Polling CPU for this domain going offline, pick another */ m->kthread = NULL; @@ -405,6 +437,7 @@ static void domain_update(struct resctrl_resource *r, int what, int cpu, void *d } else if (what == RESCTRL_DOMAIN_ADD) { /* New domain online, start polling */ m->cpu = -1; + snc_remap_rmids(true); init_poll_one_domain(m); } } @@ -480,6 +513,62 @@ static void add_feature(char *feature) mon_features = tmp; } +/* CPU models that support MSR_RMID_SNC_CONFIG */ +static const struct x86_cpu_id 
snc_cpu_ids[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, 0), + {} +}; + +/* + * There isn't a simple h/w bit that indicates whether a CPU is running + * in Sub NUMA Cluster (SNC) mode. Infer the state by comparing the + * ratio of NUMA nodes to L3 cache instances. + * It is not possible to accurately determine SNC state if the system is + * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes + * to L3 caches. It will be OK if system is booted with hyperthreading + * disabled (since this doesn't affect the ratio). + */ +static __init int snc_get_config(void) +{ + unsigned long *node_caches; + int mem_only_nodes = 0; + int cpu, node, ret; + int num_l3_caches; + + if (!x86_match_cpu(snc_cpu_ids)) + return 1; + + node_caches = bitmap_zalloc(nr_node_ids, GFP_KERNEL); + if (!node_caches) + return 1; + + cpus_read_lock(); + for_each_node(node) { + cpu = cpumask_first(cpumask_of_node(node)); + if (cpu < nr_cpu_ids) + set_bit(get_cpu_cacheinfo_id(cpu, 3), node_caches); + else + mem_only_nodes++; + } + cpus_read_unlock(); + + num_l3_caches = bitmap_weight(node_caches, nr_node_ids); + kfree(node_caches); + + if (!num_l3_caches) + return 1; + + ret = (nr_node_ids - mem_only_nodes) / num_l3_caches; + + if (ret > 1) + monitor.scope = RESCTRL_NODE; + + return ret; +} + static int __init rdt_monitor_init(void) { u32 eax, ebx, ecx, edx; @@ -487,6 +576,8 @@ static int __init rdt_monitor_init(void) if (!boot_cpu_has(X86_FEATURE_CQM) || !boot_cpu_has(X86_FEATURE_CQM_LLC)) return -ENODEV; + arch_snc_nodes_per_l3_cache = snc_get_config(); + if (boot_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) add_feature("llc_occupancy"); if (boot_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) @@ -501,8 +592,8 @@ static int __init rdt_monitor_init(void) } else { mbm_width += eax & 0xff; } - upscale = ebx; - num_rmids = ecx + 1; + upscale = 
ebx / arch_snc_nodes_per_l3_cache; + num_rmids = (ecx + 1) / arch_snc_nodes_per_l3_cache; rdt_mbm_apply_quirk(num_rmids); monitor.domain_size += num_rmids * sizeof(struct arch_mbm_state); diff --git a/fs/resctrl2/domain.c b/fs/resctrl2/domain.c index 79a7b8ad623446..8d0fc481abf9aa 100644 --- a/fs/resctrl2/domain.c +++ b/fs/resctrl2/domain.c @@ -42,6 +42,7 @@ static int get_domain_id(unsigned int cpu, enum resctrl_scope scope) switch (scope) { case RESCTRL_L2CACHE: return get_cpu_cacheinfo_id(cpu, 2); case RESCTRL_L3CACHE: return get_cpu_cacheinfo_id(cpu, 3); + case RESCTRL_NODE: return cpu_to_node(cpu); case RESCTRL_SOCKET: return topology_physical_package_id(cpu); } return -1; diff --git a/fs/resctrl2/size.c b/fs/resctrl2/size.c index 61cd56e7597947..e45bb0bbb3a1cb 100644 --- a/fs/resctrl2/size.c +++ b/fs/resctrl2/size.c @@ -6,7 +6,10 @@ static void show_val(struct seq_file *m, struct resctrl_resource *r, struct resctrl_domain *d, int ctrl_indx) { + int size = BITS_TO_LONGS(d->param); unsigned long *curval = d->ctrls; + u32 cache_slice_size; + int nbits; switch (r->schemata_fmt) { default: @@ -14,11 +17,11 @@ static void show_val(struct seq_file *m, struct resctrl_resource *r, struct resc seq_printf(m, "%lu", curval[ctrl_indx]); break; case RESCTRL_BITMASK: - int size = BITS_TO_LONGS(d->param); - int nbits; - nbits = bitmap_weight(&curval[ctrl_indx * size], d->param); - seq_printf(m, "%u", d->cache_size / d->param * nbits); + cache_slice_size = d->cache_size / d->param * nbits; + if (r->scope == RESCTRL_L3CACHE) + cache_slice_size /= arch_snc_nodes_per_l3_cache; + seq_printf(m, "%u", cache_slice_size); break; } } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8b5694fa6eac7e..9210cd6c644a08 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -299,6 +299,7 @@ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} enum resctrl_scope { RESCTRL_L2CACHE, RESCTRL_L3CACHE, + RESCTRL_NODE, RESCTRL_SOCKET, }; |