aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSrivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>2014-01-21 09:52:25 +1100
committerEli Qiao <taget@linux.vnet.ibm.com>2014-01-22 10:26:22 +0800
commitb2e0cb53dd3ab7c0ffbf86e7ba0636b620f58cb7 (patch)
tree0dcd0f5f05401ea5c6e23d6ac21ad695a226201b
parent644f43d7855c28ac230180869c249722ab904936 (diff)
downloadpowerkvm-b2e0cb53dd3ab7c0ffbf86e7ba0636b620f58cb7.tar.gz
powerpc: Add debug checks to catch invalid cpu-to-node mappings
There have been some weird bugs in the past where the kernel tried to associate threads of the same core to different NUMA nodes, and things went haywire after that point (as expected). But unfortunately, root-causing such issues have been quite challenging, due to the lack of appropriate debug checks in the kernel. These bugs usually lead to some odd soft-lockups in the scheduler's build-sched-domain code in the CPU hotplug path, which makes it very hard to trace it back to the incorrect cpu-to-node mappings. So add appropriate debug checks to catch such invalid cpu-to-node mappings as early as possible. Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/mm/numa.c26
1 files changed, 24 insertions, 2 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b7293bba00622f..7f7c0db2cc560e 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -567,17 +567,39 @@ out:
return nid;
}
+static void verify_cpu_node_mapping(int cpu, int node)
+{
+ int base, sibling, i;
+
+ /* Verify that all the threads in the core belong to the same node */
+ base = cpu_first_thread_sibling(cpu);
+
+ for (i = 0; i < threads_per_core; i++) {
+ sibling = base + i;
+
+ if (sibling == cpu || cpu_is_offline(sibling))
+ continue;
+
+ if (cpu_to_node(sibling) != node) {
+ WARN(1, "CPU thread siblings %d and %d don't belong"
+ " to the same node!\n", cpu, sibling);
+ break;
+ }
+ }
+}
+
static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
unsigned long lcpu = (unsigned long)hcpu;
- int ret = NOTIFY_DONE;
+ int ret = NOTIFY_DONE, nid;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- numa_setup_cpu(lcpu);
+ nid = numa_setup_cpu(lcpu);
+ verify_cpu_node_mapping((int)lcpu, nid);
ret = NOTIFY_OK;
break;
#ifdef CONFIG_HOTPLUG_CPU