aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSrivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>2014-01-21 09:52:38 +1100
committerEli Qiao <taget@linux.vnet.ibm.com>2014-01-22 10:26:25 +0800
commita8893ba19ef7ba8bb39143b02d428f957a5771d4 (patch)
tree3573380e6a61d2ce386992e9b78e85a0c1315b6c
parent0f726b2f1870b2dfaac40aceb3de7a1f9159dfd5 (diff)
downloadpowerkvm-a8893ba19ef7ba8bb39143b02d428f957a5771d4.tar.gz
powernv/cpufreq: Fix crash on hotplug using a hotplug-invariant cpumask
The policy->cpus mask populated by the cpufreq driver is expected to be hotplug invariant, since the cpufreq core copies this mask as-is to the policy->related_cpus mask (which shouldn't vary upon hotplug). The cpufreq core code later prunes the offline CPUs from the policy->cpus mask. At the moment, the powerpc cpufreq driver uses topology_thread_cpumask() to populate policy->cpus during .init(), and hence this is NOT hotplug invariant. Due to this, we hit the following bug: 1. Once we offline all threads of a core, say CPUs 8-15, and online CPU 8 back, its related cpus mask shows: $ cat /sys/devices/system/cpu/cpu8/cpufreq/related_cpus 8 [ It should have actually shown 8 9 10 11 12 13 14 15 ] 2. When we try to online the next sibling thread (CPU 9), it tries to do a fresh initialization since it is not listed in the related_cpus mask of CPU 8. (Note that for CPU 9, the cpufreq driver would have populated the related_cpus mask as [ 8 9 ], since those are the 2 online threads in that core so far). During CPU 9 init, it fails in the call to cpufreq_add_dev_symlink() because it tries to initialize the sysfs files for CPU 8 as well (which had already been initialized) while iterating through the policy->cpus. 
As a result, we hit this bug while onlining CPU 9: [ 1019.458183] sysfs: cannot create duplicate filename '/devices/system/cpu/cpu8/cpufreq' [ 1019.458270] ------------[ cut here ]------------ [ 1019.458338] WARNING: at fs/sysfs/dir.c:530 [ 1019.458367] Modules linked in: xt_tcpudp ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables kvm binfmt_misc autofs4 lpfc [ 1019.458543] CPU: 76 PID: 73014 Comm: bash Not tainted 3.10.11-cpufreq-10 #1 [ 1019.458590] task: c000000ff02c3200 ti: c000000fe7604000 task.ti: c000000fe7604000 [ 1019.458645] NIP: c000000000284634 LR: c000000000284630 CTR: c0000000005b5d10 [ 1019.458700] REGS: c000000fe7606fa0 TRAP: 0700 Not tainted (3.10.11-cpufreq-10) [ 1019.458754] MSR: 9000000100029032 <SF,HV,EE,ME,IR,DR,RI> CR: 28222824 XER: 20000000 [ 1019.458883] SOFTE: 1 [ 1019.458903] CFAR: c000000000874d6c [ 1019.458930] GPR00: c000000000284630 c000000fe7607220 c000000000d9ab60 000000000000004a GPR04: 0000000000000000 000000000000005a c000000000c82fb8 c000000004482448 GPR08: c000000000c7ab60 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000028222822 c00000000fe13000 0000000010142550 c000000000ce8d70 GPR16: 0000000000000001 c000000000f28c68 0000000000000000 c000000003c20030 GPR20: c000000ff6d91800 c000000000ce8fc8 c000000000b45340 c000000000e26858 GPR24: c000000000ce8d70 0000000000000000 0000000000000001 c000000ff6d91a70 GPR28: c000000fef1b2000 c000000fe7607320 c000000fc98087a0 ffffffffffffffef [ 1019.459605] NIP [c000000000284634] .sysfs_add_one+0xe4/0x100 [ 1019.459653] LR [c000000000284630] .sysfs_add_one+0xe0/0x100 [ 1019.459689] PACATMSCRATCH [9000000100009032] [ 1019.459726] Call Trace: [ 1019.459747] [c000000fe7607220] [c000000000284630] .sysfs_add_one+0xe0/0x100 (unreliable) [ 1019.459813] [c000000fe76072b0] [c0000000002854dc] .sysfs_do_create_link_sd+0x10c/0x320 [ 1019.459879] [c000000fe7607370] [c000000000718318] .cpufreq_add_dev_interface+0x2e8/0x410 [ 1019.459943] 
[c000000fe7607710] [c000000000718da0] .cpufreq_add_dev+0x590/0x6d0 [ 1019.460009] [c000000fe7607810] [c000000000899580] .cpufreq_cpu_callback+0x7c/0x94 [ 1019.460073] [c000000fe7607890] [c00000000086f40c] .notifier_call_chain+0x8c/0x100 [ 1019.460138] [c000000fe7607930] [c000000000091450] .cpu_notify+0x40/0xa0 [ 1019.460194] [c000000fe76079b0] [c00000000089696c] ._cpu_up+0x17c/0x1ec [ 1019.460249] [c000000fe7607a70] [c000000000896b40] .cpu_up+0x164/0x194 [ 1019.460304] [c000000fe7607b00] [c000000000746edc] .store_online+0xbc/0xa60 [ 1019.460361] [c000000fe7607bb0] [c0000000004faf64] .dev_attr_store+0x64/0xa0 [ 1019.460417] [c000000fe7607c40] [c000000000282244] .sysfs_write_file+0xf4/0x1d0 [ 1019.460482] [c000000fe7607cf0] [c0000000001f1fa8] .vfs_write+0xe8/0x260 [ 1019.460537] [c000000fe7607d90] [c0000000001f2c44] .SyS_write+0x64/0xe0 [ 1019.460593] [c000000fe7607e30] [c000000000009d54] syscall_exit+0x0/0x98 [ 1019.460647] Instruction dump: [ 1019.460675] 481b0b2d 60000000 e89e0010 7f83e378 38a01000 481b0b19 60000000 7f84e378 [ 1019.460774] 3c62ffd5 38632cf0 485f06dd 60000000 <0fe00000> 7f83e378 4bf5f8a5 60000000 [ 1019.460952] ---[ end trace 600f2280a5b2cd86 ]--- None of this would have occurred if related_cpus had remained unchanged during hotplug, because in that case, CPU 9 would have done a light-weight init, thus avoiding this duplication bug. So fix this by populating policy->cpus in a hotplug invariant manner in the cpufreq driver. Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--drivers/cpufreq/powerpc-book3s-cpufreq.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/drivers/cpufreq/powerpc-book3s-cpufreq.c b/drivers/cpufreq/powerpc-book3s-cpufreq.c
index d18519d9fcf16..9ab0c80ee4555 100644
--- a/drivers/cpufreq/powerpc-book3s-cpufreq.c
+++ b/drivers/cpufreq/powerpc-book3s-cpufreq.c
@@ -24,6 +24,8 @@
#include <linux/delay.h>
#include <linux/of_platform.h>
+#include <asm/cputhreads.h>
+#include <asm/topology.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/scom.h>
@@ -201,10 +203,14 @@ int powernv_set_freq(cpumask_var_t cpus, unsigned int new_index)
static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
- int i;
+ int base, i;
#ifdef CONFIG_SMP
- cpumask_copy(policy->cpus, topology_thread_cpumask(policy->cpu));
+ base = cpu_first_thread_sibling(policy->cpu);
+
+ for (i = 0; i < threads_per_core; i++) {
+ cpumask_set_cpu(base + i, policy->cpus);
+ }
#endif
policy->cpuinfo.transition_latency = 25000;