aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2014-01-21 09:52:35 +1100
committerEli Qiao <taget@linux.vnet.ibm.com>2014-01-22 10:26:24 +0800
commit06f5602188ab63d07aee890e694a8f5cf2fa57ad (patch)
tree6fc608f4bf772416174081130a83608f8b155331
parent447863a8ae2d1258f76e884fef0a1924974321a9 (diff)
downloadpowerkvm-06f5602188ab63d07aee890e694a8f5cf2fa57ad.tar.gz
powerpc/book3s: Recover from MC in sapphire on SCOM read via MMIO.
Detect and recover from machine check when inside opal on a special scom load instructions. On specific SCOM read via MMIO we may get a machine check exception with SRR0 pointing inside opal. To recover from MC in this scenario, get a recovery instruction address and return to it from MC. OPAL will export the machine check recoverable ranges through device tree node mcheck-recoverable-ranges under ibm,opal: # hexdump /proc/device-tree/ibm,opal/mcheck-recoverable-ranges 0000000 0000 0000 3000 2804 0000 000c 0000 0000 0000010 3000 2814 0000 0000 3000 27f0 0000 000c 0000020 0000 0000 3000 2814 xxxx xxxx xxxx xxxx 0000030 llll llll yyyy yyyy yyyy yyyy ... ... # where: xxxx xxxx xxxx xxxx = Starting instruction address llll llll = Length of the address range. yyyy yyyy yyyy yyyy = recovery address Each recoverable address range entry is an (start address, len, recovery address), 2 cells each for start and recovery address, 1 cell for len, totalling 5 cells per entry. During kernel boot time, build up the recovery table with the list of recovery ranges from device-tree node which will be used during machine check exception to recover from MMIO SCOM UE. Changes in v2: - As per Ben's comment, added mcheck-recoverable-ranges property under ibm,opal node. - Changed the format of the mcheck-recoverable-ranges list. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/machdep.h3
-rw-r--r--arch/powerpc/include/asm/mce.h3
-rw-r--r--arch/powerpc/include/asm/opal.h3
-rw-r--r--arch/powerpc/kernel/mce.c4
-rw-r--r--arch/powerpc/kernel/mce_power.c37
-rw-r--r--arch/powerpc/kernel/prom.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c100
-rw-r--r--arch/powerpc/platforms/powernv/setup.c1
8 files changed, 146 insertions, 10 deletions
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 7d26aabadf472d..fe5268c9fb434b 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -167,6 +167,9 @@ struct machdep_calls {
int (*system_reset_exception)(struct pt_regs *regs);
int (*machine_check_exception)(struct pt_regs *regs);
+ /* Called during machine check exception to retrive fixup address. */
+ bool (*mce_check_early_recovery)(struct pt_regs *regs);
+
/* Motherboard/chipset features. This is a kind of general purpose
* hook used to control some machine specific features (like reset
* lines, chip power control, etc...).
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 8e99edf6d966d8..f97d8cb6bdf64f 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -187,7 +187,8 @@ struct mce_error_info {
#define MCE_EVENT_DONTRELEASE false
extern void save_mce_event(struct pt_regs *regs, long handled,
- struct mce_error_info *mce_err, uint64_t addr);
+ struct mce_error_info *mce_err, uint64_t nip,
+ uint64_t addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index d2a28e673161a4..013a696d098a00 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -880,6 +880,8 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token,
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
+extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
+ const char *uname, int depth, void *data);
extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
@@ -921,6 +923,7 @@ extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
extern int opal_machine_check(struct pt_regs *regs);
+extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern void opal_shutdown(void);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index cadef7e64e4245..a7fd4cb78b788e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -70,7 +70,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
*/
void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
- uint64_t addr)
+ uint64_t nip, uint64_t addr)
{
uint64_t srr1;
int index = __get_cpu_var(mce_nest_count)++;
@@ -86,7 +86,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
/* Populate generic machine check info */
mce->version = MCE_V1;
- mce->srr0 = regs->nip;
+ mce->srr0 = nip;
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index b36e777a734fe0..4317194c197791 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -26,6 +26,7 @@
#include <linux/ptrace.h>
#include <asm/mmu.h>
#include <asm/mce.h>
+#include <asm/machdep.h>
/* flush SLBs and reload */
static void flush_and_reload_slb(void)
@@ -197,13 +198,32 @@ static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
}
}
+static long mce_handle_ue_error(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ /*
+ * On specific SCOM read via MMIO we may get a machine check
+ * exception with SRR0 pointing inside opal. If that is the
+ * case OPAL may have recovery address to re-read SCOM data in
+ * different way and hence we can recover from this MC.
+ */
+
+ if (ppc_md.mce_check_early_recovery) {
+ if (ppc_md.mce_check_early_recovery(regs))
+ handled = 1;
+ }
+ return handled;
+}
+
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- uint64_t srr1, addr;
+ uint64_t srr1, nip, addr;
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
srr1 = regs->msr;
+ nip = regs->nip;
/*
* Handle memory errors depending whether this was a load/store or
@@ -221,7 +241,11 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
addr = regs->nip;
}
- save_mce_event(regs, handled, &mce_error_info, addr);
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
@@ -263,11 +287,12 @@ static long mce_handle_derror_p8(uint64_t dsisr)
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- uint64_t srr1, addr;
+ uint64_t srr1, nip, addr;
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
srr1 = regs->msr;
+ nip = regs->nip;
if (P7_SRR1_MC_LOADSTORE(srr1)) {
handled = mce_handle_derror_p8(regs->dsisr);
@@ -279,6 +304,10 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
addr = regs->nip;
}
- save_mce_event(regs, handled, &mce_error_info, addr);
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2de07e52c35607..620d88736cf020 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -750,6 +750,11 @@ void __init early_init_devtree(void *params)
spinning_secondaries = boot_cpu_count - 1;
#endif
+#ifdef CONFIG_PPC_POWERNV
+ /* Scan and build the list of machine check recoverable ranges */
+ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
+#endif
+
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 61b5630a1ef7fa..ea8e717317ff17 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -22,6 +22,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
+#include <linux/memblock.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
@@ -34,6 +35,7 @@ struct kobject *opal_kobj;
struct opal {
u64 base;
u64 entry;
+ u64 size;
} opal;
/* OPAL in-memory console */
@@ -51,6 +53,15 @@ struct memcons {
uint32_t in_cons;
};
+struct mcheck_recoverable_range {
+ u64 start_addr;
+ u64 end_addr;
+ u64 recover_addr;
+};
+
+static struct mcheck_recoverable_range *mc_recoverable_range;
+static int mc_recoverable_range_len;
+
static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
@@ -65,25 +76,29 @@ static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
{
- const void *basep, *entryp;
- unsigned long basesz, entrysz;
+ const void *basep, *entryp, *sizep;
+ unsigned long basesz, entrysz, runtimesz;
if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
return 0;
basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
+ sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
- if (!basep || !entryp)
+ if (!basep || !entryp || !sizep)
return 1;
opal.base = of_read_number(basep, basesz/4);
opal.entry = of_read_number(entryp, entrysz/4);
+ opal.size = of_read_number(sizep, runtimesz/4);
pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%ld)\n",
opal.base, basep, basesz);
pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n",
opal.entry, entryp, entrysz);
+ pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%ld)\n",
+ opal.size, sizep, runtimesz);
powerpc_firmware_features |= FW_FEATURE_OPAL;
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
@@ -100,6 +115,53 @@ int __init early_init_dt_scan_opal(unsigned long node,
return 1;
}
+int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ unsigned long i, size;
+ const __be32 *prop;
+
+ if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &size);
+
+ if (!prop)
+ return 1;
+
+ pr_debug("Found machine check recoverable ranges.\n");
+
+ /*
+ * Allocate a buffer to hold the MC recoverable ranges. We would be
+ * accessing them in real mode, hence it needs to be within
+ * RMO region.
+ */
+ mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
+ ppc64_rma_size));
+ memset(mc_recoverable_range, 0, size);
+
+ /*
+ * Each recoverable address entry is an (start address,len,
+ * recover address) pair, * 2 cells each, totalling 4 cells per entry.
+ */
+ for (i = 0; i < size / (sizeof(*prop) * 5); i++) {
+ mc_recoverable_range[i].start_addr =
+ of_read_number(prop + (i * 5) + 0, 2);
+ mc_recoverable_range[i].end_addr =
+ mc_recoverable_range[i].start_addr +
+ of_read_number(prop + (i * 5) + 2, 1);
+ mc_recoverable_range[i].recover_addr =
+ of_read_number(prop + (i * 5) + 3, 2);
+
+ pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
+ mc_recoverable_range[i].start_addr,
+ mc_recoverable_range[i].end_addr,
+ mc_recoverable_range[i].recover_addr);
+ }
+ mc_recoverable_range_len = i;
+ return 1;
+}
+
static int __init opal_register_exception_handlers(void)
{
u64 glue;
@@ -411,6 +473,38 @@ int opal_machine_check(struct pt_regs *regs)
return 0;
}
+static uint64_t find_recovery_address(uint64_t nip)
+{
+ int i;
+
+ for (i = 0; i < mc_recoverable_range_len; i++)
+ if ((nip >= mc_recoverable_range[i].start_addr) &&
+ (nip < mc_recoverable_range[i].end_addr))
+ return mc_recoverable_range[i].recover_addr;
+ return 0;
+}
+
+bool opal_mce_check_early_recovery(struct pt_regs *regs)
+{
+ uint64_t recover_addr = 0;
+
+ if (!opal.base || !opal.size)
+ goto out;
+
+ if ((regs->nip >= opal.base) &&
+ (regs->nip <= (opal.base + opal.size)))
+ recover_addr = find_recovery_address(regs->nip);
+
+ /*
+ * Setup regs->nip to rfi into fixup address.
+ */
+ if (recover_addr)
+ regs->nip = recover_addr;
+
+out:
+ return !!recover_addr;
+}
+
static irqreturn_t opal_interrupt(int irq, void *data)
{
uint64_t events;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 86733d6942886a..1735678e73c194 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -187,6 +187,7 @@ static void __init pnv_setup_machdep_opal(void)
ppc_md.power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
ppc_md.machine_check_exception = opal_machine_check;
+ ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
}
#ifdef CONFIG_PPC_POWERNV_RTAS