diff options
author | Gavin Shan <gwshan@linux.vnet.ibm.com> | 2014-05-10 21:45:39 +1000 |
---|---|---|
committer | Eli Qiao <taget@linux.vnet.ibm.com> | 2014-05-12 11:04:40 +0800 |
commit | b7b33876ec63e50620ab710f2e34c50ca7cb5541 (patch) | |
tree | 285df31bc9ec9d6b5db78bf68d5fc6479f5e9054 | |
parent | afe34f689bff47714a18fd92c544f4318a8fc586 (diff) | |
download | powerkvm-b7b33876ec63e50620ab710f2e34c50ca7cb5541.tar.gz |
powerpc/eeh: Dump PE location code
As Ben suggested, it's meaningful to dump PE's location code
for site engineers when hitting EEH errors. The patch introduces
function eeh_pe_loc_get() to retireve the location code from
dev-tree so that we can output it when hitting EEH errors.
If primary PE bus is root bus, the PHB's dev-node would be tried
prior to root port's dev-node. Otherwise, the upstream bridge's
dev-node of the primary PE bus will be check for the location code
directly.
This fixes BZ 109585. Please apply to the next build for GA.
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/include/asm/eeh.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 13 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh_pe.c | 60 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 18 |
4 files changed, 81 insertions, 11 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c124ca04..fab7743c2640d5 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); +const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void *eeh_dev_init(struct device_node *dn, void *data); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 05e5fc6f5cd398..de9e244b11a5a5 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -326,8 +326,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); + pr_err("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(phb_pe); @@ -358,7 +358,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe, *parent_pe; + struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; const char *location; @@ -477,8 +477,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); + phb_pe = eeh_phb_pe_get(pe->phb); + pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pe->phb->global_number, pe->addr); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(pe); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index ea9f4b1943e615..299aead9fc3ee0 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -793,6 +793,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) } /** + * eeh_pe_loc_get - Retrieve location code binding to the given PE + * @pe: EEH PE + * + * Retrieve the location code of the given PE. If the primary PE bus + * is root bus, we will grab location code from PHB device tree node + * or root port. Otherwise, the upstream bridge's device tree node + * of the primary PE bus will be checked for the location code. + */ +const char *eeh_pe_loc_get(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pci_bus *bus = eeh_pe_bus_get(pe); + struct pci_dev *pdev; + struct device_node *dn; + const char *loc; + + if (!bus) + return "N/A"; + + /* PHB PE or root PE ? */ + if (pci_is_root_bus(bus)) { + hose = pci_bus_to_host(bus); + loc = of_get_property(hose->dn, + "ibm,loc-code", NULL); + if (loc) + return loc; + loc = of_get_property(hose->dn, + "ibm,io-base-loc-code", NULL); + if (loc) + return loc; + + pdev = pci_get_slot(bus, 0x0); + } else { + pdev = bus->self; + } + + if (!pdev) { + loc = "N/A"; + goto out; + } + + dn = pci_device_to_OF_node(pdev); + if (!dn) { + loc = "N/A"; + goto out; + } + + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,slot-location-code", NULL); + if (!loc) + loc = "N/A"; + +out: + if (pci_is_root_bus(bus) && pdev) + pci_dev_put(pdev); + return loc; +} + +/** * eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE * diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index ff93f41c78557b..fa37fa69aca153 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -789,18 +789,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) case OPAL_EEH_PHB_ERROR: if (severity == OPAL_EEH_SEV_PHB_DEAD) { *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected\n", - hose->global_number); + pr_err("EEH: dead PHB#%x detected, location: %s\n", + hose->global_number, eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_DEAD_PHB; } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { *pe = phb_pe; - pr_err("EEH: fenced PHB#%x detected\n", - hose->global_number); + pr_err("EEH: Fenced PHB#%x detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FENCED_PHB; } else if (severity == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " - "detected\n", - hose->global_number); + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ioda_eeh_phb_diag(hose); ret = EEH_NEXT_ERR_NONE; } @@ -821,6 +823,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) /* Try best to clear it */ pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); ret = EEH_NEXT_ERR_NONE; @@ -829,6 +833,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FROZEN_PE; } |