http://lia64.bkbits.net/linux-ia64-test-2.6.9 aegl@agluck-lia64.sc.intel.com|ChangeSet|20040915214744|59827 aegl # This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2004/09/15 20:42:35-07:00 akpm@bix.(none) # Merge http://lia64.bkbits.net/linux-ia64-test-2.6.9 # into bix.(none):/usr/src/bk-ia64 # # kernel/posix-timers.c # 2004/09/15 20:42:31-07:00 akpm@bix.(none) +0 -0 # Auto merged # # drivers/char/hpet.c # 2004/09/15 20:42:31-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/09/15 21:34:47+00:00 tony.luck@intel.com # [IA64] make INIT dump work again # # current can be in region 5, use 'tpa' to convert to physical # # Signed-off-by: Tony Luck # # arch/ia64/kernel/minstate.h # 2004/09/15 21:32:29+00:00 aegl@agluck-lia64.sc.intel.com +1 -1 # current can be in region 5, use 'tpa' to convert to physical # # ChangeSet # 2004/09/15 18:25:06+00:00 seto.hidetoshi@jp.fujitsu.com # [IA64] Recovery from user-mode memory error # # This is the latest/Updated OS_MCA handler which try to do recovery # from multibit-ECC/poisoned memory-read error on user-land. # (Thank you very much for comments, Keith and Grant!) # # I'd still appreciate it if anyone having good test environment # could apply my patch and could report how it works. # (especially reports on non-Tiger/non-Intel platform are welcome.) 
# # Signed-off-by: Hidetoshi Seto # Signed-off-by: Tony Luck # # arch/ia64/kernel/mca_drv_asm.S # 2004/09/15 18:18:04+00:00 aegl@agluck-lia64.sc.intel.com +45 -0 # MCA 2xECC memory errors # # arch/ia64/kernel/mca_drv.h # 2004/09/15 18:18:04+00:00 aegl@agluck-lia64.sc.intel.com +113 -0 # MCA 2xECC memory errors # # arch/ia64/kernel/mca_drv_asm.S # 2004/09/15 18:18:04+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # BitKeeper file /data/home/aegl/BK/work/seto2/arch/ia64/kernel/mca_drv_asm.S # # arch/ia64/kernel/mca_drv.h # 2004/09/15 18:18:04+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # BitKeeper file /data/home/aegl/BK/work/seto2/arch/ia64/kernel/mca_drv.h # # arch/ia64/kernel/mca_drv.c # 2004/09/15 18:18:03+00:00 aegl@agluck-lia64.sc.intel.com +641 -0 # MCA 2xECC memory errors # # arch/ia64/kernel/mca_drv.c # 2004/09/15 18:18:03+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # BitKeeper file /data/home/aegl/BK/work/seto2/arch/ia64/kernel/mca_drv.c # # include/asm-ia64/mca.h # 2004/09/15 18:16:58+00:00 aegl@agluck-lia64.sc.intel.com +6 -0 # MCA 2xECC memory errors # # arch/ia64/kernel/mca.c # 2004/09/15 18:16:57+00:00 aegl@agluck-lia64.sc.intel.com +35 -6 # MCA 2xECC memory errors # # arch/ia64/kernel/Makefile # 2004/09/15 18:16:57+00:00 aegl@agluck-lia64.sc.intel.com +2 -0 # MCA 2xECC memory errors # # arch/ia64/Kconfig # 2004/09/15 18:16:57+00:00 aegl@agluck-lia64.sc.intel.com +3 -0 # MCA 2xECC memory errors # # ChangeSet # 2004/09/15 18:08:25+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/Linus # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # kernel/posix-timers.c # 2004/09/15 18:08:19+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # drivers/char/hpet.c # 2004/09/15 18:08:18+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/09/12 22:05:23-07:00 akpm@bix.(none) # Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64 # # drivers/char/hpet.c # 2004/09/12 
22:05:19-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/09/11 15:10:10-07:00 akpm@bix.(none) # Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64 # # kernel/posix-timers.c # 2004/09/11 15:10:05-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/09/10 17:13:17-07:00 akpm@bix.(none) # Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64 # # drivers/char/hpet.c # 2004/09/10 17:13:13-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/09/09 23:19:44+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/Linus # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # arch/ia64/pci/pci.c # 2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/mm/discontig.c # 2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/kernel/acpi.c # 2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/09/09 23:17:38+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/alex # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # arch/ia64/pci/pci.c # 2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/mm/discontig.c # 2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/kernel/acpi.c # 2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/09/09 22:27:20+00:00 alex.williamson@hp.com # [IA64] iosapic.h, pci.c, iosapic.c, acpi.c: iosapic NUMA interrupt locality # # This patch associates IOSAPICs with NUMA nodes such that interrupts # gets assigned to a reasonably good default CPU. The patch does not # depend on the pxm_to_nid_map fixup, but results will be strange in some # configurations without it. 
This should work on any NUMA box that # exposes IOSAPICs with _MAT & _PXM methods, but it's only been tested on # an rx8620. There should be no change in behavior for boxes that don't # export both of these in ACPI namespace. # # Signed-off-by: Alex Williamson # Signed-off-by: Tony Luck # # include/asm-ia64/iosapic.h # 2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +3 -0 # iosapic NUMA interrupt locality # # arch/ia64/pci/pci.c # 2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +5 -0 # iosapic NUMA interrupt locality # # arch/ia64/kernel/iosapic.c # 2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +53 -2 # iosapic NUMA interrupt locality # # arch/ia64/kernel/acpi.c # 2004/09/09 22:25:25+00:00 aegl@agluck-lia64.sc.intel.com +67 -0 # iosapic NUMA interrupt locality # # ChangeSet # 2004/09/09 22:20:47+00:00 alex.williamson@hp.com # [IA64] sba_iommu.c: sba_iommu NUMA locality # # This patch adds NUMA locality info to sba_iommu, allowing coherent # DMA mappings to be allocated from the node nearest to the I/O. This # patch isn't dependent on the previous pxm_to_nid_map fixup, but may be # sub-optimal in some cases without it. Thanks # # Signed-off-by: Alex Williamson # Signed-off-by: Tony Luck # # arch/ia64/hp/common/sba_iommu.c # 2004/09/09 22:18:54+00:00 aegl@agluck-lia64.sc.intel.com +78 -2 # sba_iommu NUMA locality # # ChangeSet # 2004/09/09 22:14:48+00:00 alex.williamson@hp.com # [IA64] discontig.c: fixup pxm_to_nid_map # # I'd like to be able to use pxm_to_nid_map in several places to # discover proximity domain to node id associations. Unfortunately, after # reassign_cpu_only_nodes() plays with the node space, the pxm_to_nid_map # doesn't necessarily reflect reality. This fixes up the table so it's # still valid. Note that nid_to_pxm_map is still potentially broken, but # has a one-to-many problem if the above function combines several # proximity domains into a single node. Thanks to Bob Picco for the base # patch. 
# # Signed-off-by: Alex Williamson # Signed-off-by: Tony Luck # # arch/ia64/mm/discontig.c # 2004/09/09 22:12:53+00:00 aegl@agluck-lia64.sc.intel.com +24 -11 # fixup pxm_to_nid_map # # ChangeSet # 2004/09/08 18:06:50+00:00 aegl@agluck-lia64.sc.intel.com # Merge with Linux latest # # kernel/timer.c # 2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +1 -10 # Merge with Linux latest # # kernel/posix-timers.c # 2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -5 # Merge with Linux latest # # include/linux/timex.h # 2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +2 -44 # Merge with Linux latest # # arch/ia64/kernel/time.c # 2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -28 # Merge with Linux latest # # ChangeSet # 2004/09/08 18:05:16+00:00 aegl@agluck-lia64.sc.intel.com # Merge with Linus latest # # include/asm-ia64/acpi.h # 2004/09/08 17:45:07+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # drivers/char/hpet.c # 2004/09/08 17:45:07+00:00 aegl@agluck-lia64.sc.intel.com +0 -3 # Auto merged # # arch/ia64/sn/kernel/sn2/timer.c # 2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -4 # Auto merged # # arch/ia64/kernel/perfmon.c # 2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/kernel/fsys.S # 2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -185 # Auto merged # # arch/ia64/kernel/cyclone.c # 2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -4 # Auto merged # # arch/ia64/kernel/asm-offsets.c # 2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -16 # Auto merged # # arch/ia64/configs/sn2_defconfig # 2004/09/08 17:45:05+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # arch/ia64/configs/generic_defconfig # 2004/09/08 17:45:05+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/08/30 22:23:36+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/eranian # into 
agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # arch/ia64/kernel/perfmon.c # 2004/08/30 22:23:31+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/08/30 21:16:20+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/sn2defconfig # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # arch/ia64/configs/sn2_defconfig # 2004/08/30 21:16:15+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/08/25 23:16:36+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1 # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # arch/ia64/configs/generic_defconfig # 2004/08/25 23:16:31+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # ChangeSet # 2004/08/24 22:21:59+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1 # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # kernel/timer.c # 2004/08/24 22:21:54+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # kernel/posix-timers.c # 2004/08/24 22:21:53+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # # include/asm-ia64/acpi.h # 2004/08/24 22:21:53+00:00 aegl@agluck-lia64.sc.intel.com +0 -1 # Auto merged # # ChangeSet # 2004/08/24 21:26:27+00:00 clameter@sgi.com # scalability & performance improvements for timers # # Signed-off-by: Christoph Lameter # Signed-off-by: Tony Luck # # - Include corrected test data since the test program had a bad # influence on the outcome. Scalability is better than the # test program indicated. # - Correctly setup the timer accuracy. # - Consistently increases performance over existing codebase # - Make the IA64 fastcall work for all clock sources and not only # for ITC based clocking. 
# - Add fastcall for clock_gettime(REALTIME and MONOTONIC) # (the fastcall also returns nanoseconds instead of usecs*1000) # - Scalability improvements in particular for the use of global clocks # by avoiding the use of a cmpxchg. For applications # that continually "live" in gettimeofday on an SMP system this # will be a significant improvement. # - Ability to switch off the cmpxchg for ITC based systems through # a "nojitter" option on the kernel command line. This increases # scalability of the time functions significantly. The ITC tuning code # that runs during bootup typically ensures that ITC offsets are less # than a few cycles which are longer than the delay caused by the gettime # functions and therefore the cmpxchg is not necessary on most systems. # - Self tuning interpolator limiting the jumps forward to 10-20 usecs # on each occurrence and increasing accuracy as well as robustness. # There is no danger anymore that the interpolator is configured to # be running too fast. # - Report the increased accuracy via clock_getres() to userspace. # - Generic interface. An interpolator can be easily set up by simply # setting up a time_interpolator structure with some values. # No coding of special functions needed. # - Supports the HPET timer. 
# # kernel/timer.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +10 -5 # scalability & performance improvements for timers # # kernel/posix-timers.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +5 -0 # scalability & performance improvements for timers # # include/linux/timex.h # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +123 -52 # scalability & performance improvements for timers # # drivers/char/hpet.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +3 -33 # scalability & performance improvements for timers # # arch/ia64/sn/kernel/sn2/timer.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +4 -45 # scalability & performance improvements for timers # # arch/ia64/kernel/time.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +35 -84 # scalability & performance improvements for timers # # arch/ia64/kernel/fsys.S # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +185 -172 # scalability & performance improvements for timers # # arch/ia64/kernel/cyclone.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +4 -55 # scalability & performance improvements for timers # # arch/ia64/kernel/asm-offsets.c # 2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +16 -3 # scalability & performance improvements for timers # # ChangeSet # 2004/08/24 16:16:47+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/Linus # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # include/asm-ia64/acpi.h # 2004/08/24 16:16:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -1 # Auto merged # # ChangeSet # 2004/08/17 23:50:42+00:00 aegl@agluck-lia64.sc.intel.com # Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1 # into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9 # # include/asm-ia64/acpi.h # 2004/08/17 23:50:37+00:00 aegl@agluck-lia64.sc.intel.com +0 -0 # Auto merged # diff -Nru a/arch/ia64/Kconfig 
b/arch/ia64/Kconfig --- a/arch/ia64/Kconfig 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/Kconfig 2004-09-15 20:43:35 -07:00 @@ -274,6 +274,9 @@ depends on IA32_SUPPORT default y +config IA64_MCA_RECOVERY + tristate "MCA recovery from errors other than TLB." + config PERFMON bool "Performance monitor support" help diff -Nru a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c --- a/arch/ia64/hp/common/sba_iommu.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/hp/common/sba_iommu.c 2004-09-15 20:43:35 -07:00 @@ -203,6 +203,9 @@ /* clearing pdir to prevent races with allocations. */ unsigned int res_bitshift; /* from the RIGHT! */ unsigned int res_size; /* size of resource map in bytes */ +#ifdef CONFIG_NUMA + unsigned int node; /* node where this IOC lives */ +#endif #if DELAYED_RESOURCE_CNT > 0 spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ /* than res_lock for bigger systems. */ @@ -1057,7 +1060,24 @@ struct ioc *ioc; void *addr; + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef CONFIG_NUMA + { + struct page *page; + page = alloc_pages_node(ioc->node == MAX_NUMNODES ? + numa_node_id() : ioc->node, flags, + get_order(size)); + + if (unlikely(!page)) + return NULL; + + addr = page_address(page); + } +#else addr = (void *) __get_free_pages(flags, get_order(size)); +#endif if (unlikely(!addr)) return NULL; @@ -1081,8 +1101,6 @@ * If device can't bypass or bypass is disabled, pass the 32bit fake * device to map single to get an iova mapping. 
*/ - ioc = GET_IOC(dev); - ASSERT(ioc); *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); return addr; @@ -1799,6 +1817,10 @@ seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); +#ifdef CONFIG_NUMA + if (ioc->node != MAX_NUMNODES) + seq_printf(s, "NUMA node : %d\n", ioc->node); +#endif seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); @@ -1899,6 +1921,58 @@ printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); } +#ifdef CONFIG_NUMA +static void __init +sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + acpi_handle phandle; + unsigned int node; + + ioc->node = MAX_NUMNODES; + + /* + * Check for a _PXM on this node first. We don't typically see + * one here, so we'll end up getting it from the parent. 
+ */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) { + if (ACPI_FAILURE(acpi_get_parent(handle, &phandle))) + return; + + /* Reset the acpi buffer */ + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL, + &buffer))) + return; + } + + if (!buffer.length || !buffer.pointer) + return; + + obj = buffer.pointer; + + if (obj->type != ACPI_TYPE_INTEGER || + obj->integer.value >= MAX_PXM_DOMAINS) { + acpi_os_free(buffer.pointer); + return; + } + + node = pxm_to_nid_map[obj->integer.value]; + acpi_os_free(buffer.pointer); + + if (node >= MAX_NUMNODES || !node_online(node)) + return; + + ioc->node = node; + return; +} +#else +#define sba_map_ioc_to_node(ioc, handle) +#endif + static int __init acpi_sba_ioc_add(struct acpi_device *device) { @@ -1941,6 +2015,8 @@ if (!ioc) return 1; + /* setup NUMA node association */ + sba_map_ioc_to_node(ioc, device->handle); return 0; } diff -Nru a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile --- a/arch/ia64/kernel/Makefile 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/kernel/Makefile 2004-09-15 20:43:35 -07:00 @@ -17,6 +17,8 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +mca_recovery-y += mca_drv.o mca_drv_asm.o # The gate DSO image is built using a special linker script. 
targets += gate.so gate-syms.o diff -Nru a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c --- a/arch/ia64/kernel/acpi.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/kernel/acpi.c 2004-09-15 20:43:35 -07:00 @@ -650,4 +650,71 @@ return 0; } +#ifdef CONFIG_NUMA +acpi_status __init +acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *obj; + struct acpi_table_iosapic *iosapic; + unsigned int gsi_base; + int node; + + /* Only care about objects w/ a method that returns the MADT */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*iosapic)) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer; + + if (iosapic->header.type != ACPI_MADT_IOSAPIC) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + gsi_base = iosapic->global_irq_base; + + acpi_os_free(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + /* + * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell + * us which node to associate this with. + */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + + if (obj->type != ACPI_TYPE_INTEGER || + obj->integer.value >= MAX_PXM_DOMAINS) { + acpi_os_free(buffer.pointer); + return AE_OK; + } + + node = pxm_to_nid_map[obj->integer.value]; + acpi_os_free(buffer.pointer); + + if (node >= MAX_NUMNODES || !node_online(node) || + cpus_empty(node_to_cpumask(node))) + return AE_OK; + + /* We know a gsi to node mapping! 
*/ + map_iosapic_to_node(gsi_base, node); + return AE_OK; +} +#endif /* CONFIG_NUMA */ #endif /* CONFIG_ACPI_BOOT */ diff -Nru a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c --- a/arch/ia64/kernel/iosapic.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/kernel/iosapic.c 2004-09-15 20:43:35 -07:00 @@ -117,6 +117,9 @@ char *addr; /* base address of IOSAPIC */ unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ unsigned short num_rte; /* number of RTE in this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif } iosapic_lists[NR_IOSAPICS]; static int num_iosapic; @@ -488,7 +491,7 @@ } static unsigned int -get_target_cpu (void) +get_target_cpu (unsigned int gsi, int vector) { #ifdef CONFIG_SMP static int cpu = -1; @@ -507,6 +510,34 @@ if (!cpu_online(smp_processor_id())) return hard_smp_processor_id(); +#ifdef CONFIG_NUMA + { + int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; + cpumask_t cpu_mask; + + iosapic_index = find_iosapic(gsi); + if (iosapic_index < 0 || + iosapic_lists[iosapic_index].node == MAX_NUMNODES) + goto skip_numa_setup; + + cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); + + num_cpus = cpus_weight(cpu_mask); + + if (!num_cpus) + goto skip_numa_setup; + + /* Use vector assigment to distribute across cpus in node */ + cpu_index = vector % num_cpus; + + for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) + numa_cpu = next_cpu(numa_cpu, cpu_mask); + + if (numa_cpu != NR_CPUS) + return cpu_physical_id(numa_cpu); + } +skip_numa_setup: +#endif /* * Otherwise, round-robin interrupt vectors across all the * processors. 
(It'd be nice if we could be smarter in the @@ -550,7 +581,7 @@ } vector = assign_irq_vector(AUTO_ASSIGN); - dest = get_target_cpu(); + dest = get_target_cpu(gsi, vector); register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); } @@ -680,6 +711,9 @@ iosapic_lists[num_iosapic].addr = addr; iosapic_lists[num_iosapic].gsi_base = gsi_base; iosapic_lists[num_iosapic].num_rte = num_rte; +#ifdef CONFIG_NUMA + iosapic_lists[num_iosapic].node = MAX_NUMNODES; +#endif num_iosapic++; if ((gsi_base == 0) && pcat_compat) { @@ -692,3 +726,20 @@ iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); } } + +#ifdef CONFIG_NUMA +void __init +map_iosapic_to_node(unsigned int gsi_base, int node) +{ + int index; + + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __FUNCTION__, gsi_base); + return; + } + iosapic_lists[index].node = node; + return; +} +#endif diff -Nru a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c --- a/arch/ia64/kernel/mca.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/kernel/mca.c 2004-09-15 20:43:35 -07:00 @@ -82,11 +82,6 @@ # define IA64_MCA_DEBUG(fmt...) 
#endif -typedef struct ia64_fptr { - unsigned long fp; - unsigned long gp; -} ia64_fptr_t; - /* Used by mca_asm.S */ ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; @@ -831,6 +826,31 @@ } +/* Function pointer for extra MCA recovery */ +int (*ia64_mca_ucmc_extension) + (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*) + = NULL; + +int +ia64_reg_MCA_extension(void *fn) +{ + if (ia64_mca_ucmc_extension) + return 1; + + ia64_mca_ucmc_extension = fn; + return 0; +} + +void +ia64_unreg_MCA_extension(void) +{ + if (ia64_mca_ucmc_extension) + ia64_mca_ucmc_extension = NULL; +} + +EXPORT_SYMBOL(ia64_reg_MCA_extension); +EXPORT_SYMBOL(ia64_unreg_MCA_extension); + /* * ia64_mca_ucmc_handler * @@ -852,10 +872,19 @@ { pal_processor_state_info_t *psp = (pal_processor_state_info_t *) &ia64_sal_to_os_handoff_state.proc_state_param; - int recover = psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc); + int recover; /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); + + /* TLB error is only exist in this SAL error record */ + recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) + /* other error recovery */ + || (ia64_mca_ucmc_extension + && ia64_mca_ucmc_extension( + IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), + &ia64_sal_to_os_handoff_state, + &ia64_os_to_sal_handoff_state)); /* * Wakeup all the processors which are spinning in the rendezvous diff -Nru a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ia64/kernel/mca_drv.c 2004-09-15 20:43:35 -07:00 @@ -0,0 +1,641 @@ +/* + * File: mca_drv.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mca_drv.h" + +/* max size of SAL error record (default) */ +static int sal_rec_max = 10000; + +/* from mca.c */ +extern int ia64_reg_MCA_extension(void*); +extern void ia64_unreg_MCA_extension(void); +static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state; +static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state; + +/* from mca_drv_asm.S */ +extern void *mca_handler_bhhook(void); + +static spinlock_t mca_bh_lock = SPIN_LOCK_UNLOCKED; + +typedef enum { + MCA_IS_LOCAL = 0, + MCA_IS_GLOBAL = 1 +} mca_type_t; + +#define MAX_PAGE_ISOLATE 32 + +static struct page *page_isolate[MAX_PAGE_ISOLATE]; +static int num_page_isolate = 0; + +typedef enum { + ISOLATE_NG = 0, + ISOLATE_OK = 1 +} isolate_status_t; + +/* + * This pool keeps pointers to the section part of SAL error record + */ +static struct { + slidx_list_t *buffer; /* section pointer list pool */ + int cur_idx; /* Current index of section pointer list pool */ + int max_idx; /* Maximum index of section pointer list pool */ +} slidx_pool; + +/** + * mca_page_isolate - isolate a poisoned page in order not to use it later + * @paddr: poisoned memory location + * + * Return value: + * ISOLATE_OK / ISOLATE_NG + */ + +static isolate_status_t +mca_page_isolate(unsigned long paddr) +{ + int i; + struct page *p; + + /* whether physical address is valid or not */ + if ( !ia64_phys_addr_valid(paddr) ) + return ISOLATE_NG; + + /* convert physical address to physical page number */ + p = pfn_to_page(paddr>>PAGE_SHIFT); + + /* check whether a page number have been already registered or not */ + for( i = 0; i < num_page_isolate; i++ ) + if( page_isolate[i] == p ) + return ISOLATE_OK; /* already listed */ + + /* limitation check */ + if( num_page_isolate == MAX_PAGE_ISOLATE ) + return ISOLATE_NG; + + /* kick pages having attribute 'SLAB' or 'Reserved' */ + if( PageSlab(p) || PageReserved(p) ) + return 
ISOLATE_NG; + + /* add attribute 'Reserved' and register the page */ + SetPageReserved(p); + page_isolate[num_page_isolate++] = p; + + return ISOLATE_OK; +} + +/** + * mca_hanlder_bh - Kill the process which occurred memory read error + * @paddr: poisoned address received from MCA Handler + */ + +void +mca_handler_bh(unsigned long paddr) +{ + printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n", + current->pid, current->comm); + + spin_lock(&mca_bh_lock); + if (mca_page_isolate(paddr) == ISOLATE_OK) { + printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); + } else { + printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); + } + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ + force_sig(SIGKILL, current); + schedule(); +} + +/** + * mca_make_peidx - Make index of processor error section + * @slpi: pointer to record of processor error section + * @peidx: pointer to index of processor error section + */ + +static void +mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) +{ + /* + * calculate the start address of + * "struct cpuid_info" and "sal_processor_static_info_t". + */ + u64 total_check_num = slpi->valid.num_cache_check + + slpi->valid.num_tlb_check + + slpi->valid.num_bus_check + + slpi->valid.num_reg_file_check + + slpi->valid.num_ms_check; + u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num + + sizeof(sal_log_processor_info_t); + u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); + + peidx_head(peidx) = slpi; + peidx_mid(peidx) = (struct sal_cpuid_info *) + (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); + peidx_bottom(peidx) = (sal_processor_static_info_t *) + (slpi->valid.psi_static_struct ? 
+ ((char*)slpi + head_size + mid_size) : NULL); +} + +/** + * mca_make_slidx - Make index of SAL error record + * @buffer: pointer to SAL error record + * @slidx: pointer to index of SAL error record + * + * Return value: + * 1 if record has platform error / 0 if not + */ +#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ + { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ + hl->hdr = ptr; \ + list_add(&hl->list, &(sect)); \ + slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } + +static int +mca_make_slidx(void *buffer, slidx_table_t *slidx) +{ + int platform_err = 0; + int record_len = ((sal_log_record_header_t*)buffer)->len; + u32 ercd_pos; + int sects; + sal_log_section_hdr_t *sp; + + /* + * Initialize index referring current record + */ + INIT_LIST_HEAD(&(slidx->proc_err)); + INIT_LIST_HEAD(&(slidx->mem_dev_err)); + INIT_LIST_HEAD(&(slidx->sel_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_bus_err)); + INIT_LIST_HEAD(&(slidx->smbios_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_comp_err)); + INIT_LIST_HEAD(&(slidx->plat_specific_err)); + INIT_LIST_HEAD(&(slidx->host_ctlr_err)); + INIT_LIST_HEAD(&(slidx->plat_bus_err)); + INIT_LIST_HEAD(&(slidx->unsupported)); + + /* + * Extract a Record Header + */ + slidx->header = buffer; + + /* + * Extract each section records + * (arranged from "int ia64_log_platform_info_print()") + */ + for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; + ercd_pos < record_len; ercd_pos += sp->len, sects++) { + sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); + if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { + LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) 
{ + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); + } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); + } else { + LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); + } + } + slidx->n_sections = sects; + + return platform_err; +} + +/** + * init_record_index_pools - Initialize pool of lists for SAL record index + * + * Return value: + * 0 on Success / -ENOMEM on Failure + */ +static int +init_record_index_pools(void) +{ + int i; + int rec_max_size; /* Maximum size of SAL error records */ + int sect_min_size; /* Minimum size of SAL error sections */ + /* minimum size table of each section */ + static int sal_log_sect_min_sizes[] = { + sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t), + sizeof(sal_log_mem_dev_err_info_t), + sizeof(sal_log_sel_dev_err_info_t), + sizeof(sal_log_pci_bus_err_info_t), + sizeof(sal_log_smbios_dev_err_info_t), + sizeof(sal_log_pci_comp_err_info_t), + sizeof(sal_log_plat_specific_err_info_t), + sizeof(sal_log_host_ctlr_err_info_t), + sizeof(sal_log_plat_bus_err_info_t), + }; + + /* + * MCA handler cannot allocate new memory on flight, + * so we preallocate enough memory to handle a SAL record. + * + * Initialize a handling set of slidx_pool: + * 1. Pick up the max size of SAL error records + * 2. 
Pick up the min size of SAL error sections + * 3. Allocate the pool as enough to 2 SAL records + * (now we can estimate the maxinum of section in a record.) + */ + + /* - 1 - */ + rec_max_size = sal_rec_max; + + /* - 2 - */ + sect_min_size = sal_log_sect_min_sizes[0]; + for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) + if (sect_min_size > sal_log_sect_min_sizes[i]) + sect_min_size = sal_log_sect_min_sizes[i]; + + /* - 3 - */ + slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; + slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); + + return slidx_pool.buffer ? 0 : -ENOMEM; +} + + +/***************************************************************************** + * Recovery functions * + *****************************************************************************/ + +/** + * is_mca_global - Check whether this MCA is global or not + * @peidx: pointer of index of processor error section + * @pbci: pointer to pal_bus_check_info_t + * + * Return value: + * MCA_IS_LOCAL / MCA_IS_GLOBAL + */ + +static mca_type_t +is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * PAL can request a rendezvous, if the MCA has a global scope. + * If "rz_always" flag is set, SAL requests MCA rendezvous + * in spite of global MCA. + * Therefore it is local MCA when rendezvous has not been requested. + * Failed to rendezvous, the system must be down. + */ + switch (sal_to_os_handoff_state->imsto_rendez_state) { + case -1: /* SAL rendezvous unsuccessful */ + return MCA_IS_GLOBAL; + case 0: /* SAL rendezvous not required */ + return MCA_IS_LOCAL; + case 1: /* SAL rendezvous successful int */ + case 2: /* SAL rendezvous successful int with init */ + default: + break; + } + + /* + * If One or more Cache/TLB/Reg_File/Uarch_Check is here, + * it would be a local MCA. (i.e. 
processor internal error) + */ + if (psp->tc || psp->cc || psp->rc || psp->uc) + return MCA_IS_LOCAL; + + /* + * A Bus_Check structure with the Bus_Check.ib (internal bus error) flag set + * would be a global MCA. (e.g. a system bus address parity error) + */ + if (!pbci || pbci->ib) + return MCA_IS_GLOBAL; + + /* + * A Bus_Check structure with the Bus_Check.eb (external bus error) flag set + * could be either a local MCA or a global MCA. + * + * Meaning of Bus_Check.bsi: + * 0: Unknown/unclassified + * 1: BERR# + * 2: BINIT# + * 3: Hard Fail + * (FIXME: Are these SGI specific or generic bsi values?) + */ + if (pbci->eb) + switch (pbci->bsi) { + case 0: + /* e.g. a load from poisoned memory */ + return MCA_IS_LOCAL; + case 1: + case 2: + case 3: + return MCA_IS_GLOBAL; + } + + return MCA_IS_GLOBAL; +} + +/** + * recover_from_read_error - Try to recover from errors whose type is "read". + * @slidx: pointer to index of SAL error record + * @peidx: pointer to index of processor error section + * @pbci: pointer to pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + sal_log_mod_error_info_t *smei; + pal_min_state_area_t *pmsa; + struct ia64_psr *psr1, *psr2; + ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; + + /* Is the target address valid? Without it there is nothing to act on. */ + if (!pbci->tv) + return 0; + + /* + * cpu read or memory-mapped io read + * + * offending process affected process OS MCA do + * kernel mode kernel mode down system + * kernel mode user mode kill the process + * user mode kernel mode down system (*) + * user mode user mode kill the process + * + * (*) You could terminate the offending user-mode process + * if (pbci->pv && pbci->pl != 0) *and* if you are sure + * the process does not hold any kernel locks. + */ + + psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); + + /* + * Check the privilege level of interrupted context.
+ * If it is user-mode, then terminate affected process. + */ + if (psr1->cpl != 0) { + smei = peidx_bus_check(peidx, 0); + if (smei->valid.target_identifier) { + /* + * set up to resume in the bottom half of the MCA handler, + * "mca_handler_bhhook" + */ + pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61)); + /* pass the target identifier to bhhook as 1st argument (gr8) */ + pmsa->pmsa_gr[8-1] = smei->target_identifier; + /* save the interrupted address as return address (but not used) */ + pmsa->pmsa_br0 = pmsa->pmsa_iip; + /* redirect the resume address (and gp) to the bottom half */ + pmsa->pmsa_iip = mca_hdlr_bh->fp; + pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; + /* set cpl to kernel mode */ + psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; + psr2->cpl = 0; + psr2->ri = 0; + + return 1; + } + + } + + return 0; +} + +/** + * recover_from_platform_error - Recover from platform error. + * @slidx: pointer to index of SAL error record + * @peidx: pointer to index of processor error section + * @pbci: pointer to pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + int status = 0; + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + if (psp->bc && pbci->eb && pbci->bsi == 0) { + switch(pbci->type) { + case 1: /* partial read */ + case 3: /* full line(cpu) read */ + case 9: /* I/O space read */ + status = recover_from_read_error(slidx, peidx, pbci); + break; + case 0: /* unknown */ + case 2: /* partial write */ + case 4: /* full line write */ + case 5: /* implicit or explicit write-back operation */ + case 6: /* snoop probe */ + case 7: /* incoming or outgoing ptc.g */ + case 8: /* write coalescing transactions */ + case 10: /* I/O space write */ + case 11: /* inter-processor interrupt message(IPI) */ + case 12: /* interrupt acknowledge or external task priority cycle */ + default: + break; + } + } + + return status; +} + +/** + * 
recover_from_processor_error + * @platform: whether there are some platform error section or not + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * + * Return value: + * 1 on Success / 0 on Failure + */ +/* + * We try to recover only when all of the conditions below are satisfied. + * 1. Only one processor error section exists. + * 2. BUS_CHECK exists and the others do not (except TLB_CHECK). + * 3. The number of BUS_CHECK_INFO entries is 1. + * 4. "External bus error" flag is set and the others are not set. + */ + +static int +recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) +{ + pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * We cannot recover from errors other than bus_check. + */ + if (psp->cc || psp->rc || psp->uc) + return 0; + + /* + * If there is no bus error, the record is weird but there is nothing to recover. + */ + if (psp->bc == 0 || pbci == NULL) + return 1; + + /* + * Sorry, we cannot handle more than one bus check. + */ + if (peidx_bus_check_num(peidx) > 1) + return 0; + /* + * From here on, there is only one bus error. + */ + if (pbci->ib || pbci->cc) + return 0; + if (pbci->eb && pbci->bsi > 0) + return 0; + if (psp->ci == 0) + return 0; + + /* + * This is a local MCA, estimated to be a recoverable external bus error. + * (e.g. a load from poisoned memory) + * This means "there are some platform errors". + */ + if (platform) + return recover_from_platform_error(slidx, peidx, pbci); + /* + * On account of strange SAL error record, we cannot recover.
+ */ + return 0; +} + +/** + * mca_try_to_recover - Try to recover from MCA + * @rec: pointer to a SAL error record + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +mca_try_to_recover(void *rec, + ia64_mca_sal_to_os_state_t *sal_to_os_state, + ia64_mca_os_to_sal_state_t *os_to_sal_state) +{ + int platform_err; + int n_proc_err; + slidx_table_t slidx; + peidx_table_t peidx; + pal_bus_check_info_t pbci; + + /* remember the handoff state from/to mca.c for the recovery helpers */ + sal_to_os_handoff_state = sal_to_os_state; + os_to_sal_handoff_state = os_to_sal_state; + + /* Make index of SAL error record */ + platform_err = mca_make_slidx(rec, &slidx); + + /* Count processor error sections */ + n_proc_err = slidx_count(&slidx, proc_err); + + /* The OS can recover only when there is exactly one processor error section */ + if (n_proc_err > 1) + return 0; + else if (n_proc_err == 0) { + /* Weird SAL record without a processor error section: nothing to recover */ + + return 1; + } + + /* Make index of processor error section */ + mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx); + + /* Extract the check info of processor BUS_CHECK[0] (0 if absent or not valid) */ + *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); + + /* Check whether MCA is global or not; give up on a global MCA */ + if (is_mca_global(&peidx, &pbci)) + return 0; + + /* Try to recover a processor error */ + return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci); +} + +/* + * ============================================================================= + */ + +int __init mca_external_handler_init(void) +{ + if (init_record_index_pools()) + return -ENOMEM; + + /* register external mca handlers; release the index pool again on failure */ + if (ia64_reg_MCA_extension(mca_try_to_recover)){ + printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); + kfree(slidx_pool.buffer); + return -EFAULT; + } + return 0; +} + +void __exit mca_external_handler_exit(void) +{ + /* unregister external mca handlers, then release the index pool */ + ia64_unreg_MCA_extension(); + kfree(slidx_pool.buffer); +} + +module_init(mca_external_handler_init);
+module_exit(mca_external_handler_exit); + +module_param(sal_rec_max, int, 0644); +MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); + +MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); +MODULE_LICENSE("GPL"); diff -Nru a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ia64/kernel/mca_drv.h 2004-09-15 20:43:35 -07:00 @@ -0,0 +1,113 @@ +/* + * File: mca_drv.h + * Purpose: Define helpers for Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +/* + * Processor error section: + * + * +-sal_log_processor_info_t *info-------------+ + * | sal_log_section_hdr_t header; | + * | ... | + * | sal_log_mod_error_info_t info[0]; | + * +-+----------------+-------------------------+ + * | CACHE_CHECK | ^ num_cache_check v + * +----------------+ + * | TLB_CHECK | ^ num_tlb_check v + * +----------------+ + * | BUS_CHECK | ^ num_bus_check v + * +----------------+ + * | REG_FILE_CHECK | ^ num_reg_file_check v + * +----------------+ + * | MS_CHECK | ^ num_ms_check v + * +-struct cpuid_info *id----------------------+ + * | regs[5]; | + * | reserved; | + * +-sal_processor_static_info_t *regs----------+ + * | valid; | + * | ... 
| + * | fr[128]; | + * +--------------------------------------------+ + */ + +/* peidx: index of processor error section */ +typedef struct peidx_table { + sal_log_processor_info_t *info; + struct sal_cpuid_info *id; + sal_processor_static_info_t *regs; +} peidx_table_t; + +#define peidx_head(p) (((p)->info)) +#define peidx_mid(p) (((p)->id)) +#define peidx_bottom(p) (((p)->regs)) + +#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) +#define peidx_field_valid(p) (&(peidx_head(p)->valid)) +#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) + +#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) +#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) +#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) +#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) +#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) + +#define peidx_cache_check_idx(p, n) (n) +#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) +#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) +#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) +#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) + +#define peidx_mod_error_info(p, name, n) \ +({ int __idx = peidx_##name##_idx(p, n); \ + sal_log_mod_error_info_t *__ret = NULL; \ + if (peidx_##name##_num(p) > n) /*BUG*/ \ + __ret = &(peidx_head(p)->info[__idx]); \ + __ret; }) + +#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) +#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) +#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) +#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) +#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) + +#define peidx_check_info(proc, name, n) \ +({ \ + 
sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ + u64 __temp = __info && __info->valid.check_info \ + ? __info->check_info : 0; \ + __temp; }) + +/* slidx: index of SAL log error record */ + +typedef struct slidx_list { + struct list_head list; + sal_log_section_hdr_t *hdr; +} slidx_list_t; + +typedef struct slidx_table { + sal_log_record_header_t *header; + int n_sections; /* # of section headers */ + struct list_head proc_err; + struct list_head mem_dev_err; + struct list_head sel_dev_err; + struct list_head pci_bus_err; + struct list_head smbios_dev_err; + struct list_head pci_comp_err; + struct list_head plat_specific_err; + struct list_head host_ctlr_err; + struct list_head plat_bus_err; + struct list_head unsupported; /* list of unsupported sections */ +} slidx_table_t; + +#define slidx_foreach_entry(pos, head) \ + list_for_each_entry(pos, head, list) +#define slidx_first_entry(head) \ + (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL) +#define slidx_count(slidx, sec) \ +({ int __count = 0; \ + slidx_list_t *__pos; \ + slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ + __count; }) + diff -Nru a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S --- /dev/null Wed Dec 31 16:00:00 196900 +++ b/arch/ia64/kernel/mca_drv_asm.S 2004-09-15 20:43:35 -07:00 @@ -0,0 +1,45 @@ +/* + * File: mca_drv_asm.S + * Purpose: Assembly portion of Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + */ +#include +#include + +#include +#include + +GLOBAL_ENTRY(mca_handler_bhhook) + invala // clear RSE ? 
+ ;; // + cover // + ;; // + clrrrb // + ;; + alloc r16=ar.pfs,0,2,1,0 // make a new frame + ;; + mov r13=IA64_KR(CURRENT) // current task pointer + ;; + adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 + ;; + ld8 r12=[r12] // stack pointer + ;; + mov loc0=r16 + movl loc1=mca_handler_bh // recovery C function + ;; + mov out0=r8 // poisoned address + mov b6=loc1 + ;; + mov loc1=rp + ;; + br.call.sptk.many rp=b6 // not return ... + ;; + mov ar.pfs=loc0 + mov rp=loc1 + ;; + mov r8=r0 + br.ret.sptk.many rp + ;; +END(mca_handler_bhhook) diff -Nru a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h --- a/arch/ia64/kernel/minstate.h 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/kernel/minstate.h 2004-09-15 20:43:35 -07:00 @@ -65,7 +65,7 @@ #endif #ifdef MINSTATE_PHYS -# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,61,3 +# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS #endif diff -Nru a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c --- a/arch/ia64/mm/discontig.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/mm/discontig.c 2004-09-15 20:43:35 -07:00 @@ -53,11 +53,12 @@ static void __init reassign_cpu_only_nodes(void) { struct node_memblk_s *p; - int i, j, k, nnode, nid, cpu, cpunid; + int i, j, k, nnode, nid, cpu, cpunid, pxm; u8 cslit, slit; static DECLARE_BITMAP(nodes_with_mem, NR_NODES) __initdata; static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata; static int node_flip[NR_NODES] __initdata; + static int old_nid_map[NR_CPUS] __initdata; for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) if (!test_bit(p->nid, (void *) nodes_with_mem)) { @@ -104,9 +105,14 @@ for (cpu = 0; cpu < NR_CPUS; cpu++) if (node_cpuid[cpu].nid == i) { - /* For nodes not being reassigned just fix the cpu's nid. 
*/ + /* + * For nodes not being reassigned just + * fix the cpu's nid and reverse pxm map + */ if (cpunid < numnodes) { - node_cpuid[cpu].nid = cpunid; + pxm = nid_to_pxm_map[i]; + pxm_to_nid_map[pxm] = + node_cpuid[cpu].nid = cpunid; continue; } @@ -126,6 +132,8 @@ } } + /* save old nid map so we can update the pxm */ + old_nid_map[cpu] = node_cpuid[cpu].nid; node_cpuid[cpu].nid = k; } } @@ -134,14 +142,19 @@ * Fixup temporary nid values for CPU-only nodes. */ for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == (numnodes + numnodes)) - node_cpuid[cpu].nid = nnode - 1; - else - for (i = 0; i < nnode; i++) - if (node_flip[i] == (node_cpuid[cpu].nid - numnodes)) { - node_cpuid[cpu].nid = i; - break; - } + if (node_cpuid[cpu].nid == (numnodes + numnodes)) { + pxm = nid_to_pxm_map[old_nid_map[cpu]]; + pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1; + } else { + for (i = 0; i < nnode; i++) { + if (node_flip[i] != (node_cpuid[cpu].nid - numnodes)) + continue; + + pxm = nid_to_pxm_map[old_nid_map[cpu]]; + pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i; + break; + } + } /* * Fix numa_slit by compressing from larger diff -Nru a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c --- a/arch/ia64/pci/pci.c 2004-09-15 20:43:35 -07:00 +++ b/arch/ia64/pci/pci.c 2004-09-15 20:43:35 -07:00 @@ -136,6 +136,11 @@ printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); +#ifdef CONFIG_NUMA +extern acpi_status acpi_map_iosapic (acpi_handle, u32, void*, void**); + + acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); +#endif /* * PCI IRQ routing is set up by pci_enable_device(), but we * also do it here in case there are still broken drivers that diff -Nru a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h --- a/include/asm-ia64/iosapic.h 2004-09-15 20:43:35 -07:00 +++ b/include/asm-ia64/iosapic.h 2004-09-15 20:43:35 -07:00 @@ -90,6 +90,9 @@ extern unsigned int iosapic_version (char *addr); extern void iosapic_pci_fixup (int); +#ifdef CONFIG_NUMA +extern void __init 
map_iosapic_to_node (unsigned int, int); +#endif #else #define iosapic_system_init(pcat_compat) do { } while (0) #define iosapic_init(address,gsi_base) do { } while (0) diff -Nru a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h --- a/include/asm-ia64/mca.h 2004-09-15 20:43:35 -07:00 +++ b/include/asm-ia64/mca.h 2004-09-15 20:43:35 -07:00 @@ -22,6 +22,11 @@ #define IA64_MCA_RENDEZ_TIMEOUT (20 * 1000) /* value in milliseconds - 20 seconds */ +typedef struct ia64_fptr { + unsigned long fp; + unsigned long gp; +} ia64_fptr_t; + typedef union cmcv_reg_u { u64 cmcv_regval; struct { @@ -114,6 +119,7 @@ extern void ia64_monarch_init_handler(void); extern void ia64_slave_init_handler(void); extern void ia64_mca_cmc_vector_setup(void); +extern int (*ia64_mca_ucmc_other_recover_fp)(void *,ia64_mca_sal_to_os_state_t *,ia64_mca_os_to_sal_state_t *); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_IA64_MCA_H */