http://lia64.bkbits.net/linux-ia64-test-2.6.9
aegl@agluck-lia64.sc.intel.com|ChangeSet|20040909231944|64985 aegl

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/09/12 22:05:23-07:00 akpm@bix.(none) 
#   Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64
# 
# drivers/char/hpet.c
#   2004/09/12 22:05:19-07:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/09/11 15:10:10-07:00 akpm@bix.(none) 
#   Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64
# 
# kernel/posix-timers.c
#   2004/09/11 15:10:05-07:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/09/10 17:13:17-07:00 akpm@bix.(none) 
#   Merge bix.(none):/usr/src/bk25 into bix.(none):/usr/src/bk-ia64
# 
# drivers/char/hpet.c
#   2004/09/10 17:13:13-07:00 akpm@bix.(none) +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/09/09 23:19:44+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/Linus
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# arch/ia64/pci/pci.c
#   2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/mm/discontig.c
#   2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/kernel/acpi.c
#   2004/09/09 23:19:38+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/09/09 23:17:38+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/alex
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# arch/ia64/pci/pci.c
#   2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/mm/discontig.c
#   2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/kernel/acpi.c
#   2004/09/09 23:17:32+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/09/09 22:27:20+00:00 alex.williamson@hp.com 
#   [IA64] iosapic.h, pci.c, iosapic.c, acpi.c: iosapic NUMA interrupt locality
#   
#   This patch associates IOSAPICs with NUMA nodes such that interrupts
#   get assigned to a reasonably good default CPU.  The patch does not
#   depend on the pxm_to_nid_map fixup, but results will be strange in some
#   configurations without it.  This should work on any NUMA box that
#   exposes IOSAPICs with _MAT & _PXM methods, but it's only been tested on
#   an rx8620.  There should be no change in behavior for boxes that don't
#   export both of these in ACPI namespace.
#    
#   Signed-off-by: Alex Williamson <alex.williamson@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# include/asm-ia64/iosapic.h
#   2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +3 -0
#   iosapic NUMA interrupt locality
# 
# arch/ia64/pci/pci.c
#   2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +5 -0
#   iosapic NUMA interrupt locality
# 
# arch/ia64/kernel/iosapic.c
#   2004/09/09 22:25:26+00:00 aegl@agluck-lia64.sc.intel.com +53 -2
#   iosapic NUMA interrupt locality
# 
# arch/ia64/kernel/acpi.c
#   2004/09/09 22:25:25+00:00 aegl@agluck-lia64.sc.intel.com +67 -0
#   iosapic NUMA interrupt locality
# 
# ChangeSet
#   2004/09/09 22:20:47+00:00 alex.williamson@hp.com 
#   [IA64] sba_iommu.c: sba_iommu NUMA locality
#   
#   This patch adds NUMA locality info to sba_iommu, allowing coherent
#   DMA mappings to be allocated from the node nearest to the I/O.  This
#   patch isn't dependent on the previous pxm_to_nid_map fixup, but may be
#   sub-optimal in some cases without it.  Thanks
#    
#   Signed-off-by: Alex Williamson <alex.williamson@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/hp/common/sba_iommu.c
#   2004/09/09 22:18:54+00:00 aegl@agluck-lia64.sc.intel.com +78 -2
#   sba_iommu NUMA locality
# 
# ChangeSet
#   2004/09/09 22:14:48+00:00 alex.williamson@hp.com 
#   [IA64] discontig.c: fixup pxm_to_nid_map
#   
#   I'd like to be able to use pxm_to_nid_map in several places to
#   discover proximity domain to node id associations.  Unfortunately, after
#   reassign_cpu_only_nodes() plays with the node space, the pxm_to_nid_map
#   doesn't necessarily reflect reality.  This fixes up the table so it's
#   still valid.  Note that nid_to_pxm_map is still potentially broken: it
#   has a one-to-many problem if the above function combines several
#   proximity domains into a single node.  Thanks to Bob Picco for the base
#   patch.
#    
#   Signed-off-by: Alex Williamson <alex.williamson@hp.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
# 
# arch/ia64/mm/discontig.c
#   2004/09/09 22:12:53+00:00 aegl@agluck-lia64.sc.intel.com +24 -11
#   fixup pxm_to_nid_map
# 
# ChangeSet
#   2004/09/08 18:06:50+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge with Linux latest
# 
# kernel/timer.c
#   2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +1 -10
#   Merge with Linux latest
# 
# kernel/posix-timers.c
#   2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -5
#   Merge with Linux latest
# 
# include/linux/timex.h
#   2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +2 -44
#   Merge with Linux latest
# 
# arch/ia64/kernel/time.c
#   2004/09/08 18:06:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -28
#   Merge with Linux latest
# 
# ChangeSet
#   2004/09/08 18:05:16+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge with Linus latest
# 
# include/asm-ia64/acpi.h
#   2004/09/08 17:45:07+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# drivers/char/hpet.c
#   2004/09/08 17:45:07+00:00 aegl@agluck-lia64.sc.intel.com +0 -3
#   Auto merged
# 
# arch/ia64/sn/kernel/sn2/timer.c
#   2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -4
#   Auto merged
# 
# arch/ia64/kernel/perfmon.c
#   2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/kernel/fsys.S
#   2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -185
#   Auto merged
# 
# arch/ia64/kernel/cyclone.c
#   2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -4
#   Auto merged
# 
# arch/ia64/kernel/asm-offsets.c
#   2004/09/08 17:45:06+00:00 aegl@agluck-lia64.sc.intel.com +0 -16
#   Auto merged
# 
# arch/ia64/configs/sn2_defconfig
#   2004/09/08 17:45:05+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# arch/ia64/configs/generic_defconfig
#   2004/09/08 17:45:05+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/08/30 22:23:36+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/eranian
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# arch/ia64/kernel/perfmon.c
#   2004/08/30 22:23:31+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/08/30 21:16:20+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/work/sn2defconfig
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# arch/ia64/configs/sn2_defconfig
#   2004/08/30 21:16:15+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/08/25 23:16:36+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# arch/ia64/configs/generic_defconfig
#   2004/08/25 23:16:31+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# ChangeSet
#   2004/08/24 22:21:59+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# kernel/timer.c
#   2004/08/24 22:21:54+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# kernel/posix-timers.c
#   2004/08/24 22:21:53+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
# include/asm-ia64/acpi.h
#   2004/08/24 22:21:53+00:00 aegl@agluck-lia64.sc.intel.com +0 -1
#   Auto merged
# 
# ChangeSet
#   2004/08/24 21:26:27+00:00 clameter@sgi.com 
#   scalability & performance improvements for timers
#   
#   Signed-off-by: Christoph Lameter <clameter@sgi.com>
#   Signed-off-by: Tony Luck <tony.luck@intel.com>
#   
#   - Include corrected test data since the test program had a bad
#     influence on the outcome. Scalability is better than the
#     test program indicated.
#   - Correctly set up the timer accuracy.
#   - Consistently increases performance over existing codebase
#   - Make the IA64 fastcall work for all clock sources and not only
#     for ITC based clocking.
#   - Add fastcall for clock_gettime(REALTIME and MONOTONIC)
#     (the fastcall also returns nanoseconds instead of usecs*1000)
#   - Scalability improvements in particular for the use of global clocks
#     by avoiding the use of a cmpxchg. For applications
#     that continually "live" in gettimeofday on an SMP system this
#     will be a significant improvement.
#   - Ability to switch off the cmpxchg for ITC based systems through
#     a "nojitter" option on the kernel command line. This increases
#     scalability of the time functions significantly. The ITC tuning code
#     that runs during bootup typically ensures that ITC offsets are less
#     than a few cycles which are longer than the delay caused by the gettime
#     functions and therefore the cmpxchg is not necessary on most systems.
#   - Self tuning interpolator limiting the jumps forward to 10-20 usecs
#     on each occurrence and increasing accuracy as well as robustness.
#     There is no danger anymore that the interpolator is configured to
#     be running too fast.
#   - Report the increased accuracy via clock_getres() to userspace.
#   - Generic interface. An interpolator can be easily set up by simply
#     setting up a time_interpolator structure with some values.
#     No coding of special functions needed.
#   - Supports the HPET timer.
# 
# kernel/timer.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +10 -5
#   scalability & performance improvements for timers
# 
# kernel/posix-timers.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +5 -0
#   scalability & performance improvements for timers
# 
# include/linux/timex.h
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +123 -52
#   scalability & performance improvements for timers
# 
# drivers/char/hpet.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +3 -33
#   scalability & performance improvements for timers
# 
# arch/ia64/sn/kernel/sn2/timer.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +4 -45
#   scalability & performance improvements for timers
# 
# arch/ia64/kernel/time.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +35 -84
#   scalability & performance improvements for timers
# 
# arch/ia64/kernel/fsys.S
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +185 -172
#   scalability & performance improvements for timers
# 
# arch/ia64/kernel/cyclone.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +4 -55
#   scalability & performance improvements for timers
# 
# arch/ia64/kernel/asm-offsets.c
#   2004/08/24 21:20:31+00:00 aegl@agluck-lia64.sc.intel.com +16 -3
#   scalability & performance improvements for timers
# 
# ChangeSet
#   2004/08/24 16:16:47+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/Linus
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# include/asm-ia64/acpi.h
#   2004/08/24 16:16:42+00:00 aegl@agluck-lia64.sc.intel.com +0 -1
#   Auto merged
# 
# ChangeSet
#   2004/08/17 23:50:42+00:00 aegl@agluck-lia64.sc.intel.com 
#   Merge agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.8.1
#   into agluck-lia64.sc.intel.com:/data/home/aegl/BK/linux-ia64-test-2.6.9
# 
# include/asm-ia64/acpi.h
#   2004/08/17 23:50:37+00:00 aegl@agluck-lia64.sc.intel.com +0 -0
#   Auto merged
# 
diff -Nru a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
--- a/arch/ia64/hp/common/sba_iommu.c	2004-09-12 22:06:31 -07:00
+++ b/arch/ia64/hp/common/sba_iommu.c	2004-09-12 22:06:31 -07:00
@@ -203,6 +203,9 @@
 					/* clearing pdir to prevent races with allocations. */
 	unsigned int	res_bitshift;	/* from the RIGHT! */
 	unsigned int	res_size;	/* size of resource map in bytes */
+#ifdef CONFIG_NUMA
+	unsigned int	node;		/* node where this IOC lives */
+#endif
 #if DELAYED_RESOURCE_CNT > 0
 	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
 					/* than res_lock for bigger systems. */
@@ -1057,7 +1060,24 @@
 	struct ioc *ioc;
 	void *addr;
 
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef CONFIG_NUMA
+	{
+		struct page *page;
+		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
+		                        numa_node_id() : ioc->node, flags,
+		                        get_order(size));
+
+		if (unlikely(!page))
+			return NULL;
+
+		addr = page_address(page);
+	}
+#else
 	addr = (void *) __get_free_pages(flags, get_order(size));
+#endif
 	if (unlikely(!addr))
 		return NULL;
 
@@ -1081,8 +1101,6 @@
 	 * If device can't bypass or bypass is disabled, pass the 32bit fake
 	 * device to map single to get an iova mapping.
 	 */
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
 	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
 
 	return addr;
@@ -1799,6 +1817,10 @@
 
 	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
 		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
+#ifdef CONFIG_NUMA
+	if (ioc->node != MAX_NUMNODES)
+		seq_printf(s, "NUMA node       : %d\n", ioc->node);
+#endif
 	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
 	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);
 
@@ -1899,6 +1921,58 @@
 	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
 }
 
+#ifdef CONFIG_NUMA
+static void __init
+sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	acpi_handle phandle;
+	unsigned int node;
+
+	ioc->node = MAX_NUMNODES;
+
+	/*
+	 * Check for a _PXM on this node first.  We don't typically see
+	 * one here, so we'll end up getting it from the parent.
+	 */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) {
+		if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
+			return;
+
+		/* Reset the acpi buffer */
+		buffer.length = ACPI_ALLOCATE_BUFFER;
+		buffer.pointer = NULL;
+
+		if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL,
+		                                      &buffer)))
+			return;
+	}
+
+	if (!buffer.length || !buffer.pointer)
+		return;
+
+	obj = buffer.pointer;
+
+	if (obj->type != ACPI_TYPE_INTEGER ||
+	    obj->integer.value >= MAX_PXM_DOMAINS) {
+		acpi_os_free(buffer.pointer);
+		return;
+	}
+
+	node = pxm_to_nid_map[obj->integer.value];
+	acpi_os_free(buffer.pointer);
+
+	if (node >= MAX_NUMNODES || !node_online(node))
+		return;
+
+	ioc->node = node;
+	return;
+}
+#else
+#define sba_map_ioc_to_node(ioc, handle)
+#endif
+
 static int __init
 acpi_sba_ioc_add(struct acpi_device *device)
 {
@@ -1941,6 +2015,8 @@
 	if (!ioc)
 		return 1;
 
+	/* setup NUMA node association */
+	sba_map_ioc_to_node(ioc, device->handle);
 	return 0;
 }
 
diff -Nru a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
--- a/arch/ia64/kernel/acpi.c	2004-09-12 22:06:32 -07:00
+++ b/arch/ia64/kernel/acpi.c	2004-09-12 22:06:32 -07:00
@@ -650,4 +650,71 @@
 	return 0;
 }
 
+#ifdef CONFIG_NUMA
+acpi_status __init
+acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	struct acpi_table_iosapic *iosapic;
+	unsigned int gsi_base;
+	int node;
+
+	/* Only care about objects w/ a method that returns the MADT */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return AE_OK;
+
+	if (!buffer.length || !buffer.pointer)
+		return AE_OK;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*iosapic)) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
+
+	if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	gsi_base = iosapic->global_irq_base;
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	/*
+	 * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
+	 * us which node to associate this with.
+	 */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
+		return AE_OK;
+
+	if (!buffer.length || !buffer.pointer)
+		return AE_OK;
+
+	obj = buffer.pointer;
+
+	if (obj->type != ACPI_TYPE_INTEGER ||
+	    obj->integer.value >= MAX_PXM_DOMAINS) {
+		acpi_os_free(buffer.pointer);
+		return AE_OK;
+	}
+
+	node = pxm_to_nid_map[obj->integer.value];
+	acpi_os_free(buffer.pointer);
+
+	if (node >= MAX_NUMNODES || !node_online(node) ||
+	    cpus_empty(node_to_cpumask(node)))
+		return AE_OK;
+
+	/* We know a gsi to node mapping! */
+	map_iosapic_to_node(gsi_base, node);
+	return AE_OK;
+}
+#endif /* CONFIG_NUMA */
 #endif /* CONFIG_ACPI_BOOT */
diff -Nru a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
--- a/arch/ia64/kernel/iosapic.c	2004-09-12 22:06:32 -07:00
+++ b/arch/ia64/kernel/iosapic.c	2004-09-12 22:06:32 -07:00
@@ -117,6 +117,9 @@
 	char		*addr;		/* base address of IOSAPIC */
 	unsigned int 	gsi_base;	/* first GSI assigned to this IOSAPIC */
 	unsigned short 	num_rte;	/* number of RTE in this IOSAPIC */
+#ifdef CONFIG_NUMA
+	unsigned short	node;		/* numa node association via pxm */
+#endif
 } iosapic_lists[NR_IOSAPICS];
 
 static int num_iosapic;
@@ -488,7 +491,7 @@
 }
 
 static unsigned int
-get_target_cpu (void)
+get_target_cpu (unsigned int gsi, int vector)
 {
 #ifdef CONFIG_SMP
 	static int cpu = -1;
@@ -507,6 +510,34 @@
 	if (!cpu_online(smp_processor_id()))
 		return hard_smp_processor_id();
 
+#ifdef CONFIG_NUMA
+	{
+		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
+		cpumask_t cpu_mask;
+
+		iosapic_index = find_iosapic(gsi);
+		if (iosapic_index < 0 ||
+		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
+			goto skip_numa_setup;
+
+		cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
+		
+		num_cpus = cpus_weight(cpu_mask);
+
+		if (!num_cpus)
+			goto skip_numa_setup;
+
+		/* Use vector assignment to distribute across cpus in node */
+		cpu_index = vector % num_cpus;
+
+		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
+			numa_cpu = next_cpu(numa_cpu, cpu_mask);
+
+		if (numa_cpu != NR_CPUS)
+			return cpu_physical_id(numa_cpu);
+	}
+skip_numa_setup:
+#endif
 	/*
 	 * Otherwise, round-robin interrupt vectors across all the
 	 * processors.  (It'd be nice if we could be smarter in the
@@ -550,7 +581,7 @@
 		}
 
 		vector = assign_irq_vector(AUTO_ASSIGN);
-		dest = get_target_cpu();
+		dest = get_target_cpu(gsi, vector);
 		register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
 			polarity, trigger);
 	}
@@ -680,6 +711,9 @@
 	iosapic_lists[num_iosapic].addr = addr;
 	iosapic_lists[num_iosapic].gsi_base = gsi_base;
 	iosapic_lists[num_iosapic].num_rte = num_rte;
+#ifdef CONFIG_NUMA
+	iosapic_lists[num_iosapic].node = MAX_NUMNODES;
+#endif
 	num_iosapic++;
 
 	if ((gsi_base == 0) && pcat_compat) {
@@ -692,3 +726,20 @@
 			iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
 	}
 }
+
+#ifdef CONFIG_NUMA
+void __init
+map_iosapic_to_node(unsigned int gsi_base, int node)
+{
+	int index;
+
+	index = find_iosapic(gsi_base);
+	if (index < 0) {
+		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+		       __FUNCTION__, gsi_base);
+		return;
+	}
+	iosapic_lists[index].node = node;
+	return;
+}
+#endif
diff -Nru a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
--- a/arch/ia64/mm/discontig.c	2004-09-12 22:06:32 -07:00
+++ b/arch/ia64/mm/discontig.c	2004-09-12 22:06:32 -07:00
@@ -53,11 +53,12 @@
 static void __init reassign_cpu_only_nodes(void)
 {
 	struct node_memblk_s *p;
-	int i, j, k, nnode, nid, cpu, cpunid;
+	int i, j, k, nnode, nid, cpu, cpunid, pxm;
 	u8 cslit, slit;
 	static DECLARE_BITMAP(nodes_with_mem, NR_NODES) __initdata;
 	static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
 	static int node_flip[NR_NODES] __initdata;
+	static int old_nid_map[NR_CPUS] __initdata;
 
 	for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
 		if (!test_bit(p->nid, (void *) nodes_with_mem)) {
@@ -104,9 +105,14 @@
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++)
 			if (node_cpuid[cpu].nid == i) {
-				/* For nodes not being reassigned just fix the cpu's nid. */
+				/*
+				 * For nodes not being reassigned just
+				 * fix the cpu's nid and reverse pxm map
+				 */
 				if (cpunid < numnodes) {
-					node_cpuid[cpu].nid = cpunid;
+					pxm = nid_to_pxm_map[i];
+					pxm_to_nid_map[pxm] =
+					          node_cpuid[cpu].nid = cpunid;
 					continue;
 				}
 
@@ -126,6 +132,8 @@
 						}
 					}
 
+				/* save old nid map so we can update the pxm */
+				old_nid_map[cpu] = node_cpuid[cpu].nid;
 				node_cpuid[cpu].nid = k;
 			}
 	}
@@ -134,14 +142,19 @@
 	 * Fixup temporary nid values for CPU-only nodes.
 	 */
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		if (node_cpuid[cpu].nid == (numnodes + numnodes))
-			node_cpuid[cpu].nid = nnode - 1;
-		else
-			for (i = 0; i < nnode; i++)
-				if (node_flip[i] == (node_cpuid[cpu].nid - numnodes)) {
-					node_cpuid[cpu].nid = i;
-					break;
-				}
+		if (node_cpuid[cpu].nid == (numnodes + numnodes)) {
+			pxm = nid_to_pxm_map[old_nid_map[cpu]];
+			pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
+		} else {
+			for (i = 0; i < nnode; i++) {
+				if (node_flip[i] != (node_cpuid[cpu].nid - numnodes))
+					continue;
+
+				pxm = nid_to_pxm_map[old_nid_map[cpu]];
+				pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
+				break;
+			}
+		}
 
 	/*
 	 * Fix numa_slit by compressing from larger
diff -Nru a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
--- a/arch/ia64/pci/pci.c	2004-09-12 22:06:31 -07:00
+++ b/arch/ia64/pci/pci.c	2004-09-12 22:06:32 -07:00
@@ -136,6 +136,11 @@
 
 	printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
 
+#ifdef CONFIG_NUMA
+extern acpi_status acpi_map_iosapic (acpi_handle, u32, void*, void**);
+
+	acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
+#endif
 	/*
 	 * PCI IRQ routing is set up by pci_enable_device(), but we
 	 * also do it here in case there are still broken drivers that
diff -Nru a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h
--- a/include/asm-ia64/iosapic.h	2004-09-12 22:06:32 -07:00
+++ b/include/asm-ia64/iosapic.h	2004-09-12 22:06:32 -07:00
@@ -90,6 +90,9 @@
 extern unsigned int iosapic_version (char *addr);
 
 extern void iosapic_pci_fixup (int);
+#ifdef CONFIG_NUMA
+extern void __init map_iosapic_to_node (unsigned int, int);
+#endif
 #else
 #define iosapic_system_init(pcat_compat)			do { } while (0)
 #define iosapic_init(address,gsi_base)				do { } while (0)