7 files changed, 435 insertions, 399 deletions
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
index 7309c25496613..a56bcb1e6cf5d 100644
--- a/arch/ia64/sn/kernel/sn2/Makefile
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -11,4 +11,5 @@
 
 EXTRA_CFLAGS := -DLITTLE_ENDIAN
 
-obj-y += cache.o iomv.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o timer.o
+obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
+	 prominfo_proc.o timer.o
diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c
index af59e0fe3ddcd..f0cce9edc53d1 100644
--- a/arch/ia64/sn/kernel/sn2/cache.c
+++ b/arch/ia64/sn/kernel/sn2/cache.c
@@ -4,7 +4,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  * 
- * Copyright (C) 2001-2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved.
  *
  */
 
@@ -24,6 +24,5 @@ void
 sn_flush_all_caches(long flush_addr, long bytes)
 {
 	flush_icache_range(flush_addr, flush_addr+bytes);
+	mb();
 }
-
-
diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c
index 9de12763a35f0..59423708d30ce 100644
--- a/arch/ia64/sn/kernel/sn2/io.c
+++ b/arch/ia64/sn/kernel/sn2/io.c
@@ -9,13 +9,8 @@
  * we wrap the inlines from asm/ia64/sn/sn2/io.h here.
  */
 
-#include <linux/config.h>
-#include <linux/types.h>
-
 #include <asm/sn/sn2/io.h>
 
-#ifdef CONFIG_IA64_GENERIC
-
 unsigned int
 sn_inb (unsigned long port)
 {
@@ -73,7 +68,7 @@ sn_readl (void *addr)
 unsigned long
 sn_readq (void *addr)
 {
-	return __sn_readq (addr)
+	return __sn_readq (addr);
 }
 
 
@@ -94,5 +89,3 @@ asm ("__sn_readb = sn_readb");
 asm ("__sn_readw = sn_readw");
 asm ("__sn_readl = sn_readl");
 asm ("__sn_readq = sn_readq");
-
-#endif /* CONFIG_IA64_GENERIC */
diff --git a/arch/ia64/sn/kernel/sn2/iomv.c b/arch/ia64/sn/kernel/sn2/iomv.c
deleted file mode 100644
index 4dd53594f5c07..0000000000000
--- a/arch/ia64/sn/kernel/sn2/iomv.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* 
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
- */
-
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/sn/simulator.h>
-#include <asm/sn/pda.h>
-#include <asm/sn/sn_cpuid.h>
-
-/**
- * sn_io_addr - convert an in/out port to an i/o address
- * @port: port to convert
- *
- * Legacy in/out instructions are converted to ld/st instructions
- * on IA64.  This routine will convert a port number into a valid 
- * SN i/o address.  Used by sn_in*() and sn_out*().
- */
-void *
-sn_io_addr(unsigned long port)
-{
-	if (!IS_RUNNING_ON_SIMULATOR()) {
-		return( (void *)  (port | __IA64_UNCACHED_OFFSET));
-	} else {
-		unsigned long io_base;
-		unsigned long addr;
- 
-		/*
- 		 * word align port, but need more than 10 bits
- 		 * for accessing registers in bedrock local block
- 		 * (so we don't do port&0xfff)
- 		 */
-		if ((port >= 0x1f0 && port <= 0x1f7) ||
-			port == 0x3f6 || port == 0x3f7) {
-			io_base = (0xc000000fcc000000 | ((unsigned long)get_nasid() << 38));
-			addr = io_base | ((port >> 2) << 12) | (port & 0xfff);
-		} else {
-			addr = __ia64_get_io_port_base() | ((port >> 2) << 2);
-		}
-		return(void *) addr;
-	}
-}
-
-EXPORT_SYMBOL(sn_io_addr);
-
-/**
- * sn_mmiob - I/O space memory barrier
- *
- * Acts as a memory mapped I/O barrier for platforms that queue writes to 
- * I/O space.  This ensures that subsequent writes to I/O space arrive after
- * all previous writes.  For most ia64 platforms, this is a simple
- * 'mf.a' instruction.  For other platforms, mmiob() may have to read
- * a chipset register to ensure ordering.
- *
- * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
- * See PV 871084 for details about the WAR about zero value.
- *
- */
-void
-sn_mmiob (void)
-{
-	while ((((volatile unsigned long) (*pda.pio_write_status_addr)) & SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT_MASK) != 
-				SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT_MASK)
-		udelay(1);
-}
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
new file mode 100644
index 0000000000000..f860679d5b12f
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -0,0 +1,361 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * Module to export the system's Firmware Interface Tables, including
+ * PROM revision numbers, in /proc
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <asm/io.h>
+#include <asm/sn/simulator.h>
+
+/* to lookup nasids */
+#include <asm/sn/sn_cpuid.h>
+
+MODULE_DESCRIPTION("PROM version reporting for /proc");
+MODULE_AUTHOR("Chad Talbott");
+MODULE_LICENSE("GPL");
+
+#undef DEBUG_PROMINFO
+
+#define TRACE_PROMINFO
+
+#if defined(DEBUG_PROMINFO)
+#  define DPRINTK(x...) printk(KERN_DEBUG x)
+#else
+#  define DPRINTK(x...)
+#endif
+
+#if defined(TRACE_PROMINFO) && defined(DEBUG_PROMINFO)
+#  if defined(__GNUC__)
+#    define TRACE()	printk(KERN_DEBUG "%s:%d:%s\n", \
+			       __FILE__, __LINE__, __FUNCTION__)
+#  else	/* no __FUNCTION__ here: print file and line only */
+#    define TRACE()	printk(KERN_DEBUG "%s:%d\n", __FILE__, __LINE__)
+#  endif
+#else	/* tracing disabled: TRACE() expands to nothing */
+#  define TRACE()
+#endif
+
+/* Sub-regions determined by bits in Node Offset */
+#define	LB_PROM_SPACE		0x0000000700000000ul /* Local LB PROM */
+
+#define FIT_SIGNATURE		0x2020205f5449465ful
+/* Standard Intel FIT entry types */
+#define FIT_ENTRY_FIT_HEADER	0x00	/* FIT header entry */
+#define FIT_ENTRY_PAL_B		0x01	/* PAL_B entry */
+/* Entries 0x02 through 0x0D reserved by Intel */
+#define FIT_ENTRY_PAL_A_PROC	0x0E	/* Processor-specific PAL_A entry */
+#define FIT_ENTRY_PAL_A		0x0F	/* PAL_A entry, same as... */
+#define FIT_ENTRY_PAL_A_GEN	0x0F	/* ...Generic PAL_A entry */
+#define FIT_ENTRY_UNUSED	0x7F	/* Unused (reserved by Intel?) */
+/* OEM-defined entries range from 0x10 to 0x7E. */
+#define FIT_ENTRY_SAL_A		0x10	/* SAL_A entry */
+#define FIT_ENTRY_SAL_B		0x11	/* SAL_B entry */
+#define FIT_ENTRY_SALRUNTIME	0x12	/* SAL runtime entry */
+#define FIT_ENTRY_EFI		0x1F	/* EFI entry */
+#define FIT_ENTRY_FPSWA		0x20	/* embedded fpswa entry */
+#define FIT_ENTRY_VMLINUX	0x21	/* embedded vmlinux entry */
+
+#define FIT_MAJOR_SHIFT	(32 + 8)
+#define FIT_MAJOR_MASK	((1 << 8) - 1)
+#define FIT_MINOR_SHIFT	32
+#define FIT_MINOR_MASK	((1 << 8) - 1)
+
+#define FIT_MAJOR(q)	\
+	((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK)
+#define FIT_MINOR(q)	\
+	((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK)
+
+#define FIT_TYPE_SHIFT	(32 + 16)
+#define FIT_TYPE_MASK	((1 << 7) - 1)
+
+#define FIT_TYPE(q)	\
+	((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK)
+
+#define FIT_ENTRY(type, maj, min, size)					\
+	((((unsigned long)(maj) & FIT_MAJOR_MASK) << FIT_MAJOR_SHIFT) |	\
+	 (((unsigned long)(min) & FIT_MINOR_MASK) << FIT_MINOR_SHIFT) |	\
+	 (((unsigned long)(type) & FIT_TYPE_MASK) << FIT_TYPE_SHIFT) |	\
+	 (size))
+
+struct fit_type_map_t {
+	unsigned char	type;
+	const char	*name;
+};
+
+static const struct fit_type_map_t fit_entry_types[] = {
+	{ FIT_ENTRY_FIT_HEADER, "FIT Header" },
+	{ FIT_ENTRY_PAL_A_GEN,  "Generic PAL_A" },
+	{ FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A" },
+	{ FIT_ENTRY_PAL_A,      "PAL_A" },
+	{ FIT_ENTRY_PAL_B,      "PAL_B" },
+	{ FIT_ENTRY_SAL_A,      "SAL_A" },
+	{ FIT_ENTRY_SAL_B,      "SAL_B" },
+	{ FIT_ENTRY_SALRUNTIME, "SAL runtime" },
+	{ FIT_ENTRY_EFI,	"EFI" },
+	{ FIT_ENTRY_VMLINUX,    "Embedded Linux" },
+	{ FIT_ENTRY_FPSWA,      "Embedded FPSWA" },
+	{ FIT_ENTRY_UNUSED,     "Unused" },
+	{ 0xff,                 "Error" },
+};
+
+static const char *
+fit_type_name(unsigned char type)
+{
+	struct fit_type_map_t const*mapp;
+
+	for (mapp = fit_entry_types; mapp->type != 0xff; mapp++)
+		if (type == mapp->type)
+			return mapp->name;
+
+	if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED))
+		return "OEM type";
+	if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A))
+		return "Reserved";
+
+	return "Unknown type";
+}
+
+/* These two routines read the FIT table directly from the FLASH PROM
+ * on a specific node.  The PROM can only be accessed using aligned 64
+ * bit reads, so we do that and then shift and mask the result to get
+ * at each field.
+ */
+static int
+dump_fit_entry(char *page, unsigned long *fentry)
+{
+	unsigned long q1, q2;
+	unsigned type;
+
+	TRACE();
+
+	q1 = readq(fentry);
+	q2 = readq(fentry + 1);
+	type = FIT_TYPE(q2);
+	return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n",
+		       type,
+		       fit_type_name(type),
+		       FIT_MAJOR(q2), FIT_MINOR(q2),
+		       q1,
+		       /* mult by sixteen to get size in bytes */
+		       (unsigned)q2 * 16);
+}
+
+/* Print every entry of the FIT at @fit into @page, one line per entry,
+ * and return the number of bytes written.  We assume the table is small
+ * enough to fit in one page: true for our default 16kB pages, since each
+ * entry is about 60 chars wide when printed.  I read somewhere that the
+ * maximum size of the FIT is 128 entries, so we're OK except for 4kB
+ * pages (and no one is going to do that on SN anyway).
+ */
+static int
+dump_fit(char *page, unsigned long *fit)
+{
+	unsigned long qw;
+	int nentries;
+	int fentry;
+	char *p;
+
+	TRACE();
+
+	DPRINTK("dumping fit from %p\n", (void *)fit);
+
+	qw = readq(fit);
+	DPRINTK("FIT signature: %016lx (%.8s)\n", qw, (char *)&qw);
+	if (qw != FIT_SIGNATURE)
+		printk(KERN_WARNING "Unrecognized FIT signature\n");
+
+	qw = readq(fit + 1);
+	nentries = (unsigned)qw;
+	DPRINTK("number of fit entries: %u\n", nentries);
+	/* check that we won't overflow the page -- see comment above */
+	BUG_ON(nentries * 60 > PAGE_SIZE);
+
+	p = page;
+	for (fentry = 0; fentry < nentries; fentry++)
+		/* each FIT entry is two 64 bit words */
+		p += dump_fit_entry(p, fit + 2 * fentry);
+
+	return p - page;
+}
+
+static int
+dump_version(char *page, unsigned long *fit)
+{
+	int nentries;
+	int fentry;
+	unsigned long qw;
+
+	TRACE();
+
+	nentries = (unsigned)readq(fit + 1);
+	BUG_ON(nentries * 60 > PAGE_SIZE);
+
+	for (fentry = 0; fentry < nentries; fentry++) {
+		qw = readq(fit + 2 * fentry + 1);
+		if (FIT_TYPE(qw) == FIT_ENTRY_SAL_A)
+			return sprintf(page, "%x.%02x\n",
+				       FIT_MAJOR(qw), FIT_MINOR(qw));
+	}
+	return 0;
+}
+
+/* same as in proc_misc.c */
+static int
+proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof,
+		  int len)
+{
+	if (len <= off+count) *eof = 1;
+	*start = page + off;
+	len -= off;
+	if (len>count) len = count;
+	if (len<0) len = 0;
+	return len;
+}
+
+static int
+read_version_entry(char *page, char **start, off_t off, int count, int *eof,
+		   void *data)
+{
+	int len = 0;
+
+	MOD_INC_USE_COUNT;
+	/* data holds the pointer to this node's FIT */
+	len = dump_version(page, (unsigned long *)data);
+	len = proc_calc_metrics(page, start, off, count, eof, len);
+	MOD_DEC_USE_COUNT;
+	return len;
+}
+
+static int
+read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
+	       void *data)
+{
+	int len = 0;
+
+	MOD_INC_USE_COUNT;
+	/* data holds the pointer to this node's FIT */
+	len = dump_fit(page, (unsigned long *)data);
+	len = proc_calc_metrics(page, start, off, count, eof, len);
+	MOD_DEC_USE_COUNT;
+
+	return len;
+}
+
+/* this is a fake FIT that's used on the medusa simulator which
+ * doesn't usually run a complete PROM. 
+ */
+#ifdef CONFIG_IA64_SGI_SN_SIM
+static unsigned long fakefit[] = {
+	/* this is all we need to satisfy the code below */
+	FIT_SIGNATURE,
+	FIT_ENTRY(FIT_ENTRY_FIT_HEADER, 0x02, 0x60, 2),
+	/* dump something arbitrary for
+	 * /proc/sgi_prominfo/nodeX/version */
+	0xbadbeef00fa3ef17ul,
+	FIT_ENTRY(FIT_ENTRY_SAL_A, 0, 0x99, 0x100)
+};	
+#endif
+
+/* Return the address of the FIT for @nasid (the fake FIT on the simulator). */
+static unsigned long *
+lookup_fit(int nasid)
+{
+	unsigned long *fitp;
+	unsigned long fit_paddr;
+	unsigned long *fit_vaddr;
+
+#ifdef CONFIG_IA64_SGI_SN_SIM
+	if (IS_RUNNING_ON_SIMULATOR())
+		return fakefit;
+#endif
+
+	fitp = (void *)GLOBAL_MMR_ADDR(nasid, LB_PROM_SPACE - 32);
+	DPRINTK("pointer to fit at %p\n", (void *)fitp);
+	fit_paddr = readq(fitp);
+	DPRINTK("fit pointer contains %lx\n", fit_paddr);
+	/* snag just the node-relative offset */
+	fit_paddr &= ~0ul >> (63-35);
+	/* the pointer to the FIT is relative to IA-64 compatibility space,
+	 * but the PROM is mapped at LB_PROM_SPACE in MMR space (both local
+	 * and global) */
+	fit_paddr += LB_PROM_SPACE;
+	fit_vaddr = (void *)GLOBAL_MMR_ADDR(nasid, fit_paddr);
+	DPRINTK("fit at %p\n", (void *)fit_vaddr);
+	return fit_vaddr;
+}
+
+/* module entry points */
+int __init prominfo_init(void);
+void __exit prominfo_exit(void);
+
+module_init(prominfo_init);
+module_exit(prominfo_exit);
+
+static struct proc_dir_entry **proc_entries;
+static struct proc_dir_entry *sgi_prominfo_entry;
+
+#define NODE_NAME_LEN 11
+
+/* Create /proc/sgi_prominfo/nodeN/{fit,version} for every node. */
+int __init
+prominfo_init(void)
+{
+	struct proc_dir_entry **entp;
+	cnodeid_t cnodeid;
+	nasid_t nasid;
+	char name[NODE_NAME_LEN];
+
+	TRACE();
+
+	DPRINTK("running on cpu %d\n", smp_processor_id());
+	DPRINTK("numnodes %d\n", numnodes);
+
+	proc_entries = kmalloc(numnodes * sizeof(struct proc_dir_entry *),
+			       GFP_KERNEL);
+	if (!proc_entries)	/* the loop below stores through this */
+		return -ENOMEM;
+	sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
+
+	for (cnodeid = 0, entp = proc_entries;
+	     cnodeid < numnodes;
+	     cnodeid++, entp++) {
+		sprintf(name, "node%d", cnodeid);
+		*entp = proc_mkdir(name, sgi_prominfo_entry);
+		nasid = cnodeid_to_nasid(cnodeid);
+		create_proc_read_entry("fit", 0, *entp, read_fit_entry,
+				       lookup_fit(nasid));
+		create_proc_read_entry("version", 0, *entp,
+				       read_version_entry, lookup_fit(nasid));
+	}
+
+	return 0;
+}
+
+void __exit
+prominfo_exit(void)
+{
+	struct proc_dir_entry **entp;
+	unsigned cnodeid;
+	char name[NODE_NAME_LEN];
+
+	TRACE();
+
+	for (cnodeid = 0, entp = proc_entries;
+	     cnodeid < numnodes;
+	     cnodeid++, entp++) {
+		remove_proc_entry("fit", *entp);
+		remove_proc_entry("version", *entp);
+		sprintf(name, "node%d", cnodeid);
+		remove_proc_entry(name, sgi_prominfo_entry);
+	}
+	remove_proc_entry("sgi_prominfo", NULL);
+	kfree(proc_entries);
+}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 1850229970c14..c7116a35e5b14 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -1,7 +1,7 @@
 /*
  * SN2 Platform specific SMP Support
  *
- * Copyright (C) 2000-2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
  * 
  * This program is free software; you can redistribute it and/or modify it 
  * under the terms of version 2 of the GNU General Public License 
@@ -79,284 +79,6 @@ wait_piowc(void)
 	return ws;
 }
 
-#ifdef PTCG_WAR
-/*
- * The following structure is used to pass params thru smp_call_function
- * to other cpus for flushing TLB ranges.
- */
-typedef struct {
-	unsigned long	start;
-	unsigned long	end;
-	unsigned long	nbits;
-	unsigned int	rid;
-	atomic_t	unfinished_count;
-	char		fill[96];
-} ptc_params_t;
-
-#define NUMPTC	512
-
-static ptc_params_t	ptcParamArray[NUMPTC] __attribute__((__aligned__(128)));
-
-/* use separate cache lines on ptcParamsNextByCpu to avoid false sharing */
-static ptc_params_t	*ptcParamsNextByCpu[NR_CPUS*16] __attribute__((__aligned__(128)));
-static volatile ptc_params_t	*ptcParamsEmpty __cacheline_aligned;
-
-/*REFERENCED*/
-static spinlock_t ptcParamsLock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
-
-static int ptcInit = 0;
-#ifdef PTCDEBUG
-static int ptcParamsAllBusy = 0;		/* debugging/statistics */
-static int ptcCountBacklog = 0;
-static int ptcBacklog[NUMPTC+1];
-static char ptcParamsCounts[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
-static char ptcParamsResults[NR_CPUS][NUMPTC] __attribute__((__aligned__(128)));
-#endif
-
-/*
- * Make smp_send_flush_tlbsmp_send_flush_tlb() a weak reference,
- * so that we get a clean compile with the ia64 patch without the
- * actual SN1 specific code in arch/ia64/kernel/smp.c.
- */
-extern void smp_send_flush_tlb (void) __attribute((weak));
-
-
-/**
- * sn1_ptc_l_range - purge local translation cache
- * @start: start of virtual address range
- * @end: end of virtual address range
- * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
- *
- * Purges the range specified from the local processor's translation cache
- * (as opposed to the translation registers).  Note that more than the specified
- * range *may* be cleared from the cache by some processors.
- *
- * This is probably not good enough, but I don't want to try to make it better 
- * until I get some statistics on a running system. At a minimum, we should only 
- * send IPIs to 1 processor in each TLB domain & have it issue a ptc.g on it's 
- * own FSB. Also, we only have to serialize per FSB, not globally.
- *
- * More likely, we will have to do some work to reduce the frequency of calls to
- * this routine.
- */
-static inline void
-sn1_ptc_l_range(unsigned long start, unsigned long end, unsigned long nbits)
-{
-	do {
-		__asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
-		start += (1UL << nbits);
-	} while (start < end);
-	ia64_srlz_d();
-}
-
-/**
- * sn1_received_flush_tlb - cpu tlb flush routine
- *
- * Flushes the TLB of a given processor.
- */
-void
-sn1_received_flush_tlb(void)
-{
-	unsigned long	start, end, nbits;
-	unsigned int	rid, saved_rid;
-	int		cpu = smp_processor_id();
-	int		result;
-	ptc_params_t	*ptcParams;
-
-	ptcParams = ptcParamsNextByCpu[cpu*16];
-	if (ptcParams == ptcParamsEmpty)
-		return;
-
-	do {
-		start = ptcParams->start;
-		saved_rid = (unsigned int) ia64_get_rr(start);
-		end = ptcParams->end;
-		nbits = ptcParams->nbits;
-		rid = ptcParams->rid;
-
-		if (saved_rid != rid) {
-			ia64_set_rr(start, (unsigned long)rid);
-			ia64_srlz_d();
-		}
-
-		sn1_ptc_l_range(start, end, nbits);
-
-		if (saved_rid != rid) 
-			ia64_set_rr(start, (unsigned long)saved_rid);
-
-		ia64_srlz_i();
-
-		result = atomic_dec(&ptcParams->unfinished_count);
-#ifdef PTCDEBUG
-		{
-		    int i = ptcParams-&ptcParamArray[0];
-		    ptcParamsResults[cpu][i] = (char) result;
-		    ptcParamsCounts[cpu][i]++;
-		}
-#endif /* PTCDEBUG */
-
-		if (++ptcParams == &ptcParamArray[NUMPTC])
-			ptcParams = &ptcParamArray[0];
-
-	} while (ptcParams != ptcParamsEmpty);
-
-	ptcParamsNextByCpu[cpu*16] = ptcParams;
-}
-
-/**
- * sn1_global_tlb_purge - flush a translation cache range on all processors
- * @start: start of virtual address range to flush
- * @end: end of virtual address range
- * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
- *
- * Flushes the translation cache of all processors from @start to @end.
- */
-void
-sn1_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
-{
-	ptc_params_t	*params;
-	ptc_params_t	*next;
-	unsigned long	irqflags;
-#ifdef PTCDEBUG
-	ptc_params_t	*nextnext;
-	int		backlog = 0;
-#endif
-
-	if (smp_num_cpus == 1) {
-		sn1_ptc_l_range(start, end, nbits);
-		return;
-	}
-
-	if (in_interrupt()) {
-		/*
-		 *  If at interrupt level and cannot get spinlock, 
-		 *  then do something useful by flushing own tlbflush queue
-		 *  so as to avoid a possible deadlock.
-		 */
-		while (!spin_trylock(&ptcParamsLock)) {
-			local_irq_save(irqflags);
-			sn1_received_flush_tlb();
-			local_irq_restore(irqflags);
-			udelay(10);	/* take it easier on the bus */	
-		}
-	} else {
-		spin_lock(&ptcParamsLock);
-	}
-
-	if (!ptcInit) {
-		int cpu;
-		ptcInit = 1;
-		memset(ptcParamArray, 0, sizeof(ptcParamArray));
-		ptcParamsEmpty = &ptcParamArray[0];
-		for (cpu=0; cpu<NR_CPUS; cpu++)
-			ptcParamsNextByCpu[cpu*16] = &ptcParamArray[0];
-
-#ifdef PTCDEBUG
-		memset(ptcBacklog, 0, sizeof(ptcBacklog));
-		memset(ptcParamsCounts, 0, sizeof(ptcParamsCounts));
-		memset(ptcParamsResults, 0, sizeof(ptcParamsResults));
-#endif	/* PTCDEBUG */
-	}
-
-	params = (ptc_params_t *) ptcParamsEmpty;
-	next = (ptc_params_t *) ptcParamsEmpty + 1;
-	if (next == &ptcParamArray[NUMPTC])
-		next = &ptcParamArray[0];
-
-#ifdef PTCDEBUG
-	nextnext = next + 1;
-	if (nextnext == &ptcParamArray[NUMPTC])
-		nextnext = &ptcParamArray[0];
-
-	if (ptcCountBacklog) {
-		/* quick count of backlog */
-		ptc_params_t *ptr;
-
-		/* check the current pointer to the beginning */
-		ptr = params;
-		while(--ptr >= &ptcParamArray[0]) {
-			if (atomic_read(&ptr->unfinished_count) == 0)
-				break;
-			++backlog;
-		}
-
-		if (backlog) {
-			/* check the end of the array */
-			ptr = &ptcParamArray[NUMPTC];
-			while (--ptr > params) {
-				if (atomic_read(&ptr->unfinished_count) == 0)
-					break;
-				++backlog;
-			}
-		}
-		ptcBacklog[backlog]++;
-	}
-#endif	/* PTCDEBUG */
-
-	/* wait for the next entry to clear...should be rare */
-	if (atomic_read(&next->unfinished_count) > 0) {
-#ifdef PTCDEBUG
-		ptcParamsAllBusy++;
-
-		if (atomic_read(&nextnext->unfinished_count) == 0) {
-		    if (atomic_read(&next->unfinished_count) > 0) {
-			panic("\nnonzero next zero nextnext %lx %lx\n",
-			    (long)next, (long)nextnext);
-		    }
-		}
-#endif
-
-		/* it could be this cpu that is behind */
-		local_irq_save(irqflags);
-		sn1_received_flush_tlb();
-		local_irq_restore(irqflags);
-
-		/* now we know it's not this cpu, so just wait */
-		while (atomic_read(&next->unfinished_count) > 0) {
-			barrier();
-		}
-	}
-
-	params->start = start;
-	params->end = end;
-	params->nbits = nbits;
-	params->rid = (unsigned int) ia64_get_rr(start);
-	atomic_set(&params->unfinished_count, smp_num_cpus);
-
-	/* The atomic_set above can hit memory *after* the update
-	 * to ptcParamsEmpty below, which opens a timing window
-	 * that other cpus can squeeze into!
-	 */
-	mb();
-
-	/* everything is ready to process:
-	 *	-- global lock is held
-	 *	-- new entry + 1 is free
-	 *	-- new entry is set up
-	 * so now:
-	 *	-- update the global next pointer
-	 *	-- unlock the global lock
-	 *	-- send IPI to notify other cpus
-	 *	-- process the data ourselves
-	 */
-	ptcParamsEmpty = next;
-	spin_unlock(&ptcParamsLock);
-	smp_send_flush_tlb();
-
-	local_irq_save(irqflags);
-	sn1_received_flush_tlb();
-	local_irq_restore(irqflags);
-
-	/* 
-	 * Since IPIs are polled event (for now), we need to wait til the
-	 * TLB flush has started.
-	 * wait for the flush to complete 
-	 */ 
-	while (atomic_read(&params->unfinished_count) > 0)
-		barrier();
-}
-
-#endif /* PTCG_WAR */
 
 
 /**
@@ -372,18 +94,10 @@ sn1_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbit
 void
 sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
 {
-	int			cnode, mycnode, nasid;
+	int			cnode, mycnode, nasid, flushed=0;
 	volatile unsigned	long	*ptc0, *ptc1;
 	unsigned long		flags=0, data0, data1;
 
-	/*
-	 * Special case 1 cpu & 1 node. Use local purges.
-	 */
-#ifdef PTCG_WAR
-	sn1_global_tlb_purge(start, end, nbits);
-	return;
-#endif /* PTCG_WAR */
-		
 	data0 = (1UL<<SH_PTC_0_A_SHFT) |
 		(nbits<<SH_PTC_0_PS_SHFT) |
 		((ia64_get_rr(start)>>8)<<SH_PTC_0_RID_SHFT) |
@@ -392,18 +106,9 @@ sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbit
 	ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 
-	mycnode = local_nodeid;
-
-	/* 
-	 * For now, we don't want to spin uninterruptibly waiting
-	 * for the lock. Makes hangs hard to debug.
-	 */
-	local_irq_save(flags);
-	while (!spin_trylock(&sn2_global_ptc_lock)) {
-		local_irq_restore(flags);
-		udelay(1);
-		local_irq_save(flags);
-	}
+	mycnode = numa_node_id();
+
+	spin_lock_irqsave(&sn2_global_ptc_lock, flags);
 
 	do {
 		data1 = start | (1UL<<SH_PTC_1_START_SHFT);
@@ -417,11 +122,13 @@ sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbit
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				ptc1 = CHANGE_NASID(nasid, ptc1);
 				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+				flushed = 1;
 			}
 		}
 
-		if (wait_piowc() & SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK_MASK)
+		if (flushed && (wait_piowc() & SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK_MASK)) {
 			sn2_ptc_deadlock_recovery(data0, data1);
+		}
 
 		start += (1UL << nbits);
 
@@ -451,7 +158,7 @@ sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1)
 	ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
 	piows = (long*)pda->pio_write_status_addr;
 
-	mycnode = local_nodeid;
+	mycnode = numa_node_id();
 
 	for (cnode = 0; cnode < numnodes; cnode++) {
 		if (is_headless_node(cnode) || cnode == mycnode)
@@ -482,16 +189,10 @@ sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1)
 void
 sn_send_IPI_phys(long physid, int vector, int delivery_mode)
 {
-	long		nasid, slice;
-	long		val;
+	long		nasid, slice, val;
+	unsigned long	flags=0;
 	volatile long	*p;
 
-#ifdef BUS_INT_WAR
-	if (vector != ap_wakeup_vector && delivery_mode == IA64_IPI_DM_INT) {
-		return;
-	}
-#endif
-
 	nasid = cpu_physical_id_to_nasid(physid);
         slice = cpu_physical_id_to_slice(physid);
 
@@ -503,12 +204,15 @@ sn_send_IPI_phys(long physid, int vector, int delivery_mode)
 		(0x000feeUL<<SH_IPI_INT_BASE_SHFT);
 
 	mb();
+	if (enable_shub_wars_1_1() ) {
+		spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+	}
 	pio_phys_write_mmr(p, val);
+	if (enable_shub_wars_1_1() ) {
+		wait_piowc();
+		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+	}
 
-#ifndef CONFIG_SHUB_1_0_SPECIFIC
-	/* doesn't work on shub 1.0 */
-	wait_piowc();
-#endif
 }
 
 /**
@@ -536,4 +240,3 @@ sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
 
 	sn_send_IPI_phys(physid, vector, delivery_mode);
 }
-
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 60eefbac4a21e..e7a8987b672d2 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -31,6 +31,7 @@
  * http://oss.sgi.com/projects/GenInfo/NoticeExplan
  */
 #include <linux/config.h>
+#include <asm/uaccess.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -43,7 +44,7 @@ static int partition_id_read_proc(char *page, char **start, off_t off,
 	return sprintf(page, "%d\n", sn_local_partid());
 }
 
-struct proc_dir_entry * sgi_proc_dir = NULL;
+static struct proc_dir_entry * sgi_proc_dir;
 
 void
 register_sn_partition_id(void) {
@@ -135,11 +136,60 @@ register_sn_force_interrupt(void) {
 		entry->write_proc = sn_force_interrupt_write_proc;
 	}
 }
+
+extern int sn_linkstats_get(char *);
+extern int sn_linkstats_reset(unsigned long);
+
+static int
+sn_linkstats_read_proc(char *page, char **start, off_t off,
+		int count, int *eof, void *data) {
+       
+	return sn_linkstats_get(page);
+}
+
+/* Parse a decimal interval in milliseconds (>= 5) and pass it to sn_linkstats_reset(). */
+static int
+sn_linkstats_write_proc(struct file *file, const char *buffer,
+                                        unsigned long count, void *data)
+{
+	char		s[64];
+	unsigned long	msecs;
+	unsigned long	len = count < sizeof(s) ? count : sizeof(s) - 1;
+
+	if (copy_from_user(s, buffer, len))
+		return -EFAULT;
+	s[len] = '\0';	/* sscanf() needs a terminated string */
+
+	/* at least 5 milliseconds between updates */
+	if (sscanf(s, "%lu", &msecs) != 1 || msecs < 5)
+		return -EINVAL;
+
+	sn_linkstats_reset(msecs);
+	return count;
+}
+
+void
+register_sn_linkstats(void) {
+	struct proc_dir_entry *entry;
+
+	if (!sgi_proc_dir) {
+		sgi_proc_dir = proc_mkdir("sgi_sn", 0);
+	}
+	entry = create_proc_entry("linkstats", 0444, sgi_proc_dir);
+	if (entry) {
+		entry->nlink = 1;
+		entry->data = 0;
+		entry->read_proc = sn_linkstats_read_proc;
+		entry->write_proc = sn_linkstats_write_proc;
+	}
+}
+
 void
 register_sn_procfs(void) {
 	register_sn_partition_id();
 	register_sn_serial_numbers();
 	register_sn_force_interrupt();
+	register_sn_linkstats();
 }
 
 #endif /* CONFIG_PROC_FS */