author     David S. Miller <davem@nuts.davemloft.net>   2004-08-07 03:54:42 -0700
committer  David S. Miller <davem@nuts.davemloft.net>   2004-08-07 03:54:42 -0700
commit     fefd14da08f4db1374a8e3c9563ca82948d61851 (patch)
tree       43509cee039a55476c9f56ad945490788e656fd1 /arch
parent     915a29ec1c5e34283a6231af1036114e4d612cb0 (diff)
parent     2b03a2af725991bc3548ef10f5a723f6a0b780fc (diff)
download   history-fefd14da08f4db1374a8e3c9563ca82948d61851.tar.gz
Merge nuts.davemloft.net:/disk1/BK/sparcwork-2.4
into nuts.davemloft.net:/disk1/BK/sparc-2.6
Diffstat (limited to 'arch')
-rw-r--r--  arch/sparc64/kernel/rtrap.S          |   3
-rw-r--r--  arch/sparc64/kernel/smp.c            |  87
-rw-r--r--  arch/sparc64/kernel/sparc64_ksyms.c  |   2
-rw-r--r--  arch/sparc64/kernel/time.c           |  19
-rw-r--r--  arch/sparc64/mm/Makefile             |   2
-rw-r--r--  arch/sparc64/mm/init.c               |  85
-rw-r--r--  arch/sparc64/mm/tlb.c                | 158
-rw-r--r--  arch/sparc64/mm/ultra.S              | 266
8 files changed, 266 insertions(+), 356 deletions(-)
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index b58afaeca19354..b7c3277bb92ac9 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -152,7 +152,8 @@ __handle_signal:
.globl rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
rtrap_irq:
rtrap_clr_l6: clr %l6
-rtrap: ldub [%g6 + TI_CPU], %l0
+rtrap:
+ ldub [%g6 + TI_CPU], %l0
sethi %hi(irq_stat), %l2 ! &softirq_active
or %l2, %lo(irq_stat), %l2 ! &softirq_active
irqsz_patchme: sllx %l0, 0, %l0
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1c8b5436203563..abc65dc6d61e7d 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
+#include <asm/tlb.h>
extern int linux_num_cpus;
extern void calibrate_delay(void);
@@ -635,9 +636,8 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
}
}
-extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
-extern unsigned long xcall_flush_tlb_range;
+extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
@@ -835,7 +835,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
int cpu = get_cpu();
if (atomic_read(&mm->mm_users) == 1) {
- /* See smp_flush_tlb_page for info about this. */
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
}
@@ -851,27 +850,40 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
}
}
-void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
- unsigned long end)
+void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
u32 ctx = CTX_HWBITS(mm->context);
int cpu = get_cpu();
- start &= PAGE_MASK;
- end = PAGE_ALIGN(end);
-
if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
+ } else {
+ /* This optimization is not valid. Normally
+ * we will be holding the page_table_lock, but
+ * there is an exception which is copy_page_range()
+ * when forking. The lock is held during the individual
+ * page table updates in the parent, but not at the
+ * top level, which is where we are invoked.
+ */
+ if (0) {
+ cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
+
+ /* By virtue of running under the mm->page_table_lock,
+ * and mmu_context.h:switch_mm doing the same, the
+ * following operation is safe.
+ */
+ if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
+ goto local_flush_and_out;
+ }
}
- smp_cross_call_masked(&xcall_flush_tlb_range,
- ctx, start, end,
+ smp_cross_call_masked(&xcall_flush_tlb_pending,
+ ctx, nr, (unsigned long) vaddrs,
mm->cpu_vm_mask);
- local_flush_and_out:
- __flush_tlb_range(ctx, start, SECONDARY_CONTEXT,
- end, PAGE_SIZE, (end-start));
+local_flush_and_out:
+ __flush_tlb_pending(ctx, nr, vaddrs);
put_cpu();
}
@@ -888,55 +900,6 @@ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
}
-void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
-{
- {
- u32 ctx = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
-
- page &= PAGE_MASK;
- if (mm == current->active_mm &&
- atomic_read(&mm->mm_users) == 1) {
- /* By virtue of being the current address space, and
- * having the only reference to it, the following
- * operation is safe.
- *
- * It would not be a win to perform the xcall tlb
- * flush in this case, because even if we switch back
- * to one of the other processors in cpu_vm_mask it
- * is almost certain that all TLB entries for this
- * context will be replaced by the time that happens.
- */
- mm->cpu_vm_mask = cpumask_of_cpu(cpu);
- goto local_flush_and_out;
- } else {
- cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
-
- /* By virtue of running under the mm->page_table_lock,
- * and mmu_context.h:switch_mm doing the same, the
- * following operation is safe.
- */
- if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
- goto local_flush_and_out;
- }
-
- /* OK, we have to actually perform the cross call. Most
- * likely this is a cloned mm or kswapd is kicking out pages
- * for a task which has run recently on another cpu.
- */
- smp_cross_call_masked(&xcall_flush_tlb_page,
- ctx, page, 0,
- mm->cpu_vm_mask);
- if (!cpu_isset(cpu, mm->cpu_vm_mask))
- return;
-
- local_flush_and_out:
- __flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
-
- put_cpu();
- }
-}
-
/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;
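
The cross-call interface above now takes a context, an entry count, and an array of page-aligned user addresses, with bit 0 of each entry set when the mapping is executable. For illustration only (not part of the patch), a single-page flush expressed through that interface could look like the sketch below; demo_flush_one_page() is a made-up helper, and the extern declaration simply restates the definition added in this file.

/* Sketch only: one user page flushed via the batched interface that
 * replaces the removed smp_flush_tlb_page().  The bit-0 "executable"
 * encoding matches tlb_batch_add() in the new arch/sparc64/mm/tlb.c.
 */
#include <linux/mm.h>

extern void smp_flush_tlb_pending(struct mm_struct *, unsigned long, unsigned long *);

static void demo_flush_one_page(struct mm_struct *mm,
				unsigned long vaddr, int executable)
{
	unsigned long ent = vaddr & PAGE_MASK;

	if (executable)
		ent |= 0x1UL;		/* bit 0 requests an I-TLB demap too */

	smp_flush_tlb_pending(mm, 1, &ent);
}
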
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d69d4b44189d64..5eafadd28a58fb 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -259,7 +259,7 @@ EXPORT_SYMBOL(verify_compat_iovec);
EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(pte_alloc_one_kernel);
+EXPORT_SYMBOL(__pte_alloc_one_kernel);
#ifndef CONFIG_SMP
EXPORT_SYMBOL(pgt_quicklists);
#endif
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 06e1579f061436..11625cce4891f6 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -423,7 +423,7 @@ static unsigned long timer_ticks_per_nsec_quotient;
#define TICK_SIZE (tick_nsec / 1000)
-static __inline__ void timer_check_rtc(void)
+static inline void timer_check_rtc(void)
{
/* last time the cmos clock got updated */
static long last_rtc_update;
@@ -1139,7 +1139,6 @@ EXPORT_SYMBOL(do_settimeofday);
*/
void do_gettimeofday(struct timeval *tv)
{
- unsigned long flags;
unsigned long seq;
unsigned long usec, sec;
unsigned long max_ntp_tick = tick_usec - tickadj;
@@ -1147,7 +1146,7 @@ void do_gettimeofday(struct timeval *tv)
do {
unsigned long lost;
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
+ seq = read_seqbegin(&xtime_lock);
usec = do_gettimeoffset();
lost = jiffies - wall_jiffies;
@@ -1166,8 +1165,18 @@ void do_gettimeofday(struct timeval *tv)
usec += lost * tick_usec;
sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+
+ /* Believe it or not, this divide shows up on
+ * kernel profiles. The problem is that it is
+ * both 64-bit and signed. Happily, 32-bits
+ * of precision is all we really need and in
+ * doing so gcc ends up emitting a cheap multiply.
+ *
+ * XXX Why is tv_nsec 'long' and 'signed' in
+ * XXX the first place, can it even be negative?
+ */
+ usec += ((unsigned int) xtime.tv_nsec / 1000U);
+ } while (read_seqretry(&xtime_lock, seq));
while (usec >= 1000000) {
usec -= 1000000;
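
The comment added above is the whole point of this hunk: xtime.tv_nsec is a signed 64-bit long on sparc64, so dividing it by 1000 costs a real divide, while the unsigned 32-bit form is compiled by gcc into a reciprocal multiply, and 32 bits are plenty since tv_nsec stays below 10^9. A stand-alone sketch of the same transformation (illustration only, not kernel code):

/* Userland illustration of the divide the patch avoids: the unsigned
 * 32-bit divide by a constant is what gcc turns into a multiply.
 */
#include <stdio.h>

static unsigned int nsec_to_usec(long nsec)
{
	/* Mirrors the patched do_gettimeofday(): 32 bits of precision
	 * suffice because tv_nsec stays in [0, 1000000000).
	 */
	return (unsigned int) nsec / 1000U;
}

int main(void)
{
	printf("%u\n", nsec_to_usec(1500000L));	/* prints 1500 */
	return 0;
}
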
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index fb85e5a2afd7a8..cda87333a77b81 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
-obj-y := ultra.o fault.o init.o generic.o extable.o
+obj-y := ultra.o tlb.o fault.o init.o generic.o extable.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 994174afe72675..60308cead47045 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -37,8 +37,6 @@
#include <asm/spitfire.h>
#include <asm/sections.h>
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
extern void device_scan(void);
struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
@@ -252,87 +250,6 @@ out:
put_cpu();
}
-/* When shared+writable mmaps of files go away, we lose all dirty
- * page state, so we have to deal with D-cache aliasing here.
- *
- * This code relies on the fact that flush_cache_range() is always
- * called for an area composed by a single VMA. It also assumes that
- * the MM's page_table_lock is held.
- */
-static inline void flush_cache_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long address, unsigned long size)
-{
- unsigned long offset;
- pte_t *ptep;
-
- if (pmd_none(*pmd))
- return;
- ptep = pte_offset_map(pmd, address);
- offset = address & ~PMD_MASK;
- if (offset + size > PMD_SIZE)
- size = PMD_SIZE - offset;
- size &= PAGE_MASK;
- for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
- pte_t pte = *ptep;
-
- if (pte_none(pte))
- continue;
-
- if (pte_present(pte) && pte_dirty(pte)) {
- struct page *page;
- unsigned long pgaddr, uaddr;
- unsigned long pfn = pte_pfn(pte);
-
- if (!pfn_valid(pfn))
- continue;
- page = pfn_to_page(pfn);
- if (PageReserved(page) || !page_mapping(page))
- continue;
- pgaddr = (unsigned long) page_address(page);
- uaddr = address + offset;
- if ((pgaddr ^ uaddr) & (1 << 13))
- flush_dcache_page_all(mm, page);
- }
- }
- pte_unmap(ptep - 1);
-}
-
-static inline void flush_cache_pmd_range(struct mm_struct *mm, pgd_t *dir, unsigned long address, unsigned long size)
-{
- pmd_t *pmd;
- unsigned long end;
-
- if (pgd_none(*dir))
- return;
- pmd = pmd_offset(dir, address);
- end = address + size;
- if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
- do {
- flush_cache_pte_range(mm, pmd, address, end - address);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
-{
- struct mm_struct *mm = vma->vm_mm;
- pgd_t *dir = pgd_offset(mm, start);
-
- if (mm == current->mm)
- flushw_user();
-
- if (vma->vm_file == NULL ||
- ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)))
- return;
-
- do {
- flush_cache_pmd_range(mm, dir, start, end - start);
- start = (start + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (start && (start < end));
-}
-
void flush_icache_range(unsigned long start, unsigned long end)
{
/* Cheetah has coherent I-cache. */
@@ -1173,7 +1090,7 @@ struct pgtable_cache_struct pgt_quicklists;
#else
#define DC_ALIAS_SHIFT 0
#endif
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
struct page *page;
unsigned long color;
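
The page-table walker deleted above flushed the D-cache for dirty, mapped file pages only when the kernel and user mappings could alias in UltraSPARC's virtually indexed D-cache; the same (addr1 ^ addr2) & (1 << 13) test reappears in tlb_batch_add() in the new mm/tlb.c, which is why the walker can go away. A minimal sketch of that predicate (illustration only; the 8K-page, 16K-alias-window layout is an assumption read off the constant):

/* Two mappings of one physical page can alias in the virtually
 * indexed D-cache when they differ in virtual address bit 13.
 */
static int dcache_alias(unsigned long kernel_vaddr, unsigned long user_vaddr)
{
	return ((kernel_vaddr ^ user_vaddr) & (1UL << 13)) != 0;
}
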
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
new file mode 100644
index 00000000000000..11a5078182c460
--- /dev/null
+++ b/arch/sparc64/mm/tlb.c
@@ -0,0 +1,158 @@
+/* arch/sparc64/mm/tlb.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/* Heavily inspired by the ppc64 code. */
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
+ { NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+
+void flush_tlb_pending(void)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+
+ if (mp->tlb_nr) {
+ unsigned long context = mp->mm->context;
+
+ if (CTX_VALID(context)) {
+#ifdef CONFIG_SMP
+ smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
+ &mp->vaddrs[0]);
+#else
+ __flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
+ &mp->vaddrs[0]);
+#endif
+ }
+ mp->tlb_nr = 0;
+ }
+}
+
+void tlb_batch_add(pte_t *ptep, pte_t orig)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ struct page *ptepage;
+ struct mm_struct *mm;
+ unsigned long vaddr, nr;
+
+ ptepage = virt_to_page(ptep);
+ mm = (struct mm_struct *) ptepage->mapping;
+
+ /* It is more efficient to let flush_tlb_kernel_range()
+ * handle these cases.
+ */
+ if (mm == &init_mm)
+ return;
+
+ vaddr = ptepage->index +
+ (((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
+ if (pte_exec(orig))
+ vaddr |= 0x1UL;
+
+ if (pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+ struct address_space *mapping;
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ goto no_cache_flush;
+
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ goto no_cache_flush;
+
+ /* A real file page? */
+ mapping = page_mapping(page);
+ if (!mapping || mapping == &swapper_space)
+ goto no_cache_flush;
+
+ paddr = (unsigned long) page_address(page);
+ if ((paddr ^ vaddr) & (1 << 13))
+ flush_dcache_page_all(mm, page);
+ }
+
+no_cache_flush:
+ if (mp->tlb_frozen)
+ return;
+
+ nr = mp->tlb_nr;
+
+ if (unlikely(nr != 0 && mm != mp->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ mp->mm = mm;
+
+ mp->vaddrs[nr] = vaddr;
+ mp->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR)
+ flush_tlb_pending();
+}
+
+void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ unsigned long nr = mp->tlb_nr;
+ long s = start, e = end, vpte_base;
+
+ if (mp->tlb_frozen)
+ return;
+
+ /* Nobody should call us with start below VM hole and end above.
+ * See if it is really true.
+ */
+ BUG_ON(s > e);
+
+#if 0
+ /* Currently free_pgtables guarantees this. */
+ s &= PMD_MASK;
+ e = (e + PMD_SIZE - 1) & PMD_MASK;
+#endif
+ vpte_base = (tlb_type == spitfire ?
+ VPTE_BASE_SPITFIRE :
+ VPTE_BASE_CHEETAH);
+
+ if (unlikely(nr != 0 && mm != mp->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ mp->mm = mm;
+
+ start = vpte_base + (s >> (PAGE_SHIFT - 3));
+ end = vpte_base + (e >> (PAGE_SHIFT - 3));
+ while (start < end) {
+ mp->vaddrs[nr] = start;
+ mp->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+ start += PAGE_SIZE;
+ }
+ if (nr)
+ flush_tlb_pending();
+}
+
+unsigned long __ptrs_per_pmd(void)
+{
+ if (test_thread_flag(TIF_32BIT))
+ return (1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT));
+ return REAL_PTRS_PER_PMD;
+}
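
The new mm/tlb.c gathers user-space TLB flushes into the per-cpu mmu_gather and drains it either when the batch fills (TLB_BATCH_NR entries) or when an address for a different mm arrives. The drain policy, reduced to plain C with made-up names (illustration only; demo_batch/demo_add/demo_flush do not exist in the tree):

#define DEMO_BATCH_NR	64		/* stands in for TLB_BATCH_NR */

struct demo_batch {
	void		*mm;		/* address space the entries belong to */
	unsigned long	nr;
	unsigned long	vaddrs[DEMO_BATCH_NR];
};

static void demo_flush(struct demo_batch *b)
{
	/* cross call / local __flush_tlb_pending() would go here */
	b->nr = 0;
}

static void demo_add(struct demo_batch *b, void *mm, unsigned long vaddr)
{
	if (b->nr != 0 && b->mm != mm)	/* new mm: drain the old batch first */
		demo_flush(b);
	if (b->nr == 0)
		b->mm = mm;
	b->vaddrs[b->nr++] = vaddr;
	if (b->nr >= DEMO_BATCH_NR)	/* batch full: drain now */
		demo_flush(b);
}
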
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 16410ba9555cf5..fae62faf0c81fb 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -26,25 +26,7 @@
*/
.text
.align 32
- .globl __flush_tlb_page, __flush_tlb_mm, __flush_tlb_range
-__flush_tlb_page: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=page&PAGE_MASK, %o2=SECONDARY_CONTEXT */
- ldxa [%o2] ASI_DMMU, %g2
- cmp %g2, %o0
- bne,pn %icc, __spitfire_flush_tlb_page_slow
- or %o1, 0x10, %g3
- stxa %g0, [%g3] ASI_DMMU_DEMAP
- stxa %g0, [%g3] ASI_IMMU_DEMAP
- retl
- flush %g6
- nop
- nop
- nop
- nop
- nop
- nop
- nop
- nop
-
+ .globl __flush_tlb_mm
__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
@@ -63,84 +45,31 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
nop
nop
-__flush_tlb_range: /* %o0=(ctx&TAG_CONTEXT_BITS), %o1=start&PAGE_MASK, %o2=SECONDARY_CONTEXT,
- * %o3=end&PAGE_MASK, %o4=PAGE_SIZE, %o5=(end - start)
- */
-#define TLB_MAGIC 207 /* Students, do you know how I calculated this? -DaveM */
- cmp %o5, %o4
- bleu,pt %xcc, __flush_tlb_page
- srlx %o5, PAGE_SHIFT, %g5
- cmp %g5, TLB_MAGIC
- bgeu,pn %icc, __spitfire_flush_tlb_range_constant_time
- or %o1, 0x10, %g5
- ldxa [%o2] ASI_DMMU, %g2
- cmp %g2, %o0
-__spitfire_flush_tlb_range_page_by_page:
- bne,pn %icc, __spitfire_flush_tlb_range_pbp_slow
- sub %o5, %o4, %o5
-1: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
- brnz,pt %o5, 1b
- sub %o5, %o4, %o5
- retl
- flush %g6
-__spitfire_flush_tlb_range_constant_time: /* %o0=ctx, %o1=start, %o3=end */
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- mov TLB_TAG_ACCESS, %g3
- mov ((SPITFIRE_HIGHEST_LOCKED_TLBENT-1) << 3), %g2
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
- flush %g6
-
-1: ldxa [%g2] ASI_ITLB_TAG_READ, %o4
- and %o4, TAG_CONTEXT_BITS, %o5
- cmp %o5, %o0
- bne,pt %icc, 2f
- andn %o4, TAG_CONTEXT_BITS, %o4
- cmp %o4, %o1
- blu,pt %xcc, 2f
- cmp %o4, %o3
- blu,pn %xcc, 4f
-2: ldxa [%g2] ASI_DTLB_TAG_READ, %o4
- and %o4, TAG_CONTEXT_BITS, %o5
- cmp %o5, %o0
- andn %o4, TAG_CONTEXT_BITS, %o4
- bne,pt %icc, 3f
- cmp %o4, %o1
- blu,pt %xcc, 3f
- cmp %o4, %o3
- blu,pn %xcc, 5f
- nop
-3: brnz,pt %g2, 1b
- sub %g2, (1 << 3), %g2
- retl
- wrpr %g1, 0x0, %pstate
-4: stxa %g0, [%g3] ASI_IMMU
- stxa %g0, [%g2] ASI_ITLB_DATA_ACCESS
- flush %g6
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
- flush %g6
-
- ba,pt %xcc, 2b
+ .align 32
+ .globl __flush_tlb_pending
+__flush_tlb_pending:
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g5
+ sllx %o1, 3, %o1
+ andn %g5, PSTATE_IE, %g2
+ wrpr %g2, %pstate
+ mov SECONDARY_CONTEXT, %o4
+ ldxa [%o4] ASI_DMMU, %g2
+ stxa %o0, [%o4] ASI_DMMU
+1: sub %o1, (1 << 3), %o1
+ ldx [%o2 + %o1], %o3
+ andcc %o3, 1, %g0
+ be,pn %icc, 2f
+ andn %o3, 1, %o3
+ stxa %g0, [%o3] ASI_IMMU_DEMAP
+2: stxa %g0, [%o3] ASI_DMMU_DEMAP
+ brnz,pt %o1, 1b
nop
-
-5: stxa %g0, [%g3] ASI_DMMU
- stxa %g0, [%g2] ASI_DTLB_DATA_ACCESS
flush %g6
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
+ stxa %g2, [%o2] ASI_DMMU
flush %g6
-
- ba,pt %xcc, 3b
- nop
+ retl
+ wrpr %g5, 0x0, %pstate
.align 32
.globl __flush_tlb_kernel_range
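
For reference, the demap loop of __flush_tlb_pending above restated as C (illustration only; the interrupt masking and SECONDARY_CONTEXT save/restore are omitted, and demap_dtlb()/demap_itlb() stand in for the stxa stores to ASI_DMMU_DEMAP/ASI_IMMU_DEMAP):

static void demap_dtlb(unsigned long vaddr)
{
	(void) vaddr;	/* placeholder for stxa %g0, [vaddr] ASI_DMMU_DEMAP */
}

static void demap_itlb(unsigned long vaddr)
{
	(void) vaddr;	/* placeholder for stxa %g0, [vaddr] ASI_IMMU_DEMAP */
}

static void flush_tlb_pending_c(unsigned long nr, const unsigned long *vaddrs)
{
	while (nr--) {
		unsigned long v = vaddrs[nr];	/* walk the array backwards */

		if (v & 1UL)			/* bit 0: executable mapping */
			demap_itlb(v & ~1UL);
		demap_dtlb(v & ~1UL);
	}
}
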
@@ -171,33 +100,6 @@ __spitfire_flush_tlb_mm_slow:
retl
wrpr %g1, 0, %pstate
-__spitfire_flush_tlb_page_slow:
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- stxa %o0, [%o2] ASI_DMMU
- stxa %g0, [%g3] ASI_DMMU_DEMAP
- stxa %g0, [%g3] ASI_IMMU_DEMAP
- flush %g6
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- retl
- wrpr %g1, 0, %pstate
-
-__spitfire_flush_tlb_range_pbp_slow:
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- stxa %o0, [%o2] ASI_DMMU
-
-2: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
- brnz,pt %o5, 2b
- sub %o5, %o4, %o5
- flush %g6
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- retl
- wrpr %g1, 0x0, %pstate
-
/*
* The following code flushes one page_size worth.
*/
@@ -356,22 +258,6 @@ __update_mmu_cache: /* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
ba,a,pt %xcc, __prefill_itlb
/* Cheetah specific versions, patched at boot time. */
-__cheetah_flush_tlb_page: /* 14 insns */
- rdpr %pstate, %g5
- andn %g5, PSTATE_IE, %g2
- wrpr %g2, 0x0, %pstate
- wrpr %g0, 1, %tl
- mov PRIMARY_CONTEXT, %o2
- ldxa [%o2] ASI_DMMU, %g2
- stxa %o0, [%o2] ASI_DMMU
- stxa %g0, [%o1] ASI_DMMU_DEMAP
- stxa %g0, [%o1] ASI_IMMU_DEMAP
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- wrpr %g0, 0, %tl
- retl
- wrpr %g5, 0x0, %pstate
-
__cheetah_flush_tlb_mm: /* 15 insns */
rdpr %pstate, %g5
andn %g5, PSTATE_IE, %g2
@@ -389,26 +275,29 @@ __cheetah_flush_tlb_mm: /* 15 insns */
retl
wrpr %g5, 0x0, %pstate
-__cheetah_flush_tlb_range: /* 20 insns */
- cmp %o5, %o4
- blu,pt %xcc, 9f
- rdpr %pstate, %g5
+__cheetah_flush_tlb_pending: /* 22 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g5
+ sllx %o1, 3, %o1
andn %g5, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
- mov PRIMARY_CONTEXT, %o2
- sub %o5, %o4, %o5
- ldxa [%o2] ASI_DMMU, %g2
- stxa %o0, [%o2] ASI_DMMU
-1: stxa %g0, [%o1 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%o1 + %o5] ASI_IMMU_DEMAP
- membar #Sync
- brnz,pt %o5, 1b
- sub %o5, %o4, %o5
- stxa %g2, [%o2] ASI_DMMU
+ mov PRIMARY_CONTEXT, %o4
+ ldxa [%o4] ASI_DMMU, %g2
+ stxa %o0, [%o4] ASI_DMMU
+1: sub %o1, (1 << 3), %o1
+ ldx [%o2 + %o1], %o3
+ andcc %o3, 1, %g0
+ be,pn %icc, 2f
+ andn %o3, 1, %o3
+ stxa %g0, [%o3] ASI_IMMU_DEMAP
+2: stxa %g0, [%o3] ASI_DMMU_DEMAP
+ brnz,pt %o1, 1b
+ membar #Sync
+ stxa %g2, [%o4] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
-9: retl
+ retl
wrpr %g5, 0x0, %pstate
flush_dcpage_cheetah: /* 11 insns */
@@ -439,13 +328,6 @@ cheetah_patch_one:
cheetah_patch_cachetlbops:
save %sp, -128, %sp
- sethi %hi(__flush_tlb_page), %o0
- or %o0, %lo(__flush_tlb_page), %o0
- sethi %hi(__cheetah_flush_tlb_page), %o1
- or %o1, %lo(__cheetah_flush_tlb_page), %o1
- call cheetah_patch_one
- mov 14, %o2
-
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
@@ -453,12 +335,12 @@ cheetah_patch_cachetlbops:
call cheetah_patch_one
mov 15, %o2
- sethi %hi(__flush_tlb_range), %o0
- or %o0, %lo(__flush_tlb_range), %o0
- sethi %hi(__cheetah_flush_tlb_range), %o1
- or %o1, %lo(__cheetah_flush_tlb_range), %o1
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__cheetah_flush_tlb_pending), %o1
+ or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call cheetah_patch_one
- mov 20, %o2
+ mov 22, %o2
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
@@ -487,17 +369,7 @@ cheetah_patch_cachetlbops:
* TODO: Make xcall TLB range flushes use the tricks above... -DaveM
*/
.align 32
- .globl xcall_flush_tlb_page, xcall_flush_tlb_mm, xcall_flush_tlb_range
-xcall_flush_tlb_page:
- mov PRIMARY_CONTEXT, %g2
- ldxa [%g2] ASI_DMMU, %g3
- stxa %g5, [%g2] ASI_DMMU
- stxa %g0, [%g1] ASI_DMMU_DEMAP
- stxa %g0, [%g1] ASI_IMMU_DEMAP
- stxa %g3, [%g2] ASI_DMMU
- retry
- nop
-
+ .globl xcall_flush_tlb_mm
xcall_flush_tlb_mm:
mov PRIMARY_CONTEXT, %g2
mov 0x40, %g4
@@ -508,34 +380,25 @@ xcall_flush_tlb_mm:
stxa %g3, [%g2] ASI_DMMU
retry
-xcall_flush_tlb_range:
- sethi %hi(PAGE_SIZE - 1), %g2
- or %g2, %lo(PAGE_SIZE - 1), %g2
- andn %g1, %g2, %g1
- andn %g7, %g2, %g7
- sub %g7, %g1, %g3
- add %g2, 1, %g2
- srlx %g3, PAGE_SHIFT, %g4
- cmp %g4, 96
-
- bgu,pn %icc, xcall_flush_tlb_mm
- mov PRIMARY_CONTEXT, %g4
- ldxa [%g4] ASI_DMMU, %g7
- sub %g3, %g2, %g3
+ .globl xcall_flush_tlb_pending
+xcall_flush_tlb_pending:
+ /* %g5=context, %g1=nr, %g7=vaddrs[] */
+ sllx %g1, 3, %g1
+ mov PRIMARY_CONTEXT, %g4
+ ldxa [%g4] ASI_DMMU, %g2
stxa %g5, [%g4] ASI_DMMU
- nop
- nop
- nop
-
-1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
- stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
- membar #Sync
- brnz,pt %g3, 1b
- sub %g3, %g2, %g3
- stxa %g7, [%g4] ASI_DMMU
+1: sub %g1, (1 << 3), %g1
+ ldx [%g7 + %g1], %g5
+ andcc %g5, 0x1, %g0
+ be,pn %icc, 2f
+
+ andn %g5, 0x1, %g5
+ stxa %g0, [%g5] ASI_IMMU_DEMAP
+2: stxa %g0, [%g5] ASI_DMMU_DEMAP
+ brnz,pt %g1, 1b
+ membar #Sync
+ stxa %g2, [%g4] ASI_DMMU
retry
- nop
- nop
.globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range:
@@ -555,7 +418,6 @@ xcall_flush_tlb_kernel_range:
retry
nop
nop
- nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.