author     David S. Miller <davem@nuts.davemloft.net>   2004-08-07 03:54:42 -0700
committer  David S. Miller <davem@nuts.davemloft.net>   2004-08-07 03:54:42 -0700
commit     fefd14da08f4db1374a8e3c9563ca82948d61851 (patch)
tree       43509cee039a55476c9f56ad945490788e656fd1 /arch
parent     915a29ec1c5e34283a6231af1036114e4d612cb0 (diff)
parent     2b03a2af725991bc3548ef10f5a723f6a0b780fc (diff)
download   history-fefd14da08f4db1374a8e3c9563ca82948d61851.tar.gz
Merge nuts.davemloft.net:/disk1/BK/sparcwork-2.4
into nuts.davemloft.net:/disk1/BK/sparc-2.6
Diffstat (limited to 'arch')
-rw-r--r--  arch/sparc64/kernel/rtrap.S          |   3
-rw-r--r--  arch/sparc64/kernel/smp.c            |  87
-rw-r--r--  arch/sparc64/kernel/sparc64_ksyms.c  |   2
-rw-r--r--  arch/sparc64/kernel/time.c           |  19
-rw-r--r--  arch/sparc64/mm/Makefile             |   2
-rw-r--r--  arch/sparc64/mm/init.c               |  85
-rw-r--r--  arch/sparc64/mm/tlb.c                | 158
-rw-r--r--  arch/sparc64/mm/ultra.S              | 266
8 files changed, 266 insertions(+), 356 deletions(-)
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index b58afaeca19354..b7c3277bb92ac9 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -152,7 +152,8 @@ __handle_signal:
.globl rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
rtrap_irq:
rtrap_clr_l6: clr %l6
-rtrap: ldub [%g6 + TI_CPU], %l0
+rtrap:
+ ldub [%g6 + TI_CPU], %l0
sethi %hi(irq_stat), %l2 ! &softirq_active
or %l2, %lo(irq_stat), %l2 ! &softirq_active
irqsz_patchme: sllx %l0, 0, %l0
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1c8b5436203563..abc65dc6d61e7d 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
+#include <asm/tlb.h>
extern int linux_num_cpus;
extern void calibrate_delay(void);
@@ -635,9 +636,8 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
}
}
-extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
-extern unsigned long xcall_flush_tlb_range;
+extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_flush_tlb_all_spitfire;
extern unsigned long xcall_flush_tlb_all_cheetah;
@@ -835,7 +835,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
int cpu = get_cpu();
if (atomic_read(&mm->mm_users) == 1) {
- /* See smp_flush_tlb_page for info about this. */
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
}
@@ -851,27 +850,40 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
}
}
-void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
- unsigned long end)
+void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
u32 ctx = CTX_HWBITS(mm->context);
int cpu = get_cpu();
- start &= PAGE_MASK;
- end = PAGE_ALIGN(end);
-
if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
mm->cpu_vm_mask = cpumask_of_cpu(cpu);
goto local_flush_and_out;
+ } else {
+ /* This optimization is not valid. Normally
+ * we will be holding the page_table_lock, but
+ * there is an exception which is copy_page_range()
+ * when forking. The lock is held during the individual
+ * page table updates in the parent, but not at the
+ * top level, which is where we are invoked.
+ */
+ if (0) {
+ cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
+
+ /* By virtue of running under the mm->page_table_lock,
+ * and mmu_context.h:switch_mm doing the same, the
+ * following operation is safe.
+ */
+ if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
+ goto local_flush_and_out;
+ }
}
- smp_cross_call_masked(&xcall_flush_tlb_range,
- ctx, start, end,
+ smp_cross_call_masked(&xcall_flush_tlb_pending,
+ ctx, nr, (unsigned long) vaddrs,
mm->cpu_vm_mask);
- local_flush_and_out:
- __flush_tlb_range(ctx, start, SECONDARY_CONTEXT,
- end, PAGE_SIZE, (end-start));
+local_flush_and_out:
+ __flush_tlb_pending(ctx, nr, vaddrs);
put_cpu();
}
@@ -888,55 +900,6 @@ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
}
-void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
-{
- {
- u32 ctx = CTX_HWBITS(mm->context);
- int cpu = get_cpu();
-
- page &= PAGE_MASK;
- if (mm == current->active_mm &&
- atomic_read(&mm->mm_users) == 1) {
- /* By virtue of being the current address space, and
- * having the only reference to it, the following
- * operation is safe.
- *
- * It would not be a win to perform the xcall tlb
- * flush in this case, because even if we switch back
- * to one of the other processors in cpu_vm_mask it
- * is almost certain that all TLB entries for this
- * context will be replaced by the time that happens.
- */
- mm->cpu_vm_mask = cpumask_of_cpu(cpu);
- goto local_flush_and_out;
- } else {
- cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
-
- /* By virtue of running under the mm->page_table_lock,
- * and mmu_context.h:switch_mm doing the same, the
- * following operation is safe.
- */
- if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
- goto local_flush_and_out;
- }
-
- /* OK, we have to actually perform the cross call. Most
- * likely this is a cloned mm or kswapd is kicking out pages
- * for a task which has run recently on another cpu.
- */
- smp_cross_call_masked(&xcall_flush_tlb_page,
- ctx, page, 0,
- mm->cpu_vm_mask);
- if (!cpu_isset(cpu, mm->cpu_vm_mask))
- return;
-
- local_flush_and_out:
- __flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
-
- put_cpu();
- }
-}
-
/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;
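
The cross-call interface above now takes a context, an entry count, and an array of page-aligned user addresses, with bit 0 of each entry set when the mapping is executable. For illustration only (not part of the patch), a single-page flush expressed through that interface could look like the sketch below; demo_flush_one_page() is a made-up helper, and the extern declaration simply restates the definition added in this file.

/* Sketch only: one user page flushed via the batched interface that
 * replaces the removed smp_flush_tlb_page().  The bit-0 "executable"
 * encoding matches tlb_batch_add() in the new arch/sparc64/mm/tlb.c.
 */
#include <linux/mm.h>

extern void smp_flush_tlb_pending(struct mm_struct *, unsigned long, unsigned long *);

static void demo_flush_one_page(struct mm_struct *mm,
				unsigned long vaddr, int executable)
{
	unsigned long ent = vaddr & PAGE_MASK;

	if (executable)
		ent |= 0x1UL;		/* bit 0 requests an I-TLB demap too */

	smp_flush_tlb_pending(mm, 1, &ent);
}
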
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d69d4b44189d64..5eafadd28a58fb 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -259,7 +259,7 @@ EXPORT_SYMBOL(verify_compat_iovec);
EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(pte_alloc_one_kernel);
+EXPORT_SYMBOL(__pte_alloc_one_kernel);
#ifndef CONFIG_SMP
EXPORT_SYMBOL(pgt_quicklists);
#endif
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 06e1579f061436..11625cce4891f6 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -423,7 +423,7 @@ static unsigned long timer_ticks_per_nsec_quotient;
#define TICK_SIZE (tick_nsec / 1000)
-static __inline__ void timer_check_rtc(void)
+static inline void timer_check_rtc(void)
{
/* last time the cmos clock got updated */
static long last_rtc_update;
@@ -1139,7 +1139,6 @@ EXPORT_SYMBOL(do_settimeofday);
*/
void do_gettimeofday(struct timeval *tv)
{
- unsigned long flags;
unsigned long seq;
unsigned long usec, sec;
unsigned long max_ntp_tick = tick_usec - tickadj;
@@ -1147,7 +1146,7 @@ void do_gettimeofday(struct timeval *tv)
do {
unsigned long lost;
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
+ seq = read_seqbegin(&xtime_lock);
usec = do_gettimeoffset();
lost = jiffies - wall_jiffies;
@@ -1166,8 +1165,18 @@ void do_gettimeofday(struct timeval *tv)
usec += lost * tick_usec;
sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+
+ /* Believe it or not, this divide shows up on
+ * kernel profiles. The problem is that it is
+ * both 64-bit and signed. Happily, 32-bits
+ * of precision is all we really need and in
+ * doing so gcc ends up emitting a cheap multiply.
+ *
+ * XXX Why is tv_nsec 'long' and 'signed' in
+ * XXX the first place, can it even be negative?
+ */
+ usec += ((unsigned int) xtime.tv_nsec / 1000U);
+ } while (read_seqretry(&xtime_lock, seq));
while (usec >= 1000000) {
usec -= 1000000;
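
The comment added above is the whole point of this hunk: xtime.tv_nsec is a signed 64-bit long on sparc64, so dividing it by 1000 costs a real divide, while the unsigned 32-bit form is compiled by gcc into a reciprocal multiply, and 32 bits are plenty since tv_nsec stays below 10^9. A stand-alone sketch of the same transformation (illustration only, not kernel code):

/* Userland illustration of the divide the patch avoids: the unsigned
 * 32-bit divide by a constant is what gcc turns into a multiply.
 */
#include <stdio.h>

static unsigned int nsec_to_usec(long nsec)
{
	/* Mirrors the patched do_gettimeofday(): 32 bits of precision
	 * suffice because tv_nsec stays in [0, 1000000000).
	 */
	return (unsigned int) nsec / 1000U;
}

int main(void)
{
	printf("%u\n", nsec_to_usec(1500000L));	/* prints 1500 */
	return 0;
}
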
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index fb85e5a2afd7a8..cda87333a77b81 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
-obj-y := ultra.o fault.o init.o generic.o extable.o
+obj-y := ultra.o tlb.o fault.o init.o generic.o extable.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 994174afe72675..60308cead47045 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -37,8 +37,6 @@
#include <asm/spitfire.h>
#include <asm/sections.h>
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
extern void device_scan(void);
struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
@@ -252,87 +250,6 @@ out:
put_cpu();
}
-/* When shared+writable mmaps of files go away, we lose all dirty
- * page state, so we have to deal with D-cache aliasing here.
- *
- * This code relies on the fact that flush_cache_range() is always
- * called for an area composed by a single VMA. It also assumes that
- * the MM's page_table_lock is held.
- */
-static inline void flush_cache_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long address, unsigned long size)
-{
- unsigned long offset;
- pte_t *ptep;
-
- if (pmd_none(*pmd))
- return;
- ptep = pte_offset_map(pmd, address);
- offset = address & ~PMD_MASK;
- if (offset + size > PMD_SIZE)
- size = PMD_SIZE - offset;
- size &= PAGE_MASK;
- for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
- pte_t pte = *ptep;
-
- if (pte_none(pte))
- continue;
-
- if (pte_present(pte) && pte_dirty(pte)) {
- struct page *page;
- unsigned long pgaddr, uaddr;
- unsigned long pfn = pte_pfn(pte);
-
- if (!pfn_valid(pfn))
- continue;
- page = pfn_to_page(pfn);
- if (PageReserved(page) || !page_mapping(page))
- continue;
- pgaddr = (unsigned long) page_address(page);
- uaddr = address + offset;
- if ((pgaddr ^ uaddr) & (1 << 13))
- flush_dcache_page_all(mm, page);
- }
- }
- pte_unmap(ptep - 1);
-}
-
-static inline void flush_cache_pmd_range(struct mm_struct *mm, pgd_t *dir, unsigned long address, unsigned long size)
-{
- pmd_t *pmd;
- unsigned long end;
-
- if (pgd_none(*dir))
- return;
- pmd = pmd_offset(dir, address);
- end = address + size;
- if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
- do {
- flush_cache_pte_range(mm, pmd, address, end - address);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
-{
- struct mm_struct *mm = vma->vm_mm;
- pgd_t *dir = pgd_offset(mm, start);
-
- if (mm == current->mm)
- flushw_user();
-
- if (vma->vm_file == NULL ||
- ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)))
- return;
-
- do {
- flush_cache_pmd_range(mm, dir, start, end - start);
- start = (start + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (start && (start < end));
-}
-
void flush_icache_range(unsigned long start, unsigned long end)
{
/* Cheetah has coherent I-cache. */
@@ -1173,7 +1090,7 @@ struct pgtable_cache_struct pgt_quicklists;
#else
#define DC_ALIAS_SHIFT 0
#endif
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
struct page *page;
unsigned long color;
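
The page-table walker deleted above flushed the D-cache for dirty, mapped file pages only when the kernel and user mappings could alias in UltraSPARC's virtually indexed D-cache; the same (addr1 ^ addr2) & (1 << 13) test reappears in tlb_batch_add() in the new mm/tlb.c, which is why the walker can go away. A minimal sketch of that predicate (illustration only; the 8K-page, 16K-alias-window layout is an assumption read off the constant):

/* Two mappings of one physical page can alias in the virtually
 * indexed D-cache when they differ in virtual address bit 13.
 */
static int dcache_alias(unsigned long kernel_vaddr, unsigned long user_vaddr)
{
	return ((kernel_vaddr ^ user_vaddr) & (1UL << 13)) != 0;
}
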
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
new file mode 100644
index 00000000000000..11a5078182c460
--- /dev/null
+++ b/arch/sparc64/mm/tlb.c
@@ -0,0 +1,158 @@
+/* arch/sparc64/mm/tlb.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/* Heavily inspired by the ppc64 code. */
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
+ { NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+
+void flush_tlb_pending(void)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+
+ if (mp->tlb_nr) {
+ unsigned long context = mp->mm->context;
+
+ if (CTX_VALID(context)) {
+#ifdef CONFIG_SMP
+ smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
+ &mp->vaddrs[0]);
+#else
+ __flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
+ &mp->vaddrs[0]);
+#endif
+ }
+ mp->tlb_nr = 0;
+ }
+}
+
+void tlb_batch_add(pte_t *ptep, pte_t orig)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ struct page *ptepage;
+ struct mm_struct *mm;
+ unsigned long vaddr, nr;
+
+ ptepage = virt_to_page(ptep);
+ mm = (struct mm_struct *) ptepage->mapping;
+
+ /* It is more efficient to let flush_tlb_kernel_range()
+ * handle these cases.
+ */
+ if (mm == &init_mm)
+ return;
+
+ vaddr = ptepage->index +
+ (((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
+ if (pte_exec(orig))
+ vaddr |= 0x1UL;
+
+ if (pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+ struct address_space *mapping;
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ goto no_cache_flush;
+
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ goto no_cache_flush;
+
+ /* A real file page? */
+ mapping = page_mapping(page);
+ if (!mapping || mapping == &swapper_space)
+ goto no_cache_flush;
+
+ paddr = (unsigned long) page_address(page);
+ if ((paddr ^ vaddr) & (1 << 13))
+ flush_dcache_page_all(mm, page);
+ }
+
+no_cache_flush:
+ if (mp->tlb_frozen)
+ return;
+
+ nr = mp->tlb_nr;
+
+ if (unlikely(nr != 0 && mm != mp->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ mp->mm = mm;
+
+ mp->vaddrs[nr] = vaddr;
+ mp->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR)
+ flush_tlb_pending();
+}
+
+void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ unsigned long nr = mp->tlb_nr;
+ long s = start, e = end, vpte_base;
+
+ if (mp->tlb_frozen)
+ return;
+
+ /* Nobody should call us with start below VM hole and end above.
+ * See if it is really true.
+ */
+ BUG_ON(s > e);
+
+#if 0
+ /* Currently free_pgtables guarantees this. */
+ s &= PMD_MASK;
+ e = (e + PMD_SIZE - 1) & PMD_MASK;
+#endif
+ vpte_base = (tlb_type == spitfire ?
+ VPTE_BASE_SPITFIRE :
+ VPTE_BASE_CHEETAH);
+
+ if (unlikely(nr != 0 && mm != mp->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ mp->mm = mm;
+
+ start = vpte_base + (s >> (PAGE_SHIFT - 3));
+ end = vpte_base + (e >> (PAGE_SHIFT - 3));
+ while (start < end) {
+ mp->vaddrs[nr] = start;
+ mp->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+ start += PAGE_SIZE;
+ }
+ if (nr)
+ flush_tlb_pending();
+}
+
+unsigned long __ptrs_per_pmd(void)
+{
+ if (test_thread_flag(TIF_32BIT))
+ return (1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT));
+ return REAL_PTRS_PER_PMD;
+}
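
The new mm/tlb.c gathers user-space TLB flushes into the per-cpu mmu_gather and drains it either when the batch fills (TLB_BATCH_NR entries) or when an address for a different mm arrives. The drain policy, reduced to plain C with made-up names (illustration only; demo_batch/demo_add/demo_flush do not exist in the tree):

#define DEMO_BATCH_NR	64		/* stands in for TLB_BATCH_NR */

struct demo_batch {
	void		*mm;		/* address space the entries belong to */
	unsigned long	nr;
	unsigned long	vaddrs[DEMO_BATCH_NR];
};

static void demo_flush(struct demo_batch *b)
{
	/* cross call / local __flush_tlb_pending() would go here */
	b->nr = 0;
}

static void demo_add(struct demo_batch *b, void *mm, unsigned long vaddr)
{
	if (b->nr != 0 && b->mm != mm)	/* new mm: drain the old batch first */
		demo_flush(b);
	if (b->nr == 0)
		b->mm = mm;
	b->vaddrs[b->nr++] = vaddr;
	if (b->nr >= DEMO_BATCH_NR)	/* batch full: drain now */
		demo_flush(b);
}
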
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 16410ba9555cf5..fae62faf0c81fb 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -26,25 +26,7 @@
*/
.text
.align 32
- .globl __flush_tlb_page, __flush_tlb_mm, __flush_tlb_range
-__flush_tlb_page: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=page&PAGE_MASK, %o2=SECONDARY_CONTEXT */
- ldxa [%o2] ASI_DMMU, %g2
- cmp %g2, %o0
- bne,pn %icc, __spitfire_flush_tlb_page_slow
- or %o1, 0x10, %g3
- stxa %g0, [%g3] ASI_DMMU_DEMAP
- stxa %g0, [%g3] ASI_IMMU_DEMAP
- retl
- flush %g6
- nop
- nop
- nop
- nop
- nop
- nop
- nop
- nop
-
+ .globl __flush_tlb_mm
__flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
ldxa [%o1] ASI_DMMU, %g2
cmp %g2, %o0
@@ -63,84 +45,31 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
nop
nop
-__flush_tlb_range: /* %o0=(ctx&TAG_CONTEXT_BITS), %o1=start&PAGE_MASK, %o2=SECONDARY_CONTEXT,
- * %o3=end&PAGE_MASK, %o4=PAGE_SIZE, %o5=(end - start)
- */
-#define TLB_MAGIC 207 /* Students, do you know how I calculated this? -DaveM */
- cmp %o5, %o4
- bleu,pt %xcc, __flush_tlb_page
- srlx %o5, PAGE_SHIFT, %g5
- cmp %g5, TLB_MAGIC
- bgeu,pn %icc, __spitfire_flush_tlb_range_constant_time
- or %o1, 0x10, %g5
- ldxa [%o2] ASI_DMMU, %g2
- cmp %g2, %o0
-__spitfire_flush_tlb_range_page_by_page:
- bne,pn %icc, __spitfire_flush_tlb_range_pbp_slow
- sub %o5, %o4, %o5
-1: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
- brnz,pt %o5, 1b
- sub %o5, %o4, %o5
- retl
- flush %g6
-__spitfire_flush_tlb_range_constant_time: /* %o0=ctx, %o1=start, %o3=end */
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- mov TLB_TAG_ACCESS, %g3
- mov ((SPITFIRE_HIGHEST_LOCKED_TLBENT-1) << 3), %g2
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
- flush %g6
-
-1: ldxa [%g2] ASI_ITLB_TAG_READ, %o4
- and %o4, TAG_CONTEXT_BITS, %o5
- cmp %o5, %o0
- bne,pt %icc, 2f
- andn %o4, TAG_CONTEXT_BITS, %o4
- cmp %o4, %o1
- blu,pt %xcc, 2f
- cmp %o4, %o3
- blu,pn %xcc, 4f
-2: ldxa [%g2] ASI_DTLB_TAG_READ, %o4
- and %o4, TAG_CONTEXT_BITS, %o5
- cmp %o5, %o0
- andn %o4, TAG_CONTEXT_BITS, %o4
- bne,pt %icc, 3f
- cmp %o4, %o1
- blu,pt %xcc, 3f
- cmp %o4, %o3
- blu,pn %xcc, 5f
- nop
-3: brnz,pt %g2, 1b
- sub %g2, (1 << 3), %g2
- retl
- wrpr %g1, 0x0, %pstate
-4: stxa %g0, [%g3] ASI_IMMU
- stxa %g0, [%g2] ASI_ITLB_DATA_ACCESS
- flush %g6
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
- flush %g6
-
- ba,pt %xcc, 2b
+ .align 32
+ .globl __flush_tlb_pending
+__flush_tlb_pending:
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g5
+ sllx %o1, 3, %o1
+ andn %g5, PSTATE_IE, %g2
+ wrpr %g2, %pstate
+ mov SECONDARY_CONTEXT, %o4
+ ldxa [%o4] ASI_DMMU, %g2
+ stxa %o0, [%o4] ASI_DMMU
+1: sub %o1, (1 << 3), %o1
+ ldx [%o2 + %o1], %o3
+ andcc %o3, 1, %g0
+ be,pn %icc, 2f
+ andn %o3, 1, %o3
+ stxa %g0, [%o3] ASI_IMMU_DEMAP
+2: stxa %g0, [%o3] ASI_DMMU_DEMAP
+ brnz,pt %o1, 1b
nop
-
-5: stxa %g0, [%g3] ASI_DMMU
- stxa %g0, [%g2] ASI_DTLB_DATA_ACCESS
flush %g6
-
- /* Spitfire Errata #32 workaround. */
- mov 0x8, %o4
- stxa %g0, [%o4] ASI_DMMU
+ stxa %g2, [%o2] ASI_DMMU
flush %g6
-
- ba,pt %xcc, 3b
- nop
+ retl
+ wrpr %g5, 0x0, %pstate
.align 32
.globl __flush_tlb_kernel_range
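
For reference, the demap loop of __flush_tlb_pending above restated as C (illustration only; the interrupt masking and SECONDARY_CONTEXT save/restore are omitted, and demap_dtlb()/demap_itlb() stand in for the stxa stores to ASI_DMMU_DEMAP/ASI_IMMU_DEMAP):

static void demap_dtlb(unsigned long vaddr)
{
	(void) vaddr;	/* placeholder for stxa %g0, [vaddr] ASI_DMMU_DEMAP */
}

static void demap_itlb(unsigned long vaddr)
{
	(void) vaddr;	/* placeholder for stxa %g0, [vaddr] ASI_IMMU_DEMAP */
}

static void flush_tlb_pending_c(unsigned long nr, const unsigned long *vaddrs)
{
	while (nr--) {
		unsigned long v = vaddrs[nr];	/* walk the array backwards */

		if (v & 1UL)			/* bit 0: executable mapping */
			demap_itlb(v & ~1UL);
		demap_dtlb(v & ~1UL);
	}
}
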
@@ -171,33 +100,6 @@ __spitfire_flush_tlb_mm_slow:
retl
wrpr %g1, 0, %pstate
-__spitfire_flush_tlb_page_slow:
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- stxa %o0, [%o2] ASI_DMMU
- stxa %g0, [%g3] ASI_DMMU_DEMAP
- stxa %g0, [%g3] ASI_IMMU_DEMAP
- flush %g6
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- retl
- wrpr %g1, 0, %pstate
-
-__spitfire_flush_tlb_range_pbp_slow:
- rdpr %pstate, %g1
- wrpr %g1, PSTATE_IE, %pstate
- stxa %o0, [%o2] ASI_DMMU
-
-2: stxa %g0, [%g5 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%g5 + %o5] ASI_IMMU_DEMAP
- brnz,pt %o5, 2b
- sub %o5, %o4, %o5
- flush %g6
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- retl
- wrpr %g1, 0x0, %pstate
-
/*
* The following code flushes one page_size worth.
*/
@@ -356,22 +258,6 @@ __update_mmu_cache: /* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
ba,a,pt %xcc, __prefill_itlb
/* Cheetah specific versions, patched at boot time. */
-__cheetah_flush_tlb_page: /* 14 insns */
- rdpr %pstate, %g5
- andn %g5, PSTATE_IE, %g2
- wrpr %g2, 0x0, %pstate
- wrpr %g0, 1, %tl
- mov PRIMARY_CONTEXT, %o2
- ldxa [%o2] ASI_DMMU, %g2
- stxa %o0, [%o2] ASI_DMMU
- stxa %g0, [%o1] ASI_DMMU_DEMAP
- stxa %g0, [%o1] ASI_IMMU_DEMAP
- stxa %g2, [%o2] ASI_DMMU
- flush %g6
- wrpr %g0, 0, %tl
- retl
- wrpr %g5, 0x0, %pstate
-
__cheetah_flush_tlb_mm: /* 15 insns */
rdpr %pstate, %g5
andn %g5, PSTATE_IE, %g2
@@ -389,26 +275,29 @@ __cheetah_flush_tlb_mm: /* 15 insns */
retl
wrpr %g5, 0x0, %pstate
-__cheetah_flush_tlb_range: /* 20 insns */
- cmp %o5, %o4
- blu,pt %xcc, 9f
- rdpr %pstate, %g5
+__cheetah_flush_tlb_pending: /* 22 insns */
+ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+ rdpr %pstate, %g5
+ sllx %o1, 3, %o1
andn %g5, PSTATE_IE, %g2
wrpr %g2, 0x0, %pstate
wrpr %g0, 1, %tl
- mov PRIMARY_CONTEXT, %o2
- sub %o5, %o4, %o5
- ldxa [%o2] ASI_DMMU, %g2
- stxa %o0, [%o2] ASI_DMMU
-1: stxa %g0, [%o1 + %o5] ASI_DMMU_DEMAP
- stxa %g0, [%o1 + %o5] ASI_IMMU_DEMAP
- membar #Sync
- brnz,pt %o5, 1b
- sub %o5, %o4, %o5
- stxa %g2, [%o2] ASI_DMMU
+ mov PRIMARY_CONTEXT, %o4
+ ldxa [%o4] ASI_DMMU, %g2
+ stxa %o0, [%o4] ASI_DMMU
+1: sub %o1, (1 << 3), %o1
+ ldx [%o2 + %o1], %o3
+ andcc %o3, 1, %g0
+ be,pn %icc, 2f
+ andn %o3, 1, %o3
+ stxa %g0, [%o3] ASI_IMMU_DEMAP
+2: stxa %g0, [%o3] ASI_DMMU_DEMAP
+ brnz,pt %o1, 1b
+ membar #Sync
+ stxa %g2, [%o4] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
-9: retl
+ retl
wrpr %g5, 0x0, %pstate
flush_dcpage_cheetah: /* 11 insns */
@@ -439,13 +328,6 @@ cheetah_patch_one:
cheetah_patch_cachetlbops:
save %sp, -128, %sp
- sethi %hi(__flush_tlb_page), %o0
- or %o0, %lo(__flush_tlb_page), %o0
- sethi %hi(__cheetah_flush_tlb_page), %o1
- or %o1, %lo(__cheetah_flush_tlb_page), %o1
- call cheetah_patch_one
- mov 14, %o2
-
sethi %hi(__flush_tlb_mm), %o0
or %o0, %lo(__flush_tlb_mm), %o0
sethi %hi(__cheetah_flush_tlb_mm), %o1
@@ -453,12 +335,12 @@ cheetah_patch_cachetlbops:
call cheetah_patch_one
mov 15, %o2
- sethi %hi(__flush_tlb_range), %o0
- or %o0, %lo(__flush_tlb_range), %o0
- sethi %hi(__cheetah_flush_tlb_range), %o1
- or %o1, %lo(__cheetah_flush_tlb_range), %o1
+ sethi %hi(__flush_tlb_pending), %o0
+ or %o0, %lo(__flush_tlb_pending), %o0
+ sethi %hi(__cheetah_flush_tlb_pending), %o1
+ or %o1, %lo(__cheetah_flush_tlb_pending), %o1
call cheetah_patch_one
- mov 20, %o2
+ mov 22, %o2
sethi %hi(__flush_dcache_page), %o0
or %o0, %lo(__flush_dcache_page), %o0
@@ -487,17 +369,7 @@ cheetah_patch_cachetlbops:
* TODO: Make xcall TLB range flushes use the tricks above... -DaveM
*/
.align 32
- .globl xcall_flush_tlb_page, xcall_flush_tlb_mm, xcall_flush_tlb_range
-xcall_flush_tlb_page:
- mov PRIMARY_CONTEXT, %g2
- ldxa [%g2] ASI_DMMU, %g3
- stxa %g5, [%g2] ASI_DMMU
- stxa %g0, [%g1] ASI_DMMU_DEMAP
- stxa %g0, [%g1] ASI_IMMU_DEMAP
- stxa %g3, [%g2] ASI_DMMU
- retry
- nop
-
+ .globl xcall_flush_tlb_mm
xcall_flush_tlb_mm:
mov PRIMARY_CONTEXT, %g2
mov 0x40, %g4
@@ -508,34 +380,25 @@ xcall_flush_tlb_mm:
stxa %g3, [%g2] ASI_DMMU
retry
-xcall_flush_tlb_range:
- sethi %hi(PAGE_SIZE - 1), %g2
- or %g2, %lo(PAGE_SIZE - 1), %g2
- andn %g1, %g2, %g1
- andn %g7, %g2, %g7
- sub %g7, %g1, %g3
- add %g2, 1, %g2
- srlx %g3, PAGE_SHIFT, %g4
- cmp %g4, 96
-
- bgu,pn %icc, xcall_flush_tlb_mm
- mov PRIMARY_CONTEXT, %g4
- ldxa [%g4] ASI_DMMU, %g7
- sub %g3, %g2, %g3
+ .globl xcall_flush_tlb_pending
+xcall_flush_tlb_pending:
+ /* %g5=context, %g1=nr, %g7=vaddrs[] */
+ sllx %g1, 3, %g1
+ mov PRIMARY_CONTEXT, %g4
+ ldxa [%g4] ASI_DMMU, %g2
stxa %g5, [%g4] ASI_DMMU
- nop
- nop
- nop
-
-1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP
- stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP
- membar #Sync
- brnz,pt %g3, 1b
- sub %g3, %g2, %g3
- stxa %g7, [%g4] ASI_DMMU
+1: sub %g1, (1 << 3), %g1
+ ldx [%g7 + %g1], %g5
+ andcc %g5, 0x1, %g0
+ be,pn %icc, 2f
+
+ andn %g5, 0x1, %g5
+ stxa %g0, [%g5] ASI_IMMU_DEMAP
+2: stxa %g0, [%g5] ASI_DMMU_DEMAP
+ brnz,pt %g1, 1b
+ membar #Sync
+ stxa %g2, [%g4] ASI_DMMU
retry
- nop
- nop
.globl xcall_flush_tlb_kernel_range
xcall_flush_tlb_kernel_range:
@@ -555,7 +418,6 @@ xcall_flush_tlb_kernel_range:
retry
nop
nop
- nop
/* This runs in a very controlled environment, so we do
* not need to worry about BH races etc.