author | David S. Miller <davem@nuts.davemloft.net> | 2004-08-06 09:01:29 -0700
---|---|---
committer | David S. Miller <davem@nuts.davemloft.net> | 2004-08-06 09:01:29 -0700
commit | 121f0a4e43b7f20ed7300c7862b0d5b5273b3a97 (patch) |
tree | e183dc3176728afe4df87eec0566db77c9099602 /arch |
parent | 2ae23d292eecae72577b26b13356becdf257cdb7 (diff) |
download | history-121f0a4e43b7f20ed7300c7862b0d5b5273b3a97.tar.gz |
[SPARC64]: Implement tlb flush batching just like ppc64.
Signed-off-by: David S. Miller <davem@redhat.com>
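The idea behind the patch: instead of issuing one TLB flush (and, on SMP, one cross-call) per PTE update, each CPU accumulates the virtual addresses whose translations went stale in a per-CPU `mmu_gather` and drains them in a single batched demap pass, as ppc64 already did. The patch also adds a temporary `#if 1` hook in rtrap.S that BUGs if a CPU tries to return to userspace with batch work still pending. The stand-alone C sketch below models only the control flow; `struct tlb_batch`, `TLB_BATCH_NR`, and `demap_one()` are illustrative stand-ins for the patch's per-CPU `mmu_gather`, not kernel API.

```c
/* Stand-alone model of the batching scheme (illustrative only).
 * struct tlb_batch approximates the patch's per-CPU mmu_gather;
 * demap_one() stands in for the real demap operation performed by
 * __flush_tlb_pending() / smp_flush_tlb_pending().
 */
#include <stdio.h>

#define TLB_BATCH_NR 64                   /* assumed batch size */

struct tlb_batch {
    unsigned long mm_ctx;                 /* which address space the vaddrs belong to */
    unsigned long nr;                     /* pending entries */
    unsigned long vaddrs[TLB_BATCH_NR];   /* page-aligned vaddrs, bit 0 = executable */
};

static struct tlb_batch batch;            /* the kernel keeps one per CPU */

static void demap_one(unsigned long ctx, unsigned long v)
{
    printf("demap ctx=%lu D-TLB va=%#lx%s\n",
           ctx, v & ~1UL, (v & 1UL) ? " (+I-TLB)" : "");
}

/* Mirrors flush_tlb_pending(): drain everything queued so far
 * in one pass instead of one flush per PTE update.
 */
static void flush_pending(void)
{
    for (unsigned long i = 0; i < batch.nr; i++)
        demap_one(batch.mm_ctx, batch.vaddrs[i]);
    batch.nr = 0;
}

/* Mirrors tlb_batch_add(): queue one address, draining early when
 * the batch fills up or the address space changes mid-batch
 * (the real code compares mm_struct pointers, modeled here by ctx).
 */
static void batch_add(unsigned long mm_ctx, unsigned long vaddr, int exec)
{
    if (batch.nr != 0 && batch.mm_ctx != mm_ctx)
        flush_pending();                  /* can't mix address spaces in one batch */
    if (batch.nr == 0)
        batch.mm_ctx = mm_ctx;
    batch.vaddrs[batch.nr++] = vaddr | (exec ? 1UL : 0UL);
    if (batch.nr >= TLB_BATCH_NR)
        flush_pending();
}

int main(void)
{
    batch_add(5, 0x2000, 0);
    batch_add(5, 0x4000, 1);
    batch_add(9, 0x6000, 0);              /* context change forces a drain */
    flush_pending();
    return 0;
}
```

The real `tlb_batch_add()` in arch/sparc64/mm/tlb.c below additionally skips kernel mappings, handles D-cache aliasing for dirty file pages, and suppresses batching while `tlb_frozen` is set.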
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sparc64/kernel/rtrap.S | 7
-rw-r--r-- | arch/sparc64/kernel/smp.c | 87
-rw-r--r-- | arch/sparc64/kernel/sparc64_ksyms.c | 2
-rw-r--r-- | arch/sparc64/mm/Makefile | 2
-rw-r--r-- | arch/sparc64/mm/init.c | 85
-rw-r--r-- | arch/sparc64/mm/tlb.c | 173
-rw-r--r-- | arch/sparc64/mm/ultra.S | 266
7 files changed, 271 insertions, 351 deletions
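One encoding detail worth noting before the diff: the queued addresses are page-aligned, so the patch reuses bit 0 of each `vaddrs[]` entry to record whether the mapping was executable (`vaddr |= 0x1UL` in `tlb_batch_add()`); the assembly flush loops test that bit to decide whether an I-TLB demap is needed in addition to the D-TLB one. A minimal sketch of the encoding, assuming sparc64's 8K page size (the same size the `(1 << 13)` D-cache alias tests in this patch rely on):

```c
#include <assert.h>

#define PAGE_SHIFT 13                         /* sparc64 uses 8K pages */
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

/* Encode: the address is PAGE_MASK-aligned, so bit 0 is free. */
static unsigned long encode(unsigned long vaddr, int exec)
{
    return (vaddr & PAGE_MASK) | (exec ? 0x1UL : 0);
}

int main(void)
{
    unsigned long e = encode(0x7f0000002000UL, 1);
    assert((e & ~1UL) == 0x7f0000002000UL);   /* decode: mask bit 0 off */
    assert(e & 1UL);                          /* exec flag survives */
    return 0;
}
```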
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index b58afaeca19354..9c0b76d237636c 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -152,7 +152,12 @@ __handle_signal:
 		.globl	rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
 rtrap_irq:
 rtrap_clr_l6:	clr	%l6
-rtrap:		ldub	[%g6 + TI_CPU], %l0
+rtrap:
+#if 1
+	call	tlb_batch_rtrap_debug
+	 add	%sp, PTREGS_OFF, %o0
+#endif
+		ldub	[%g6 + TI_CPU], %l0
 		sethi	%hi(irq_stat), %l2	! &softirq_active
 		or	%l2, %lo(irq_stat), %l2	! &softirq_active
 irqsz_patchme:	sllx	%l0, 0, %l0
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1c8b5436203563..abc65dc6d61e7d 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/timer.h>
 #include <asm/starfire.h>
+#include <asm/tlb.h>
 
 extern int linux_num_cpus;
 extern void calibrate_delay(void);
@@ -635,9 +636,8 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
 	}
 }
 
-extern unsigned long xcall_flush_tlb_page;
 extern unsigned long xcall_flush_tlb_mm;
-extern unsigned long xcall_flush_tlb_range;
+extern unsigned long xcall_flush_tlb_pending;
 extern unsigned long xcall_flush_tlb_kernel_range;
 extern unsigned long xcall_flush_tlb_all_spitfire;
 extern unsigned long xcall_flush_tlb_all_cheetah;
@@ -835,7 +835,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
 	int cpu = get_cpu();
 
 	if (atomic_read(&mm->mm_users) == 1) {
-		/* See smp_flush_tlb_page for info about this. */
 		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 		goto local_flush_and_out;
 	}
@@ -851,27 +850,40 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
 	}
 }
 
-void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
-			 unsigned long end)
+void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
 {
 	u32 ctx = CTX_HWBITS(mm->context);
 	int cpu = get_cpu();
 
-	start &= PAGE_MASK;
-	end    = PAGE_ALIGN(end);
-
 	if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
 		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 		goto local_flush_and_out;
+	} else {
+		/* This optimization is not valid.  Normally
+		 * we will be holding the page_table_lock, but
+		 * there is an exception which is copy_page_range()
+		 * when forking.  The lock is held during the individual
+		 * page table updates in the parent, but not at the
+		 * top level, which is where we are invoked.
+		 */
+		if (0) {
+			cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
+
+			/* By virtue of running under the mm->page_table_lock,
+			 * and mmu_context.h:switch_mm doing the same, the
+			 * following operation is safe.
+			 */
+			if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
+				goto local_flush_and_out;
+		}
 	}
 
-	smp_cross_call_masked(&xcall_flush_tlb_range,
-			      ctx, start, end,
+	smp_cross_call_masked(&xcall_flush_tlb_pending,
+			      ctx, nr, (unsigned long) vaddrs,
 			      mm->cpu_vm_mask);
 
- local_flush_and_out:
-	__flush_tlb_range(ctx, start, SECONDARY_CONTEXT,
-			  end, PAGE_SIZE, (end-start));
+local_flush_and_out:
+	__flush_tlb_pending(ctx, nr, vaddrs);
 
 	put_cpu();
 }
@@ -888,55 +900,6 @@ void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	}
 }
 
-void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
-{
-	{
-		u32 ctx = CTX_HWBITS(mm->context);
-		int cpu = get_cpu();
-
-		page &= PAGE_MASK;
-		if (mm == current->active_mm &&
-		    atomic_read(&mm->mm_users) == 1) {
-			/* By virtue of being the current address space, and
-			 * having the only reference to it, the following
-			 * operation is safe.
-			 *
-			 * It would not be a win to perform the xcall tlb
-			 * flush in this case, because even if we switch back
-			 * to one of the other processors in cpu_vm_mask it
-			 * is almost certain that all TLB entries for this
-			 * context will be replaced by the time that happens.
-			 */
-			mm->cpu_vm_mask = cpumask_of_cpu(cpu);
-			goto local_flush_and_out;
-		} else {
-			cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
-
-			/* By virtue of running under the mm->page_table_lock,
-			 * and mmu_context.h:switch_mm doing the same, the
-			 * following operation is safe.
-			 */
-			if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
-				goto local_flush_and_out;
-		}
-
-		/* OK, we have to actually perform the cross call.  Most
-		 * likely this is a cloned mm or kswapd is kicking out pages
-		 * for a task which has run recently on another cpu.
-		 */
-		smp_cross_call_masked(&xcall_flush_tlb_page,
-				      ctx, page, 0,
-				      mm->cpu_vm_mask);
-		if (!cpu_isset(cpu, mm->cpu_vm_mask))
-			return;
-
-	local_flush_and_out:
-		__flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
-
-		put_cpu();
-	}
-}
-
 /* CPU capture. */
 /* #define CAPTURE_DEBUG */
 extern unsigned long xcall_capture;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d69d4b44189d64..5eafadd28a58fb 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -259,7 +259,7 @@ EXPORT_SYMBOL(verify_compat_iovec);
 EXPORT_SYMBOL(dump_thread);
 EXPORT_SYMBOL(dump_fpu);
 
-EXPORT_SYMBOL(pte_alloc_one_kernel);
+EXPORT_SYMBOL(__pte_alloc_one_kernel);
 #ifndef CONFIG_SMP
 EXPORT_SYMBOL(pgt_quicklists);
 #endif
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index fb85e5a2afd7a8..cda87333a77b81 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS	:= -ansi
 EXTRA_CFLAGS	:= -Werror
 
-obj-y    := ultra.o fault.o init.o generic.o extable.o
+obj-y    := ultra.o tlb.o fault.o init.o generic.o extable.o
 
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 994174afe72675..60308cead47045 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -37,8 +37,6 @@
 #include <asm/spitfire.h>
 #include <asm/sections.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern void device_scan(void);
 
 struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
@@ -252,87 +250,6 @@ out:
 	put_cpu();
 }
 
-/* When shared+writable mmaps of files go away, we lose all dirty
- * page state, so we have to deal with D-cache aliasing here.
- *
- * This code relies on the fact that flush_cache_range() is always
- * called for an area composed by a single VMA.  It also assumes that
- * the MM's page_table_lock is held.
- */
-static inline void flush_cache_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long address, unsigned long size)
-{
-	unsigned long offset;
-	pte_t *ptep;
-
-	if (pmd_none(*pmd))
-		return;
-	ptep = pte_offset_map(pmd, address);
-	offset = address & ~PMD_MASK;
-	if (offset + size > PMD_SIZE)
-		size = PMD_SIZE - offset;
-	size &= PAGE_MASK;
-	for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
-		pte_t pte = *ptep;
-
-		if (pte_none(pte))
-			continue;
-
-		if (pte_present(pte) && pte_dirty(pte)) {
-			struct page *page;
-			unsigned long pgaddr, uaddr;
-			unsigned long pfn = pte_pfn(pte);
-
-			if (!pfn_valid(pfn))
-				continue;
-			page = pfn_to_page(pfn);
-			if (PageReserved(page) || !page_mapping(page))
-				continue;
-			pgaddr = (unsigned long) page_address(page);
-			uaddr = address + offset;
-			if ((pgaddr ^ uaddr) & (1 << 13))
-				flush_dcache_page_all(mm, page);
-		}
-	}
-	pte_unmap(ptep - 1);
-}
-
-static inline void flush_cache_pmd_range(struct mm_struct *mm, pgd_t *dir, unsigned long address, unsigned long size)
-{
-	pmd_t *pmd;
-	unsigned long end;
-
-	if (pgd_none(*dir))
-		return;
-	pmd = pmd_offset(dir, address);
-	end = address + size;
-	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
-		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
-	do {
-		flush_cache_pte_range(mm, pmd, address, end - address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-}
-
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *dir = pgd_offset(mm, start);
-
-	if (mm == current->mm)
-		flushw_user();
-
-	if (vma->vm_file == NULL ||
-	    ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)))
-		return;
-
-	do {
-		flush_cache_pmd_range(mm, dir, start, end - start);
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (start && (start < end));
-}
-
 void flush_icache_range(unsigned long start, unsigned long end)
 {
 	/* Cheetah has coherent I-cache. */
@@ -1173,7 +1090,7 @@ struct pgtable_cache_struct pgt_quicklists;
 #else
 #define DC_ALIAS_SHIFT	0
 #endif
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+pte_t *__pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	struct page *page;
 	unsigned long color;
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
new file mode 100644
index 00000000000000..b3b1709e3ab4db
--- /dev/null
+++ b/arch/sparc64/mm/tlb.c
@@ -0,0 +1,173 @@
+/* arch/sparc64/mm/tlb.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/* Heavily inspired by the ppc64 code.  */
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
+	{ NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+
+void flush_tlb_pending(void)
+{
+	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+
+	if (mp->tlb_nr) {
+		unsigned long context = mp->mm->context;
+
+		if (CTX_VALID(context)) {
+#ifdef CONFIG_SMP
+			smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
+					      &mp->vaddrs[0]);
+#else
+			__flush_tlb_pending(CTX_HWBITS(context), mp->tlb_nr,
+					    &mp->vaddrs[0]);
+#endif
+		}
+		mp->tlb_nr = 0;
+	}
+}
+
+void tlb_batch_rtrap_debug(struct pt_regs *regs)
+{
+	/* If we are returning to userspace and have pending
+	 * tlb batch work on this processor, all hope is lost.
+	 */
+	if (!(regs->tstate & TSTATE_PRIV)) {
+		struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
+
+		if (mp->tlb_nr)
+			BUG();
+
+		put_cpu_var(mmu_gathers);
+	}
+}
+
+void tlb_batch_add(pte_t *ptep, pte_t orig)
+{
+	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	struct page *ptepage;
+	struct mm_struct *mm;
+	unsigned long vaddr, nr;
+
+	ptepage = virt_to_page(ptep);
+	mm = (struct mm_struct *) ptepage->mapping;
+
+	/* It is more efficient to let flush_tlb_kernel_range()
+	 * handle these cases.
+	 */
+	if (mm == &init_mm)
+		return;
+
+	vaddr = ptepage->index +
+		(((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
+	if (pte_exec(orig))
+		vaddr |= 0x1UL;
+
+	if (pte_dirty(orig)) {
+		unsigned long paddr, pfn = pte_pfn(orig);
+		struct address_space *mapping;
+		struct page *page;
+
+		if (!pfn_valid(pfn))
+			goto no_cache_flush;
+
+		page = pfn_to_page(pfn);
+		if (PageReserved(page))
+			goto no_cache_flush;
+
+		/* A real file page? */
+		mapping = page_mapping(page);
+		if (!mapping || mapping == &swapper_space)
+			goto no_cache_flush;
+
+		paddr = (unsigned long) page_address(page);
+		if ((paddr ^ vaddr) & (1 << 13))
+			flush_dcache_page_all(mm, page);
+	}
+
+no_cache_flush:
+	if (mp->tlb_frozen)
+		return;
+
+	nr = mp->tlb_nr;
+
+	if (unlikely(nr != 0 && mm != mp->mm)) {
+		flush_tlb_pending();
+		nr = 0;
+	}
+
+	if (nr == 0)
+		mp->mm = mm;
+
+	mp->vaddrs[nr] = vaddr;
+	mp->tlb_nr = ++nr;
+	if (nr >= TLB_BATCH_NR)
+		flush_tlb_pending();
+}
+
+void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	unsigned long nr = mp->tlb_nr;
+	long s = start, e = end, vpte_base;
+
+	if (mp->tlb_frozen)
+		return;
+
+	/* Nobody should call us with start below VM hole and end above.
+	 * See if it is really true.
+	 */
+	BUG_ON(s > e);
+
+#if 0
+	/* Currently free_pgtables guarantees this.  */
+	s &= PMD_MASK;
+	e = (e + PMD_SIZE - 1) & PMD_MASK;
+#endif
+	vpte_base = (tlb_type == spitfire ?
+		     VPTE_BASE_SPITFIRE :
+		     VPTE_BASE_CHEETAH);
+
+	if (unlikely(nr != 0 && mm != mp->mm)) {
+		flush_tlb_pending();
+		nr = 0;
+	}
+
+	if (nr == 0)
+		mp->mm = mm;
+
+	start = vpte_base + (s >> (PAGE_SHIFT - 3));
+	end = vpte_base + (e >> (PAGE_SHIFT - 3));
+	while (start < end) {
+		mp->vaddrs[nr] = start;
+		mp->tlb_nr = ++nr;
+		if (nr >= TLB_BATCH_NR) {
+			flush_tlb_pending();
+			nr = 0;
+		}
+		start += PAGE_SIZE;
+	}
+	if (nr)
+		flush_tlb_pending();
+}
+
+unsigned long __ptrs_per_pmd(void)
+{
+	if (test_thread_flag(TIF_32BIT))
+		return (1UL << (32 - (PAGE_SHIFT-3) - PAGE_SHIFT));
+	return REAL_PTRS_PER_PMD;
+}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 16410ba9555cf5..fae62faf0c81fb 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -26,25 +26,7 @@
 	 */
 	.text
 	.align		32
-	.globl		__flush_tlb_page, __flush_tlb_mm, __flush_tlb_range
-__flush_tlb_page: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=page&PAGE_MASK, %o2=SECONDARY_CONTEXT */
-	ldxa		[%o2] ASI_DMMU, %g2
-	cmp		%g2, %o0
-	bne,pn		%icc, __spitfire_flush_tlb_page_slow
-	 or		%o1, 0x10, %g3
-	stxa		%g0, [%g3] ASI_DMMU_DEMAP
-	stxa		%g0, [%g3] ASI_IMMU_DEMAP
-	retl
-	 flush		%g6
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-
+	.globl		__flush_tlb_mm
 __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
 	ldxa		[%o1] ASI_DMMU, %g2
 	cmp		%g2, %o0
@@ -63,84 +45,31 @@ __flush_tlb_mm: /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
 	nop
 	nop
 
-__flush_tlb_range: /* %o0=(ctx&TAG_CONTEXT_BITS), %o1=start&PAGE_MASK, %o2=SECONDARY_CONTEXT,
-		    * %o3=end&PAGE_MASK, %o4=PAGE_SIZE, %o5=(end - start)
-		    */
-#define TLB_MAGIC	207 /* Students, do you know how I calculated this?  -DaveM */
-	cmp		%o5, %o4
-	bleu,pt		%xcc, __flush_tlb_page
-	 srlx		%o5, PAGE_SHIFT, %g5
-	cmp		%g5, TLB_MAGIC
-	bgeu,pn		%icc, __spitfire_flush_tlb_range_constant_time
-	 or		%o1, 0x10, %g5
-	ldxa		[%o2] ASI_DMMU, %g2
-	cmp		%g2, %o0
-__spitfire_flush_tlb_range_page_by_page:
-	bne,pn		%icc, __spitfire_flush_tlb_range_pbp_slow
-	 sub		%o5, %o4, %o5
-1:	stxa		%g0, [%g5 + %o5] ASI_DMMU_DEMAP
-	stxa		%g0, [%g5 + %o5] ASI_IMMU_DEMAP
-	brnz,pt		%o5, 1b
-	 sub		%o5, %o4, %o5
-	retl
-	 flush		%g6
-__spitfire_flush_tlb_range_constant_time: /* %o0=ctx, %o1=start, %o3=end */
-	rdpr		%pstate, %g1
-	wrpr		%g1, PSTATE_IE, %pstate
-	mov		TLB_TAG_ACCESS, %g3
-	mov		((SPITFIRE_HIGHEST_LOCKED_TLBENT-1) << 3), %g2
-
-	/* Spitfire Errata #32 workaround. */
-	mov		0x8, %o4
-	stxa		%g0, [%o4] ASI_DMMU
-	flush		%g6
-
-1:	ldxa		[%g2] ASI_ITLB_TAG_READ, %o4
-	and		%o4, TAG_CONTEXT_BITS, %o5
-	cmp		%o5, %o0
-	bne,pt		%icc, 2f
-	 andn		%o4, TAG_CONTEXT_BITS, %o4
-	cmp		%o4, %o1
-	blu,pt		%xcc, 2f
-	 cmp		%o4, %o3
-	blu,pn		%xcc, 4f
-2:	 ldxa		[%g2] ASI_DTLB_TAG_READ, %o4
-	and		%o4, TAG_CONTEXT_BITS, %o5
-	cmp		%o5, %o0
-	andn		%o4, TAG_CONTEXT_BITS, %o4
-	bne,pt		%icc, 3f
-	 cmp		%o4, %o1
-	blu,pt		%xcc, 3f
-	 cmp		%o4, %o3
-	blu,pn		%xcc, 5f
-	 nop
-3:	brnz,pt		%g2, 1b
-	 sub		%g2, (1 << 3), %g2
-	retl
-	 wrpr		%g1, 0x0, %pstate
-4:	stxa		%g0, [%g3] ASI_IMMU
-	stxa		%g0, [%g2] ASI_ITLB_DATA_ACCESS
-	flush		%g6
-
-	/* Spitfire Errata #32 workaround. */
-	mov		0x8, %o4
-	stxa		%g0, [%o4] ASI_DMMU
-	flush		%g6
-
-	ba,pt		%xcc, 2b
+	.align		32
+	.globl		__flush_tlb_pending
+__flush_tlb_pending:
+	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+	rdpr		%pstate, %g5
+	sllx		%o1, 3, %o1
+	andn		%g5, PSTATE_IE, %g2
+	wrpr		%g2, %pstate
+	mov		SECONDARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	stxa		%o0, [%o4] ASI_DMMU
+1:	sub		%o1, (1 << 3), %o1
+	ldx		[%o2 + %o1], %o3
+	andcc		%o3, 1, %g0
+	be,pn		%icc, 2f
+	 andn		%o3, 1, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	brnz,pt		%o1, 1b
 	 nop
-
-5:	stxa		%g0, [%g3] ASI_DMMU
-	stxa		%g0, [%g2] ASI_DTLB_DATA_ACCESS
 	flush		%g6
-
-	/* Spitfire Errata #32 workaround. */
-	mov		0x8, %o4
-	stxa		%g0, [%o4] ASI_DMMU
+	stxa		%g2, [%o2] ASI_DMMU
 	flush		%g6
-
-	ba,pt		%xcc, 3b
-	nop
+	retl
+	 wrpr		%g5, 0x0, %pstate
 
 	.align		32
 	.globl		__flush_tlb_kernel_range
@@ -171,33 +100,6 @@ __spitfire_flush_tlb_mm_slow:
 	retl
 	 wrpr		%g1, 0, %pstate
 
-__spitfire_flush_tlb_page_slow:
-	rdpr		%pstate, %g1
-	wrpr		%g1, PSTATE_IE, %pstate
-	stxa		%o0, [%o2] ASI_DMMU
-	stxa		%g0, [%g3] ASI_DMMU_DEMAP
-	stxa		%g0, [%g3] ASI_IMMU_DEMAP
-	flush		%g6
-	stxa		%g2, [%o2] ASI_DMMU
-	flush		%g6
-	retl
-	 wrpr		%g1, 0, %pstate
-
-__spitfire_flush_tlb_range_pbp_slow:
-	rdpr		%pstate, %g1
-	wrpr		%g1, PSTATE_IE, %pstate
-	stxa		%o0, [%o2] ASI_DMMU
-
-2:	stxa		%g0, [%g5 + %o5] ASI_DMMU_DEMAP
-	stxa		%g0, [%g5 + %o5] ASI_IMMU_DEMAP
-	brnz,pt		%o5, 2b
-	 sub		%o5, %o4, %o5
-	flush		%g6
-	stxa		%g2, [%o2] ASI_DMMU
-	flush		%g6
-	retl
-	 wrpr		%g1, 0x0, %pstate
-
 /*
  * The following code flushes one page_size worth.
  */
@@ -356,22 +258,6 @@ __update_mmu_cache:	/* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
 	ba,a,pt		%xcc, __prefill_itlb
 
 	/* Cheetah specific versions, patched at boot time. */
-__cheetah_flush_tlb_page: /* 14 insns */
-	rdpr		%pstate, %g5
-	andn		%g5, PSTATE_IE, %g2
-	wrpr		%g2, 0x0, %pstate
-	wrpr		%g0, 1, %tl
-	mov		PRIMARY_CONTEXT, %o2
-	ldxa		[%o2] ASI_DMMU, %g2
-	stxa		%o0, [%o2] ASI_DMMU
-	stxa		%g0, [%o1] ASI_DMMU_DEMAP
-	stxa		%g0, [%o1] ASI_IMMU_DEMAP
-	stxa		%g2, [%o2] ASI_DMMU
-	flush		%g6
-	wrpr		%g0, 0, %tl
-	retl
-	 wrpr		%g5, 0x0, %pstate
-
 __cheetah_flush_tlb_mm: /* 15 insns */
 	rdpr		%pstate, %g5
 	andn		%g5, PSTATE_IE, %g2
@@ -389,26 +275,29 @@ __cheetah_flush_tlb_mm: /* 15 insns */
 	retl
 	 wrpr		%g5, 0x0, %pstate
 
-__cheetah_flush_tlb_range: /* 20 insns */
-	cmp		%o5, %o4
-	blu,pt		%xcc, 9f
-	 rdpr		%pstate, %g5
+__cheetah_flush_tlb_pending: /* 22 insns */
+	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+	rdpr		%pstate, %g5
+	sllx		%o1, 3, %o1
 	andn		%g5, PSTATE_IE, %g2
 	wrpr		%g2, 0x0, %pstate
 	wrpr		%g0, 1, %tl
-	mov		PRIMARY_CONTEXT, %o2
-	sub		%o5, %o4, %o5
-	ldxa		[%o2] ASI_DMMU, %g2
-	stxa		%o0, [%o2] ASI_DMMU
-1:	stxa		%g0, [%o1 + %o5] ASI_DMMU_DEMAP
-	stxa		%g0, [%o1 + %o5] ASI_IMMU_DEMAP
-	membar		#Sync
-	brnz,pt		%o5, 1b
-	 sub		%o5, %o4, %o5
-	stxa		%g2, [%o2] ASI_DMMU
+	mov		PRIMARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	stxa		%o0, [%o4] ASI_DMMU
+1:	sub		%o1, (1 << 3), %o1
+	ldx		[%o2 + %o1], %o3
+	andcc		%o3, 1, %g0
+	be,pn		%icc, 2f
+	 andn		%o3, 1, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	brnz,pt		%o1, 1b
+	 membar		#Sync
+	stxa		%g2, [%o4] ASI_DMMU
 	flush		%g6
 	wrpr		%g0, 0, %tl
-9:	retl
+	retl
 	 wrpr		%g5, 0x0, %pstate
 
 flush_dcpage_cheetah: /* 11 insns */
@@ -439,13 +328,6 @@ cheetah_patch_one:
 cheetah_patch_cachetlbops:
 	save		%sp, -128, %sp
 
-	sethi		%hi(__flush_tlb_page), %o0
-	or		%o0, %lo(__flush_tlb_page), %o0
-	sethi		%hi(__cheetah_flush_tlb_page), %o1
-	or		%o1, %lo(__cheetah_flush_tlb_page), %o1
-	call		cheetah_patch_one
-	 mov		14, %o2
-
 	sethi		%hi(__flush_tlb_mm), %o0
 	or		%o0, %lo(__flush_tlb_mm), %o0
 	sethi		%hi(__cheetah_flush_tlb_mm), %o1
@@ -453,12 +335,12 @@ cheetah_patch_cachetlbops:
 	call		cheetah_patch_one
 	 mov		15, %o2
 
-	sethi		%hi(__flush_tlb_range), %o0
-	or		%o0, %lo(__flush_tlb_range), %o0
-	sethi		%hi(__cheetah_flush_tlb_range), %o1
-	or		%o1, %lo(__cheetah_flush_tlb_range), %o1
+	sethi		%hi(__flush_tlb_pending), %o0
+	or		%o0, %lo(__flush_tlb_pending), %o0
+	sethi		%hi(__cheetah_flush_tlb_pending), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_pending), %o1
 	call		cheetah_patch_one
-	 mov		20, %o2
+	 mov		22, %o2
 
 	sethi		%hi(__flush_dcache_page), %o0
 	or		%o0, %lo(__flush_dcache_page), %o0
@@ -487,17 +369,7 @@ cheetah_patch_cachetlbops:
 	 *    TODO: Make xcall TLB range flushes use the tricks above... -DaveM
 	 */
 	.align		32
-	.globl		xcall_flush_tlb_page, xcall_flush_tlb_mm, xcall_flush_tlb_range
-xcall_flush_tlb_page:
-	mov		PRIMARY_CONTEXT, %g2
-	ldxa		[%g2] ASI_DMMU, %g3
-	stxa		%g5, [%g2] ASI_DMMU
-	stxa		%g0, [%g1] ASI_DMMU_DEMAP
-	stxa		%g0, [%g1] ASI_IMMU_DEMAP
-	stxa		%g3, [%g2] ASI_DMMU
-	retry
-	nop
-
+	.globl		xcall_flush_tlb_mm
 xcall_flush_tlb_mm:
 	mov		PRIMARY_CONTEXT, %g2
 	mov		0x40, %g4
@@ -508,34 +380,25 @@ xcall_flush_tlb_mm:
 	stxa		%g3, [%g2] ASI_DMMU
 	retry
 
-xcall_flush_tlb_range:
-	sethi		%hi(PAGE_SIZE - 1), %g2
-	or		%g2, %lo(PAGE_SIZE - 1), %g2
-	andn		%g1, %g2, %g1
-	andn		%g7, %g2, %g7
-	sub		%g7, %g1, %g3
-	add		%g2, 1, %g2
-	srlx		%g3, PAGE_SHIFT, %g4
-	cmp		%g4, 96
-
-	bgu,pn		%icc, xcall_flush_tlb_mm
-	 mov		PRIMARY_CONTEXT, %g4
-	ldxa		[%g4] ASI_DMMU, %g7
-	sub		%g3, %g2, %g3
+	.globl		xcall_flush_tlb_pending
+xcall_flush_tlb_pending:
+	/* %g5=context, %g1=nr, %g7=vaddrs[] */
+	sllx		%g1, 3, %g1
+	mov		PRIMARY_CONTEXT, %g4
+	ldxa		[%g4] ASI_DMMU, %g2
 	stxa		%g5, [%g4] ASI_DMMU
-	nop
-	nop
-	nop
-
-1:	stxa		%g0, [%g1 + %g3] ASI_DMMU_DEMAP
-	stxa		%g0, [%g1 + %g3] ASI_IMMU_DEMAP
-	membar		#Sync
-	brnz,pt		%g3, 1b
-	 sub		%g3, %g2, %g3
-	stxa		%g7, [%g4] ASI_DMMU
+1:	sub		%g1, (1 << 3), %g1
+	ldx		[%g7 + %g1], %g5
+	andcc		%g5, 0x1, %g0
+	be,pn		%icc, 2f
+
+	 andn		%g5, 0x1, %g5
+	stxa		%g0, [%g5] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%g5] ASI_DMMU_DEMAP
+	brnz,pt		%g1, 1b
+	 membar		#Sync
+	stxa		%g2, [%g4] ASI_DMMU
 	retry
-	nop
-	nop
 
 	.globl		xcall_flush_tlb_kernel_range
 xcall_flush_tlb_kernel_range:
@@ -555,7 +418,6 @@ xcall_flush_tlb_kernel_range:
 	retry
 	nop
 	nop
-	nop
 
 	/* This runs in a very controlled environment, so we do
 	 * not need to worry about BH races etc.
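For reference, the demap loop shared by `__flush_tlb_pending`, `__cheetah_flush_tlb_pending`, and `xcall_flush_tlb_pending` above reads roughly as the following C; `demap_dtlb()`/`demap_itlb()` are hypothetical stand-ins for the privileged `stxa` stores to the demap ASIs, and this is a sketch of the loop's semantics, not kernel code:

```c
#include <stdio.h>

/* Stand-ins for "stxa %g0, [vaddr] ASI_DMMU_DEMAP / ASI_IMMU_DEMAP". */
static void demap_dtlb(unsigned long va) { printf("D-TLB demap %#lx\n", va); }
static void demap_itlb(unsigned long va) { printf("I-TLB demap %#lx\n", va); }

/* C rendition of the __flush_tlb_pending loop: walk vaddrs[] from the
 * last entry down; bit 0 marks an executable mapping that also needs
 * an I-TLB demap.  The asm pre-scales nr by 8 (sllx %o1, 3, %o1) and
 * decrements the byte offset directly; array indexing hides that here.
 */
static void flush_tlb_pending_c(unsigned long nr, const unsigned long *vaddrs)
{
    while (nr--) {
        unsigned long v = vaddrs[nr];

        if (v & 1UL)                     /* executable: flush I-TLB too */
            demap_itlb(v & ~1UL);
        demap_dtlb(v & ~1UL);
    }
    /* The asm brackets this loop by writing the target context into the
     * SECONDARY_CONTEXT (or, in the cheetah/xcall variants, the
     * PRIMARY_CONTEXT) MMU register and restoring the previous value
     * afterwards, with interrupts disabled throughout.
     */
}

int main(void)
{
    unsigned long pending[] = { 0x2000, 0x4001, 0x6000 };  /* 0x4001: exec page */
    flush_tlb_pending_c(3, pending);
    return 0;
}
```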