diff -urN pte-highref/arch/alpha/kernel/smp.c pte-high/arch/alpha/kernel/smp.c --- pte-highref/arch/alpha/kernel/smp.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/alpha/kernel/smp.c Fri Feb 22 18:51:24 2002 @@ -120,6 +120,7 @@ cpu_data[cpuid].asn_lock = 0; local_irq_count(cpuid) = 0; local_bh_count(cpuid) = 0; + INIT_LIST_HEAD(&cpu_data[cpuid].pte_cache); } /* diff -urN pte-highref/arch/alpha/mm/init.c pte-high/arch/alpha/mm/init.c --- pte-highref/arch/alpha/mm/init.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/alpha/mm/init.c Fri Feb 22 18:51:24 2002 @@ -43,7 +43,9 @@ struct thread_struct original_pcb; #ifndef CONFIG_SMP -struct pgtable_cache_struct quicklists; +struct pgtable_cache_struct quicklists = { + pte_cache: LIST_HEAD_INIT(quicklists.pte_cache), +}; #endif pgd_t * @@ -82,8 +84,8 @@ pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); freed++; } - if(pte_quicklist) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); + if (!list_empty(&pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0)); freed++; } } while(pgtable_cache_size > low); diff -urN pte-highref/arch/i386/kernel/setup.c pte-high/arch/i386/kernel/setup.c --- pte-highref/arch/i386/kernel/setup.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/kernel/setup.c Fri Feb 22 18:51:24 2002 @@ -118,7 +118,12 @@ */ char ignore_irq13; /* set if exception 16 works */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data = { + wp_works_ok: -1, + hlt_works_ok: 1, + cpuid_level: -1, + pte_quick: LIST_HEAD_INIT(boot_cpu_data.pte_quick), +}; unsigned long mmu_cr4_features; diff -urN pte-highref/arch/i386/kernel/smpboot.c pte-high/arch/i386/kernel/smpboot.c --- pte-highref/arch/i386/kernel/smpboot.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/kernel/smpboot.c Fri Feb 22 18:51:24 2002 @@ -144,7 +144,7 @@ struct cpuinfo_x86 *c = cpu_data + id; *c = boot_cpu_data; - c->pte_quick = 0; + INIT_LIST_HEAD(&c->pte_quick); c->pmd_quick = 0; c->pgd_quick = 0; 
c->pgtable_cache_sz = 0; diff -urN pte-highref/arch/i386/kernel/traps.c pte-high/arch/i386/kernel/traps.c --- pte-highref/arch/i386/kernel/traps.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/kernel/traps.c Fri Feb 22 18:51:24 2002 @@ -737,6 +737,7 @@ pte = pte_offset(pmd, page); __free_page(pte_page(*pte)); *pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO); + pte_kunmap(pte); /* * Not that any PGE-capable kernel should have the f00f bug ... */ diff -urN pte-highref/arch/i386/kernel/vm86.c pte-high/arch/i386/kernel/vm86.c --- pte-highref/arch/i386/kernel/vm86.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/kernel/vm86.c Fri Feb 22 18:51:24 2002 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -94,7 +95,7 @@ { pgd_t *pgd; pmd_t *pmd; - pte_t *pte; + pte_t *pte, *pte_orig; int i; pgd = pgd_offset(tsk->mm, 0xA0000); @@ -113,12 +114,13 @@ pmd_clear(pmd); return; } - pte = pte_offset(pmd, 0xA0000); + pte_orig = pte = pte_offset(pmd, 0xA0000); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } + pte_kunmap(pte_orig); flush_tlb(); } diff -urN pte-highref/arch/i386/mm/fault.c pte-high/arch/i386/mm/fault.c --- pte-highref/arch/i386/mm/fault.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/mm/fault.c Fri Feb 22 18:51:24 2002 @@ -19,6 +19,7 @@ #include #include #include /* For unblank_screen() */ +#include #include #include @@ -327,12 +328,14 @@ asm("movl %%cr3,%0":"=r" (page)); page = ((unsigned long *) __va(page))[address >> 22]; printk(KERN_ALERT "*pde = %08lx\n", page); +#ifndef CONFIG_HIGHMEM if (page & 1) { page &= PAGE_MASK; address &= 0x003ff000; page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; printk(KERN_ALERT "*pte = %08lx\n", page); } +#endif die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); @@ -387,7 +390,9 @@ int offset = __pgd_offset(address); pgd_t *pgd, *pgd_k; pmd_t *pmd, *pmd_k; - pte_t *pte_k; + pte_t *pte_k, *pte_k_orig; + struct page * page; + 
int present; asm("movl %%cr3,%0":"=r" (pgd)); pgd = offset + (pgd_t *)__va(pgd); @@ -403,8 +408,14 @@ goto no_context; set_pmd(pmd, *pmd_k); - pte_k = pte_offset(pmd_k, address); - if (!pte_present(*pte_k)) + local_irq_disable(); + page = __pmd_page(*pmd_k); + pte_k_orig = pte_k = (pte_t *) kmap_atomic(page, KM_BH_IRQ); + pte_k += __pte_offset(address); + present = pte_present(*pte_k); + kunmap_atomic(pte_k_orig, KM_BH_IRQ); + local_irq_enable(); + if (!present) goto no_context; return; } diff -urN pte-highref/arch/i386/mm/init.c pte-high/arch/i386/mm/init.c --- pte-highref/arch/i386/mm/init.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/mm/init.c Fri Feb 22 18:51:24 2002 @@ -56,8 +56,8 @@ pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); freed++; } - if (pte_quicklist) { - pte_free_slow(pte_alloc_one_fast(NULL, 0)); + if (!list_empty(&pte_quicklist)) { + pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0)); freed++; } } while(pgtable_cache_size > low); @@ -76,7 +76,7 @@ pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ - pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + pte_offset_lowmem(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) void __init kmap_init(void) { @@ -143,7 +143,7 @@ printk("PAE BUG #01!\n"); return; } - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); if (pte_val(*pte)) pte_ERROR(*pte); pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags); @@ -167,42 +167,54 @@ set_pte_phys(address, phys, flags); } -static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base, int contigous_pte) { pgd_t *pgd; pmd_t *pmd; pte_t *pte; int i, j; - unsigned long vaddr; + int nr_pte; + void * pte_array; - vaddr = start; - i = __pgd_offset(vaddr); - j = __pmd_offset(vaddr); + if (start & ~PAGE_MASK) + BUG(); + + start &= PMD_MASK; + + i = __pgd_offset(start); + j = __pmd_offset(start); pgd = pgd_base + i; 
- for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -#if CONFIG_X86_PAE - if (pgd_none(*pgd)) { - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); - if (pmd != pmd_offset(pgd, 0)) - printk("PAE BUG #02!\n"); - } - pmd = pmd_offset(pgd, vaddr); -#else - pmd = (pmd_t *)pgd; -#endif - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (contigous_pte) { + if (start >= end) + BUG(); + nr_pte = (end - start + PMD_SIZE - 1) >> PMD_SHIFT; + pte_array = alloc_bootmem_low_pages(PAGE_SIZE * nr_pte); + } + for ( ; (i < PTRS_PER_PGD) && (start < end); pgd++, i++) { + pmd = pmd_offset(pgd, start); + for (; (j < PTRS_PER_PMD) && (start < end); pmd++, j++) { if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - if (pte != pte_offset(pmd, 0)) + if (contigous_pte) { + pte = (pte_t *) pte_array; + pte_array += PAGE_SIZE; + nr_pte--; + } else + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, mk_pmd_phys(__pa(pte), __pgprot(_KERNPG_TABLE))); + if (pte != pte_offset_lowmem(pmd, 0)) BUG(); } - vaddr += PMD_SIZE; + start += PMD_SIZE; } j = 0; } + if (contigous_pte) { + if (nr_pte < 0) + BUG(); + if (nr_pte > 0) + free_bootmem((unsigned long) pte_array, nr_pte * PAGE_SIZE); + } } static void __init pagetable_init (void) @@ -221,8 +233,24 @@ pgd_base = swapper_pg_dir; #if CONFIG_X86_PAE - for (i = 0; i < PTRS_PER_PGD; i++) + /* + * First set all four entries of the pgd. + * Usually only one page is needed here: if PAGE_OFFSET lowered, + * maybe three pages: need not be contiguous, but might as well. 
+ */ + pmd = (pmd_t *)alloc_bootmem_low_pages(KERNEL_PGD_PTRS*PAGE_SIZE); + for (i = 1; i < USER_PGD_PTRS; i++) set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page))); + for (; i < PTRS_PER_PGD; i++, pmd += PTRS_PER_PMD) + set_pgd(pgd_base + i, __pgd(1 + __pa(pmd))); + /* + * Add low memory identity-mappings - SMP needs it when + * starting up on an AP from real-mode. In the non-PAE + * case we already have these mappings through head.S. + * All user-space mappings are explicitly cleared after + * SMP startup. + */ + pgd_base[0] = pgd_base[USER_PGD_PTRS]; #endif i = __pgd_offset(PAGE_OFFSET); pgd = pgd_base + i; @@ -231,30 +259,23 @@ vaddr = i*PGDIR_SIZE; if (end && (vaddr >= end)) break; -#if CONFIG_X86_PAE - pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); -#else - pmd = (pmd_t *)pgd; -#endif - if (pmd != pmd_offset(pgd, 0)) - BUG(); + pmd = pmd_offset(pgd, 0); for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE; if (end && (vaddr >= end)) break; if (cpu_has_pse) { - unsigned long __pe; + unsigned long prot; set_in_cr4(X86_CR4_PSE); boot_cpu_data.wp_works_ok = 1; - __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr); + prot = _KERNPG_TABLE + _PAGE_PSE; /* Make it "global" too if supported */ if (cpu_has_pge) { set_in_cr4(X86_CR4_PGE); - __pe += _PAGE_GLOBAL; + prot += _PAGE_GLOBAL; } - set_pmd(pmd, __pmd(__pe)); + set_pmd(pmd, mk_pmd_phys(__pa(vaddr), __pgprot(prot))); continue; } @@ -266,43 +287,33 @@ break; *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); } - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); - if (pte_base != pte_offset(pmd, 0)) + set_pmd(pmd, mk_pmd_phys(__pa(pte_base), __pgprot(_KERNPG_TABLE))); + if (pte_base != pte_offset_lowmem(pmd, 0)) BUG(); } } - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); - #if 
CONFIG_HIGHMEM /* - * Permanent kmaps: + * Permanent kmaps: initialize before the fixmap area + * because here the ptes needs to be contigous. */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr, vaddr + PKMAP_SIZE, pgd_base, 1); pgd = swapper_pg_dir + __pgd_offset(vaddr); pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); pkmap_page_table = pte; #endif -#if CONFIG_X86_PAE /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -#endif + vaddr = FIXADDR_START; + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base, 0); } void __init zap_low_mappings (void) @@ -398,7 +409,7 @@ pgd = swapper_pg_dir + __pgd_offset(vaddr); pmd = pmd_offset(pgd, vaddr); - pte = pte_offset(pmd, vaddr); + pte = pte_offset_lowmem(pmd, vaddr); old_pte = *pte; *pte = mk_pte_phys(0, PAGE_READONLY); local_flush_tlb(); diff -urN pte-highref/arch/i386/mm/ioremap.c pte-high/arch/i386/mm/ioremap.c --- pte-highref/arch/i386/mm/ioremap.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/i386/mm/ioremap.c Fri Feb 22 18:51:24 2002 @@ -9,6 +9,7 @@ */ #include +#include #include #include @@ -53,6 +54,7 @@ if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); diff -urN pte-highref/arch/ia64/kernel/setup.c pte-high/arch/ia64/kernel/setup.c --- pte-highref/arch/ia64/kernel/setup.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/ia64/kernel/setup.c Fri Feb 22 18:51:24 2002 @@ -615,5 +615,8 @@ printk ("cpu_init: PAL RSE 
info failed, assuming 96 physical stacked regs\n"); num_phys_stacked = 96; } + + INIT_LIST_HEAD(&local_cpu_data->pte_quick); + local_cpu_data->phys_stacked_size_p8 = num_phys_stacked*8 + 8; } diff -urN pte-highref/arch/ia64/mm/init.c pte-high/arch/ia64/mm/init.c --- pte-highref/arch/ia64/mm/init.c Fri Feb 22 18:51:15 2002 +++ pte-high/arch/ia64/mm/init.c Fri Feb 22 18:51:24 2002 @@ -48,8 +48,8 @@ free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed; if (pmd_quicklist) free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed; - if (pte_quicklist) - free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed; + if (!list_empty(&pte_quicklist)) + pte_free_slow(pte_alloc_one_fast_lifo(0, 0)), ++freed; } while (pgtable_cache_size > low); } return freed; diff -urN pte-highref/drivers/char/drm/drm_proc.h pte-high/drivers/char/drm/drm_proc.h --- pte-highref/drivers/char/drm/drm_proc.h Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/char/drm/drm_proc.h Fri Feb 22 18:51:24 2002 @@ -449,7 +449,7 @@ for (i = vma->vm_start; i < vma->vm_end; i += PAGE_SIZE) { pgd = pgd_offset(vma->vm_mm, i); pmd = pmd_offset(pgd, i); - pte = pte_offset(pmd, i); + pte = pte_offset_atomic(pmd, i); if (pte_present(*pte)) { address = __pa(pte_page(*pte)) + (i & (PAGE_SIZE-1)); @@ -465,6 +465,7 @@ } else { DRM_PROC_PRINT(" 0x%08lx\n", i); } + pte_kunmap(pte); } #endif } diff -urN pte-highref/drivers/char/drm/drm_scatter.h pte-high/drivers/char/drm/drm_scatter.h --- pte-highref/drivers/char/drm/drm_scatter.h Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/char/drm/drm_scatter.h Fri Feb 22 18:51:24 2002 @@ -145,9 +145,10 @@ pte = pte_offset( pmd, i ); if ( !pte_present( *pte ) ) - goto failed; + goto failed_unmap; entry->pagelist[j] = pte_page( *pte ); + pte_kunmap(pte); SetPageReserved( entry->pagelist[j] ); } @@ -205,6 +206,8 @@ return 0; + failed_unmap: + pte_kunmap(pte); failed: DRM(sg_cleanup)( entry ); return -ENOMEM; diff -urN pte-highref/drivers/char/drm/drm_vm.h 
pte-high/drivers/char/drm/drm_vm.h --- pte-highref/drivers/char/drm/drm_vm.h Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/char/drm/drm_vm.h Fri Feb 22 18:51:24 2002 @@ -170,9 +170,10 @@ pmd = pmd_offset( pgd, i ); if( !pmd_present( *pmd ) ) return NOPAGE_OOM; pte = pte_offset( pmd, i ); - if( !pte_present( *pte ) ) return NOPAGE_OOM; + if( !pte_present( *pte ) ) { pte_kunmap(pte); return NOPAGE_OOM; } page = pte_page(*pte); + pte_kunmap(pte); get_page(page); DRM_DEBUG("shm_nopage 0x%lx\n", address); diff -urN pte-highref/drivers/char/drm-4.0/proc.c pte-high/drivers/char/drm-4.0/proc.c --- pte-highref/drivers/char/drm-4.0/proc.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/char/drm-4.0/proc.c Fri Feb 22 18:51:24 2002 @@ -425,6 +425,7 @@ } else { DRM_PROC_PRINT(" 0x%08lx\n", i); } + pte_kunmap(pte); } #endif } diff -urN pte-highref/drivers/ieee1394/video1394.c pte-high/drivers/ieee1394/video1394.c --- pte-highref/drivers/ieee1394/video1394.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/ieee1394/video1394.c Fri Feb 22 18:51:24 2002 @@ -187,8 +187,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if(pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); diff -urN pte-highref/drivers/media/video/bttv-driver.c pte-high/drivers/media/video/bttv-driver.c --- pte-highref/drivers/media/video/bttv-driver.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/media/video/bttv-driver.c Fri Feb 22 18:51:24 2002 @@ -154,8 +154,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if(pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/drivers/media/video/cpia.c pte-high/drivers/media/video/cpia.c --- pte-highref/drivers/media/video/cpia.c Fri 
Feb 22 18:51:15 2002 +++ pte-high/drivers/media/video/cpia.c Fri Feb 22 18:51:24 2002 @@ -201,8 +201,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE-1)); diff -urN pte-highref/drivers/media/video/meye.c pte-high/drivers/media/video/meye.c --- pte-highref/drivers/media/video/meye.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/media/video/meye.c Fri Feb 22 18:51:24 2002 @@ -129,8 +129,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if(pte_present(pte)) { ret = (unsigned long)page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/drivers/sgi/char/graphics.c pte-high/drivers/sgi/char/graphics.c --- pte-highref/drivers/sgi/char/graphics.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/sgi/char/graphics.c Fri Feb 22 18:51:24 2002 @@ -221,6 +221,7 @@ int board = GRAPHICS_CARD (vma->vm_dentry->d_inode->i_rdev); unsigned long virt_add, phys_add; + struct page * page; #ifdef DEBUG printk ("Got a page fault for board %d address=%lx guser=%lx\n", board, @@ -248,7 +249,9 @@ pgd = pgd_offset(current->mm, address); pmd = pmd_offset(pgd, address); pte = pte_offset(pmd, address); - return pte_page(*pte); + page = pte_page(*pte); + pte_kunmap(pte); + return page; } /* diff -urN pte-highref/drivers/usb/ov511.c pte-high/drivers/usb/ov511.c --- pte-highref/drivers/usb/ov511.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/ov511.c Fri Feb 22 18:51:24 2002 @@ -401,8 +401,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { ret = (unsigned long) 
page_address(pte_page(pte)); diff -urN pte-highref/drivers/usb/pwc-if.c pte-high/drivers/usb/pwc-if.c --- pte-highref/drivers/usb/pwc-if.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/pwc-if.c Fri Feb 22 18:51:24 2002 @@ -194,8 +194,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if(pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/drivers/usb/se401.c pte-high/drivers/usb/se401.c --- pte-highref/drivers/usb/se401.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/se401.c Fri Feb 22 18:51:24 2002 @@ -106,8 +106,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/drivers/usb/stv680.c pte-high/drivers/usb/stv680.c --- pte-highref/drivers/usb/stv680.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/stv680.c Fri Feb 22 18:51:24 2002 @@ -133,8 +133,9 @@ if (!pgd_none (*pgd)) { pmd = pmd_offset (pgd, adr); if (!pmd_none (*pmd)) { - ptep = pte_offset (pmd, adr); + ptep = pte_offset_atomic (pmd, adr); pte = *ptep; + pte_kunmap(ptep); if (pte_present (pte)) { ret = (unsigned long) page_address (pte_page (pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/drivers/usb/usbvideo.c pte-high/drivers/usb/usbvideo.c --- pte-highref/drivers/usb/usbvideo.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/usbvideo.c Fri Feb 22 18:51:24 2002 @@ -72,8 +72,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret 
|= (adr & (PAGE_SIZE-1)); diff -urN pte-highref/drivers/usb/vicam.c pte-high/drivers/usb/vicam.c --- pte-highref/drivers/usb/vicam.c Fri Feb 22 18:51:15 2002 +++ pte-high/drivers/usb/vicam.c Fri Feb 22 18:51:24 2002 @@ -115,8 +115,9 @@ if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, adr); if (!pmd_none(*pmd)) { - ptep = pte_offset(pmd, adr); + ptep = pte_offset_atomic(pmd, adr); pte = *ptep; + pte_kunmap(ptep); if(pte_present(pte)) { ret = (unsigned long) page_address(pte_page(pte)); ret |= (adr & (PAGE_SIZE - 1)); diff -urN pte-highref/fs/exec.c pte-high/fs/exec.c --- pte-highref/fs/exec.c Fri Feb 22 18:51:15 2002 +++ pte-high/fs/exec.c Fri Feb 22 18:51:24 2002 @@ -274,7 +274,7 @@ if (!pte) goto out; if (!pte_none(*pte)) - goto out; + goto out_unmap; lru_cache_add(page); flush_dcache_page(page); flush_page_to_ram(page); @@ -282,8 +282,12 @@ tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); + pte_kunmap(pte); + /* no need for flush_tlb */ return; +out_unmap: + pte_kunmap(pte); out: spin_unlock(&tsk->mm->page_table_lock); __free_page(page); diff -urN pte-highref/fs/proc/array.c pte-high/fs/proc/array.c --- pte-highref/fs/proc/array.c Fri Feb 22 18:51:15 2002 +++ pte-high/fs/proc/array.c Fri Feb 22 18:51:24 2002 @@ -395,7 +395,7 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, int * pages, int * shared, int * dirty, int * total) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -405,7 +405,7 @@ pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -432,6 +432,7 @@ if (page_count(pte_page(page)) > 1) ++*shared; } while (address < end); + pte_kunmap(pte_orig); } static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size, diff -urN pte-highref/include/asm-alpha/pgalloc.h pte-high/include/asm-alpha/pgalloc.h --- 
pte-highref/include/asm-alpha/pgalloc.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-alpha/pgalloc.h Fri Feb 22 18:51:24 2002 @@ -2,6 +2,7 @@ #define _ALPHA_PGALLOC_H #include +#include #ifndef __EXTERN_INLINE #define __EXTERN_INLINE extern inline @@ -234,7 +235,7 @@ extern struct pgtable_cache_struct { unsigned long *pgd_cache; unsigned long *pmd_cache; - unsigned long *pte_cache; + struct list_head pte_cache; unsigned long pgtable_cache_sz; } quicklists; #else @@ -246,7 +247,7 @@ #define pte_quicklist (quicklists.pte_cache) #define pgtable_cache_size (quicklists.pgtable_cache_sz) -#define pmd_populate(mm, pmd, pte) pmd_set(pmd, pte) +#define pmd_populate(mm, pmd, page) do { *(pmd) = mk_pmd(page, __pgprot(_PAGE_TABLE)); } while (0) #define pgd_populate(mm, pgd, pmd) pgd_set(pgd, pmd) extern pgd_t *get_pgd_slow(void); @@ -288,8 +289,8 @@ { unsigned long *ret; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); + if ((ret = (unsigned long *)pmd_quicklist) != NULL) { + pmd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; } @@ -298,8 +299,8 @@ static inline void pmd_free_fast(pmd_t *pmd) { - *(unsigned long *)pmd = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pmd; + *(unsigned long *)pmd = (unsigned long) pmd_quicklist; + pmd_quicklist = (unsigned long *) pmd; pgtable_cache_size++; } @@ -308,36 +309,48 @@ free_page((unsigned long)pmd); } -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - return pte; + struct list_head * entry = pte_quicklist.next; /* FIFO */ + struct page * page = NULL; + + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + pgtable_cache_size--; + } + return page; } -static inline pte_t 
*pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm, + unsigned long address) { - unsigned long *ret; + struct list_head * entry = pte_quicklist.prev; /* LIFO */ + struct page * page = NULL; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); pgtable_cache_size--; } - return (pte_t *)ret; + return page; } -static inline void pte_free_fast(pte_t *pte) +static inline void pte_free_fast(struct page * page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); pgtable_cache_size++; } -static inline void pte_free_slow(pte_t *pte) +static __inline__ void pte_free_slow(struct page * page) +{ + __free_page(page); +} + +static inline void pte_free_via_pmd(pmd_t pmd) { - free_page((unsigned long)pte); + pte_free_fast(virt_to_page(pte_offset(&pmd, 0))); } #define pte_free(pte) pte_free_fast(pte) diff -urN pte-highref/include/asm-alpha/pgtable.h pte-high/include/asm-alpha/pgtable.h --- pte-highref/include/asm-alpha/pgtable.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-alpha/pgtable.h Fri Feb 22 18:51:24 2002 @@ -221,6 +221,29 @@ }) #endif +#ifndef CONFIG_DISCONTIGMEM +#define mk_pmd(page, pgprot) \ +({ \ + pmd_t pmd; \ + \ + pmd_val(pmd) = ((unsigned long)(page - mem_map) << 32) | \ + pgprot_val(pgprot); \ + pmd; \ +}) +#else +#define mk_pmd(page, pgprot) \ +({ \ + pmd_t pmd; \ + unsigned long pfn; \ + \ + pfn = ((unsigned long)((page)-(page)->zone->zone_mem_map)) << 32; \ + pfn += (page)->zone->zone_start_paddr << (32-PAGE_SHIFT); \ + pmd_val(pmd) = pfn | pgprot_val(pgprot); \ + \ + pmd; \ +}) +#endif + extern inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpage) << 
(32-PAGE_SHIFT)) | pgprot_val(pgprot); return pte; } @@ -312,6 +335,12 @@ { return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); } +#define pte_offset2(dir, address) pte_offset(dir, address) +#define pte_offset_atomic(dir, address) pte_offset(dir, address) +#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_kunmap(ptep) do { } while(0) +#define pte_kunmap2(ptep) do { } while(0) extern pgd_t swapper_pg_dir[1024]; diff -urN pte-highref/include/asm-alpha/prefetch.h pte-high/include/asm-alpha/prefetch.h --- pte-highref/include/asm-alpha/prefetch.h Thu Jan 1 01:00:00 1970 +++ pte-high/include/asm-alpha/prefetch.h Fri Feb 22 18:51:24 2002 @@ -0,0 +1,23 @@ +#ifndef __ASM_ALPHA_PREFETCH_H +#define __ASM_ALPHA_PREFETCH_H + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +extern inline void prefetchw(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +extern inline void spin_lock_prefetch(const void *ptr) +{ + __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); +} + +#endif /* __ASM_ALPHA_PREFETCH_H */ diff -urN pte-highref/include/asm-alpha/processor.h pte-high/include/asm-alpha/processor.h --- pte-highref/include/asm-alpha/processor.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-alpha/processor.h Fri Feb 22 18:51:24 2002 @@ -150,25 +150,4 @@ #define cpu_relax() do { } while (0) -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void prefetchw(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void spin_lock_prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char 
*)ptr)); -} - - - #endif /* __ASM_ALPHA_PROCESSOR_H */ diff -urN pte-highref/include/asm-alpha/smp.h pte-high/include/asm-alpha/smp.h --- pte-highref/include/asm-alpha/smp.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-alpha/smp.h Fri Feb 22 18:51:24 2002 @@ -3,6 +3,7 @@ #include #include +#include /* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ @@ -30,7 +31,7 @@ int asn_lock; unsigned long *pgd_cache; unsigned long *pmd_cache; - unsigned long *pte_cache; + struct list_head pte_cache; unsigned long pgtable_cache_sz; unsigned long ipi_count; unsigned long irq_attempt[NR_IRQS]; diff -urN pte-highref/include/asm-i386/fixmap.h pte-high/include/asm-i386/fixmap.h --- pte-highref/include/asm-i386/fixmap.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/fixmap.h Fri Feb 22 18:51:24 2002 @@ -87,7 +87,7 @@ */ #define FIXADDR_TOP (0xffffe000UL) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE + PAGE_SIZE) #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) diff -urN pte-highref/include/asm-i386/highmem.h pte-high/include/asm-i386/highmem.h --- pte-highref/include/asm-i386/highmem.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/highmem.h Fri Feb 22 18:51:25 2002 @@ -41,40 +41,43 @@ extern void kmap_init(void) __init; -/* - * Right now we initialize only a single pte table. It can be extended - * easily, subsequent pte tables have to be allocated in one physical - * chunk of RAM. 
- */ -#define PKMAP_BASE (0xfe000000UL) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else +enum km_serie_type { + KM_SERIE_DEFAULT, + KM_SERIE_PAGETABLE, + KM_NR_SERIES, +}; + #define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_SIZE ((LAST_PKMAP*KM_NR_SERIES) << PAGE_SHIFT) +#define PKMAP_BASE (FIXADDR_START - PKMAP_SIZE - PAGE_SIZE) /* left a page in between */ +#define PKMAP_NR(virt) (((virt)-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void * FASTCALL(kmap_high(struct page *page)); -extern void FASTCALL(kunmap_high(struct page *page)); +extern void * FASTCALL(kmap_high(struct page *page, int serie)); +extern void FASTCALL(kunmap_high(void * vaddr, int serie)); -static inline void *kmap(struct page *page) +#define kmap(page) kmap_serie(page, KM_SERIE_DEFAULT) +#define kmap_pagetable(page) kmap_serie(page, KM_SERIE_PAGETABLE) + +static inline void *kmap_serie(struct page *page, int serie) { - if (in_interrupt()) - BUG(); if (page < highmem_start_page) return page_address(page); - return kmap_high(page); + return kmap_high(page, serie); } static inline void kunmap(struct page *page) { - if (in_interrupt()) - BUG(); if (page < highmem_start_page) return; - kunmap_high(page); + kunmap_high(page->virtual, KM_SERIE_DEFAULT); +} + +static inline void kunmap_vaddr(void *kvaddr) +{ + if ((unsigned long) kvaddr < PKMAP_BASE) + return; + kunmap_high(kvaddr, KM_SERIE_DEFAULT); } /* @@ -122,6 +125,20 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif +} + +static inline void kunmap_pagetable(void *kvaddr) +{ + /* direct map */ + if ((unsigned long) kvaddr < PKMAP_BASE) + return; + /* atomic kmap */ + if ((unsigned long) kvaddr >= FIXADDR_START) { + kvaddr = (void *) ((unsigned long) kvaddr & PAGE_MASK); + kunmap_atomic(kvaddr, KM_USER0); + return; + } + kunmap_high(kvaddr, KM_SERIE_PAGETABLE); } #endif /* __KERNEL__ */ diff 
-urN pte-highref/include/asm-i386/page.h pte-high/include/asm-i386/page.h --- pte-highref/include/asm-i386/page.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/page.h Fri Feb 22 18:51:25 2002 @@ -38,20 +38,21 @@ */ #if CONFIG_X86_PAE typedef struct { unsigned long pte_low, pte_high; } pte_t; -typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long pmd_low, pmd_high; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#define pmd_val(x) ((x).pmd_low | ((unsigned long long)(x).pmd_high << 32)) #else typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pmd_low; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; #define pte_val(x) ((x).pte_low) +#define pmd_val(x) ((x).pmd_low) #endif #define PTE_MASK PAGE_MASK typedef struct { unsigned long pgprot; } pgprot_t; -#define pmd_val(x) ((x).pmd) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) diff -urN pte-highref/include/asm-i386/pgalloc.h pte-high/include/asm-i386/pgalloc.h --- pte-highref/include/asm-i386/pgalloc.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/pgalloc.h Fri Feb 22 18:51:25 2002 @@ -11,8 +11,7 @@ #define pte_quicklist (current_cpu_data.pte_quick) #define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -#define pmd_populate(mm, pmd, pte) \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) +#define pmd_populate(mm, pmd, page) set_pmd(pmd, mk_pmd(page, __pgprot(_PAGE_TABLE))) /* * Allocate and free page tables. 
@@ -104,39 +103,48 @@ #endif } -static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline struct page * pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { - pte_t *pte; + struct list_head * entry = pte_quicklist.next; /* head: most recently freed */ + struct page * page = NULL; - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pte) - clear_page(pte); - return pte; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + pgtable_cache_size--; + } + return page; } -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, - unsigned long address) +static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm, + unsigned long address) { - unsigned long *ret; + struct list_head * entry = pte_quicklist.prev; /* tail: least recently freed */ + struct page * page = NULL; - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); pgtable_cache_size--; } - return (pte_t *)ret; + return page; } -static inline void pte_free_fast(pte_t *pte) +static inline void pte_free_fast(struct page * page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); pgtable_cache_size++; } -static __inline__ void pte_free_slow(pte_t *pte) +static inline void pte_free_via_pmd(pmd_t pmd) +{ + pte_free_fast(__pmd_page(pmd)); +} + +static __inline__ void pte_free_slow(struct page * page) { - free_page((unsigned long)pte); + __free_page(page); } #define pte_free(pte) pte_free_fast(pte) diff -urN pte-highref/include/asm-i386/pgtable-2level.h pte-high/include/asm-i386/pgtable-2level.h --- pte-highref/include/asm-i386/pgtable-2level.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/pgtable-2level.h Fri Feb 22 18:51:25 2002 @@ -58,6 +58,9 @@ #define pte_same(a, b) ((a).pte_low == 
(b).pte_low) #define pte_page(x) (mem_map+((unsigned long)(((x).pte_low >> PAGE_SHIFT)))) #define pte_none(x) (!(x).pte_low) +#define __pmd_page(x) (mem_map + ((x).pmd_low >> PAGE_SHIFT)) +#define pmd_none(x) (!(x).pmd_low) #define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) +#define __mk_pmd(page_nr,pgprot) __pmd(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) #endif /* _I386_PGTABLE_2LEVEL_H */ diff -urN pte-highref/include/asm-i386/pgtable-3level.h pte-high/include/asm-i386/pgtable-3level.h --- pte-highref/include/asm-i386/pgtable-3level.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/pgtable-3level.h Fri Feb 22 18:51:25 2002 @@ -49,8 +49,13 @@ smp_wmb(); ptep->pte_low = pte.pte_low; } -#define set_pmd(pmdptr,pmdval) \ - set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + pmdp->pmd_high = pmd.pmd_high; + smp_wmb(); + pmdp->pmd_low = pmd.pmd_low; +} #define set_pgd(pgdptr,pgdval) \ set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval)) @@ -88,6 +93,8 @@ #define pte_page(x) (mem_map+(((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT)))) #define pte_none(x) (!(x).pte_low && !(x).pte_high) +#define __pmd_page(x) (mem_map + (((x).pmd_low >> PAGE_SHIFT) | ((x).pmd_high << (32-PAGE_SHIFT)))) +#define pmd_none(x) (!(x).pmd_low && !(x).pmd_high) static inline pte_t __mk_pte(unsigned long page_nr, pgprot_t pgprot) { @@ -96,6 +103,15 @@ pte.pte_high = page_nr >> (32 - PAGE_SHIFT); pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); return pte; +} + +static inline pmd_t __mk_pmd(unsigned long page_nr, pgprot_t pgprot) +{ + pmd_t pmd; + + pmd.pmd_high = page_nr >> (32 - PAGE_SHIFT); + pmd.pmd_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); + return pmd; } #endif /* _I386_PGTABLE_3LEVEL_H */ diff -urN pte-highref/include/asm-i386/pgtable.h pte-high/include/asm-i386/pgtable.h --- pte-highref/include/asm-i386/pgtable.h Fri Feb 22 18:51:15 
2002 +++ pte-high/include/asm-i386/pgtable.h Fri Feb 22 18:51:25 2002 @@ -259,10 +259,9 @@ #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0) -#define pmd_none(x) (!pmd_val(x)) -#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_present(x) ((x).pmd_low & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) (((x).pmd_low & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) /* * Permanent address of a page. Obviously must never be @@ -307,9 +306,11 @@ */ #define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) +#define mk_pmd(page, pgprot) __mk_pmd((page) - mem_map, (pgprot)) /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) +#define mk_pmd_phys(physpage, pgprot) __mk_pmd((physpage) >> PAGE_SHIFT, pgprot) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -320,8 +321,54 @@ #define page_pte(page) page_pte_prot(page, __pgprot(0)) -#define pmd_page(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_page(pmd) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + kmap_pagetable(__page); \ +}) + +#define pmd_page_atomic(pmd) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + kmap_atomic(__page, KM_USER0); \ +}) + +#define pmd_page_under_lock(pmd, mm) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + int page_highmem = PageHighMem(__page); \ + void *__kvaddr; \ + \ + if (page_highmem) \ + spin_unlock(&(mm)->page_table_lock); \ + __kvaddr = kmap_pagetable(__page); \ + if (page_highmem) \ + spin_lock(&(mm)->page_table_lock); \ + __kvaddr; \ +}) + +/* + * pte_offset2_under_lock, invoking pmd_page2_under_lock, + * is used by nothing except mremap's get_one_pte: it uses + * the default kmap on src pagetable, 
before kmap_pagetable + * is used on dst pagetable, to avoid potential deadlock. + */ +#define pmd_page2_under_lock(pmd, mm) \ +({ \ + struct page * __page = __pmd_page(pmd); \ + int page_highmem = PageHighMem(__page); \ + void *__kvaddr; \ + \ + if (page_highmem) \ + spin_unlock(&(mm)->page_table_lock); \ + __kvaddr = kmap(__page); \ + if (page_highmem) \ + spin_lock(&(mm)->page_table_lock); \ + __kvaddr; \ +}) + +#define pmd_page_lowmem(pmd) \ + (__va((pmd).pmd_low & PAGE_MASK)) /* to find an entry in a page-table-directory. */ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -341,6 +388,16 @@ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ __pte_offset(address)) +#define pte_offset_atomic(dir, address) ((pte_t *) pmd_page_atomic(*(dir)) + \ + __pte_offset(address)) +#define pte_offset_under_lock(dir, address, mm) ((pte_t *) pmd_page_under_lock(*(dir), mm) + \ + __pte_offset(address)) +#define pte_offset2_under_lock(dir, address, mm) ((pte_t *) pmd_page2_under_lock(*(dir), mm) + \ + __pte_offset(address)) +#define pte_offset_lowmem(dir, address) ((pte_t *) pmd_page_lowmem(*(dir)) + \ + __pte_offset(address)) +#define pte_kunmap(ptep) kunmap_pagetable(ptep) +#define pte_kunmap2(ptep) kunmap_vaddr(ptep) /* * The i386 doesn't have any external MMU info: the kernel page diff -urN pte-highref/include/asm-i386/prefetch.h pte-high/include/asm-i386/prefetch.h --- pte-highref/include/asm-i386/prefetch.h Thu Jan 1 01:00:00 1970 +++ pte-high/include/asm-i386/prefetch.h Fri Feb 22 18:51:25 2002 @@ -0,0 +1,34 @@ +#ifndef __ASM_I386_PREFETCH_H +#define __ASM_I386_PREFETCH_H + +#include + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define 
ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#endif /* __ASM_I386_PREFETCH_H */ diff -urN pte-highref/include/asm-i386/processor.h pte-high/include/asm-i386/processor.h --- pte-highref/include/asm-i386/processor.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-i386/processor.h Fri Feb 22 18:51:25 2002 @@ -17,6 +17,7 @@ #include #include #include +#include /* * Default implementation of macro that returns current @@ -51,7 +52,7 @@ unsigned long loops_per_jiffy; unsigned long *pgd_quick; unsigned long *pmd_quick; - unsigned long *pte_quick; + struct list_head pte_quick; unsigned long pgtable_cache_sz; } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -481,33 +482,5 @@ } #define cpu_relax() rep_nop() - -/* Prefetch instructions for Pentium III and AMD Athlon */ -#ifdef CONFIG_MPENTIUMIII - -#define ARCH_HAS_PREFETCH -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); -} - -#elif CONFIG_X86_USE_3DNOW - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -extern inline void prefetch(const void *x) -{ - __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); -} - -extern inline void prefetchw(const void *x) -{ - __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); -} -#define spin_lock_prefetch(x) prefetchw(x) - -#endif #endif /* __ASM_I386_PROCESSOR_H */ diff -urN pte-highref/include/asm-ia64/pgalloc.h pte-high/include/asm-ia64/pgalloc.h --- pte-highref/include/asm-ia64/pgalloc.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-ia64/pgalloc.h Fri Feb 22 18:51:25 2002 @@ -108,41 +108,55 @@ } static inline void -pmd_populate (struct mm_struct *mm, pmd_t *pmd_entry, pte_t *pte) +pmd_populate (struct mm_struct 
*mm, pmd_t *pmd_entry, struct page *page) { - pmd_val(*pmd_entry) = __pa(pte); + *pmd_entry = mk_pmd(page, __pgprot(0)); } -static inline pte_t* +static inline struct page * pte_alloc_one_fast (struct mm_struct *mm, unsigned long addr) { - unsigned long *ret = (unsigned long *)pte_quicklist; + struct list_head *entry = pte_quicklist.next; /* head: most recently freed */ + struct page *page = NULL; - if (__builtin_expect(ret != NULL, 1)) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); --pgtable_cache_size; } - return (pte_t *)ret; + return page; } - -static inline pte_t* -pte_alloc_one (struct mm_struct *mm, unsigned long addr) +static inline struct page * +pte_alloc_one_fast_lifo (struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL); + struct list_head *entry = pte_quicklist.prev; /* tail: least recently freed */ + struct page *page = NULL; - if (__builtin_expect(pte != NULL, 1)) - clear_page(pte); - return pte; + if (entry != &pte_quicklist) { + list_del(entry); + page = list_entry(entry, struct page, list); + --pgtable_cache_size; + } + return page; } static inline void -pte_free (pte_t *pte) +pte_free (struct page *page) { - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; + list_add(&page->list, &pte_quicklist); ++pgtable_cache_size; +} + +static inline void +pte_free_slow(struct page * page) +{ + __free_page(page); +} + +static inline void pte_free_via_pmd(pmd_t pmd) +{ + pte_free(virt_to_page(pte_offset(&pmd, 0))); } extern int do_check_pgt_cache (int, int); diff -urN pte-highref/include/asm-ia64/pgtable.h pte-high/include/asm-ia64/pgtable.h --- pte-highref/include/asm-ia64/pgtable.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-ia64/pgtable.h Fri Feb 22 18:51:38 2002 @@ -223,6 +223,13 @@ pte_val(__pte) = ((page - mem_map) << PAGE_SHIFT) | pgprot_val(pgprot); \ __pte; \ }) +#define 
mk_pmd(page,pgprot) \ +({ \ + pmd_t __pmd; \ + \ + pmd_val(__pmd) = ((page - mem_map) << PAGE_SHIFT) | pgprot_val(pgprot); \ + __pmd; \ +}) /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) \ @@ -347,6 +354,12 @@ /* Find an entry in the third-level page table.. */ #define pte_offset(dir,addr) \ ((pte_t *) pmd_page(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) +#define pte_offset2(dir, address) pte_offset(dir, address) +#define pte_offset_atomic(dir, address) pte_offset(dir, address) +#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address) +#define pte_kunmap(ptep) do { } while(0) +#define pte_kunmap2(ptep) do { } while(0) /* atomic versions of the some PTE manipulations: */ diff -urN pte-highref/include/asm-ia64/processor.h pte-high/include/asm-ia64/processor.h --- pte-highref/include/asm-ia64/processor.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/asm-ia64/processor.h Fri Feb 22 18:51:25 2002 @@ -187,6 +187,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #include @@ -253,7 +254,7 @@ __u64 itm_next; /* interval timer mask value to use for next clock tick */ __u64 *pgd_quick; __u64 *pmd_quick; - __u64 *pte_quick; + struct list_head pte_quick; __u64 pgtable_cache_sz; /* CPUID-derived information: */ __u64 ppn; diff -urN pte-highref/include/linux/highmem.h pte-high/include/linux/highmem.h --- pte-highref/include/linux/highmem.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/linux/highmem.h Fri Feb 22 18:51:25 2002 @@ -67,10 +67,12 @@ static inline void *kmap(struct page *page) { return page_address(page); } -#define kunmap(page) do { } while (0) - +#define kunmap(page) do { } while (0) +#define kunmap_vaddr(vaddr) do { } while (0) +#define kmap_pagetable(page) kmap(page) +#define kunmap_pagetable(vaddr) do { } while (0) #define kmap_atomic(page,idx) kmap(page) -#define kunmap_atomic(page,idx) 
kunmap(page) +#define kunmap_atomic(vaddr,idx) do { } while (0) #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) @@ -91,6 +93,13 @@ { clear_page(kmap(page)); kunmap(page); +} + +static inline void clear_pagetable(struct page *page) +{ + void * vaddr = kmap_pagetable(page); + clear_page(vaddr); + kunmap_pagetable(vaddr); } /* diff -urN pte-highref/include/linux/list.h pte-high/include/linux/list.h --- pte-highref/include/linux/list.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/linux/list.h Fri Feb 22 18:51:25 2002 @@ -3,8 +3,6 @@ #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE) -#include - /* * Simple doubly linked list implementation. * @@ -142,6 +140,8 @@ */ #define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +#include /** * list_for_each - iterate over a list diff -urN pte-highref/include/linux/prefetch.h pte-high/include/linux/prefetch.h --- pte-highref/include/linux/prefetch.h Fri Feb 22 18:51:15 2002 +++ pte-high/include/linux/prefetch.h Fri Feb 22 18:51:25 2002 @@ -10,7 +10,7 @@ #ifndef _LINUX_PREFETCH_H #define _LINUX_PREFETCH_H -#include +#include #include /* diff -urN pte-highref/mm/filemap.c pte-high/mm/filemap.c --- pte-highref/mm/filemap.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/filemap.c Fri Feb 22 18:51:25 2002 @@ -2023,7 +2023,7 @@ unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned long offset, unsigned int flags) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; int error; @@ -2034,7 +2034,7 @@ pmd_clear(pmd); return 0; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); offset += address & PMD_MASK; address &= ~PMD_MASK; end = address + size; @@ -2046,6 +2046,7 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); return error; } diff -urN pte-highref/mm/highmem.c pte-high/mm/highmem.c --- pte-highref/mm/highmem.c Fri Feb 22 18:51:15 2002 +++ 
pte-high/mm/highmem.c Fri Feb 22 18:51:25 2002 @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Virtual_count is not a pure "count". @@ -30,14 +32,31 @@ * since the last TLB flush - so we can't use it. * n means that there are (n-1) current users of it. */ -static int pkmap_count[LAST_PKMAP]; +static int pkmap_count[LAST_PKMAP*KM_NR_SERIES]; +static int pkmap_holds[KM_NR_SERIES]; static unsigned int last_pkmap_nr; static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED}; #define kmap_lock kmap_lock_cacheline.lock +#if HIGHMEM_DEBUG +static int kmap_ready; +#endif pte_t * pkmap_page_table; -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); +static wait_queue_head_t pkmap_map_wait[KM_NR_SERIES]; + +static __init int init_kmap(void) +{ + int i; + + for (i = 0; i < KM_NR_SERIES; i++) + init_waitqueue_head(pkmap_map_wait + i); +#if HIGHMEM_DEBUG + kmap_ready = 1; +#endif + return 0; +} +__initcall(init_kmap); static void flush_all_zero_pkmaps(void) { @@ -45,7 +64,7 @@ flush_cache_all(); - for (i = 0; i < LAST_PKMAP; i++) { + for (i = 0; i < LAST_PKMAP*KM_NR_SERIES; i++) { struct page *page; /* @@ -62,6 +81,8 @@ if (pte_none(pkmap_page_table[i])) BUG(); + page = pte_page(pkmap_page_table[i]); + page->virtual = NULL; /* * Don't need an atomic fetch-and-clear op here; * no-one has the page mapped, and cannot get at @@ -69,10 +90,8 @@ * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution. 
*/ - page = pte_page(pkmap_page_table[i]); pte_clear(&pkmap_page_table[i]); - page->virtual = NULL; } flush_tlb_all(); } @@ -80,43 +99,14 @@ static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; - int count; -start: - count = LAST_PKMAP; /* Find an empty entry */ - for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { + do { + if (++last_pkmap_nr >= LAST_PKMAP*KM_NR_SERIES) { + last_pkmap_nr = 0; flush_all_zero_pkmaps(); - count = LAST_PKMAP; } - if (!pkmap_count[last_pkmap_nr]) - break; /* Found a usable entry */ - if (--count) - continue; - - /* - * Sleep for somebody else to unmap their entries - */ - { - DECLARE_WAITQUEUE(wait, current); - - current->state = TASK_UNINTERRUPTIBLE; - add_wait_queue(&pkmap_map_wait, &wait); - spin_unlock(&kmap_lock); - schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); - spin_lock(&kmap_lock); - - /* Somebody else might have mapped it while we slept */ - if (page->virtual) - return (unsigned long) page->virtual; - - /* Re-start */ - goto start; - } - } + } while (pkmap_count[last_pkmap_nr]); vaddr = PKMAP_ADDR(last_pkmap_nr); set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); @@ -126,17 +116,39 @@ return vaddr; } -void *kmap_high(struct page *page) +static inline void wait_for_map(int serie) +{ + DECLARE_WAITQUEUE(wait, current); + + current->state = TASK_UNINTERRUPTIBLE; + add_wait_queue(&pkmap_map_wait[serie], &wait); + spin_unlock(&kmap_lock); + schedule(); + remove_wait_queue(&pkmap_map_wait[serie], &wait); + spin_lock(&kmap_lock); +} + +void *kmap_high(struct page *page, int serie) { unsigned long vaddr; +#if HIGHMEM_DEBUG + if (!kmap_ready) + BUG(); +#endif + if (in_interrupt()) + BUG(); + /* * For highmem pages, we can't trust "virtual" until - * after we have the lock. 
- * - * We cannot call this from interrupts, as it may block + * after we have the lock; and even if virtual is already + * set, we cannot let a serie exceed its quota of maps + * concurrently in use, or else we _might_ deadlock. */ spin_lock(&kmap_lock); + while (pkmap_holds[serie] >= LAST_PKMAP) + wait_for_map(serie); + pkmap_holds[serie]++; vaddr = (unsigned long) page->virtual; if (!vaddr) vaddr = map_new_virtual(page); @@ -147,44 +159,41 @@ return (void*) vaddr; } -void kunmap_high(struct page *page) +void kunmap_high(void * vaddr, int serie) { - unsigned long vaddr; unsigned long nr; int need_wakeup; - spin_lock(&kmap_lock); - vaddr = (unsigned long) page->virtual; - if (!vaddr) + if (in_interrupt()) + BUG(); + if ((unsigned long) vaddr < PKMAP_BASE) BUG(); - nr = PKMAP_NR(vaddr); + nr = PKMAP_NR((unsigned long) vaddr); /* * A count must never go down to zero * without a TLB flush! */ need_wakeup = 0; - switch (--pkmap_count[nr]) { - case 0: + spin_lock(&kmap_lock); + if (--pkmap_count[nr] <= 0) BUG(); - case 1: - /* - * Avoid an unnecessary wake_up() function call. - * The common case is pkmap_count[] == 1, but - * no waiters. - * The tasks queued in the wait-queue are guarded - * by both the lock in the wait-queue-head and by - * the kmap_lock. As the kmap_lock is held here, - * no need for the wait-queue-head's lock. Simply - * test if the queue is empty. - */ - need_wakeup = waitqueue_active(&pkmap_map_wait); - } + if (--pkmap_holds[serie] < 0) + BUG(); + /* + * Avoid an unnecessary wake_up() function call. + * The tasks queued in the wait-queue are guarded + * by both the lock in the wait-queue-head and by + * the kmap_lock. As the kmap_lock is held here, + * no need for the wait-queue-head's lock. Simply + * test if the queue is empty. 
+ */ + need_wakeup = waitqueue_active(&pkmap_map_wait[serie]); spin_unlock(&kmap_lock); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) - wake_up(&pkmap_map_wait); + wake_up(&pkmap_map_wait[serie]); } #define POOL_SIZE 32 diff -urN pte-highref/mm/memory.c pte-high/mm/memory.c --- pte-highref/mm/memory.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/memory.c Fri Feb 22 18:51:25 2002 @@ -90,7 +90,7 @@ */ static inline void free_one_pmd(pmd_t * dir) { - pte_t * pte; + pmd_t pmd; if (pmd_none(*dir)) return; @@ -99,9 +99,9 @@ pmd_clear(dir); return; } - pte = pte_offset(dir, 0); + pmd = *dir; pmd_clear(dir); - pte_free(pte); + pte_free_via_pmd(pmd); } static inline void free_one_pgd(pgd_t * dir) @@ -234,10 +234,10 @@ goto cont_copy_pmd_range; } - src_pte = pte_offset(src_pmd, address); dst_pte = pte_alloc(dst, dst_pmd, address); if (!dst_pte) goto nomem; + src_pte = pte_offset_atomic(src_pmd, address); spin_lock(&src->page_table_lock); do { @@ -272,13 +272,19 @@ cont_copy_pte_range: set_pte(dst_pte, pte); cont_copy_pte_range_noset: address += PAGE_SIZE; - if (address >= end) + if (address >= end) { + pte_kunmap(src_pte); + pte_kunmap(dst_pte); goto out_unlock; + } src_pte++; dst_pte++; } while ((unsigned long)src_pte & PTE_TABLE_MASK); spin_unlock(&src->page_table_lock); - + + pte_kunmap((src_pte - 1)); + pte_kunmap((dst_pte - 1)); + cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); @@ -305,7 +311,7 @@ static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size) { unsigned long offset; - pte_t * ptep; + pte_t * ptep, * ptep_orig; int freed = 0; if (pmd_none(*pmd)) @@ -315,7 +321,7 @@ pmd_clear(pmd); return 0; } - ptep = pte_offset(pmd, address); + ptep_orig = ptep = pte_offset_atomic(pmd, address); offset = address & ~PMD_MASK; if (offset + size > PMD_SIZE) size = PMD_SIZE - offset; @@ -335,6 +341,7 @@ pte_clear(ptep); } } + pte_kunmap(ptep_orig); return 
freed; } @@ -427,11 +434,10 @@ if (pmd_none(*pmd) || pmd_bad(*pmd)) goto out; - ptep = pte_offset(pmd, address); - if (!ptep) - goto out; + ptep = pte_offset_atomic(pmd, address); pte = *ptep; + pte_kunmap(ptep); if (pte_present(pte)) { if (!write || (pte_write(pte) && pte_dirty(pte))) @@ -788,6 +794,7 @@ if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -868,6 +875,7 @@ if (!pte) return -ENOMEM; remap_pte_range(pte, address, end - address, address + phys_addr, prot); + pte_kunmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -1392,8 +1400,11 @@ if (pmd) { pte_t * pte = pte_alloc(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte); + if (pte) { + int ret = handle_pte_fault(mm, vma, address, write_access, pte); + pte_kunmap(pte); + return ret; + } } spin_unlock(&mm->page_table_lock); return -1; @@ -1436,6 +1447,16 @@ return pmd_offset(pgd, address); } +static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page * page; + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (page) + clear_pagetable(page); + return page; +} + /* * Allocate the page table directory. * @@ -1444,16 +1465,18 @@ */ pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { + pte_t * pte; + if (pmd_none(*pmd)) { - pte_t *new; + struct page * page; /* "fast" allocation can happen without dropping the lock.. */ - new = pte_alloc_one_fast(mm, address); - if (!new) { + page = pte_alloc_one_fast(mm, address); + if (!page) { spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); + page = pte_alloc_one(mm, address); spin_lock(&mm->page_table_lock); - if (!new) + if (unlikely(!page)) return NULL; /* @@ -1461,15 +1484,16 @@ * entry, as somebody else could have populated it.. 
*/ if (!pmd_none(*pmd)) { - pte_free(new); + pte_free(page); check_pgt_cache(); goto out; } } - pmd_populate(mm, pmd, new); + pmd_populate(mm, pmd, page); } out: - return pte_offset(pmd, address); + pte = pte_offset_under_lock(pmd, address, mm); + return pte; } int make_pages_present(unsigned long addr, unsigned long end) diff -urN pte-highref/mm/mprotect.c pte-high/mm/mprotect.c --- pte-highref/mm/mprotect.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/mprotect.c Fri Feb 22 18:51:25 2002 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, pgprot_t newprot) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -25,7 +26,7 @@ pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -44,6 +45,7 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); } static inline void change_pmd_range(pgd_t * pgd, unsigned long address, diff -urN pte-highref/mm/mremap.c pte-high/mm/mremap.c --- pte-highref/mm/mremap.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/mremap.c Fri Feb 22 18:51:25 2002 @@ -9,13 +9,14 @@ #include #include #include +#include #include #include extern int vm_enough_memory(long pages); -static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) +static pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) { pgd_t * pgd; pmd_t * pmd; @@ -39,9 +40,11 @@ goto end; } - pte = pte_offset(pmd, addr); - if (pte_none(*pte)) + pte = pte_offset2_under_lock(pmd, addr, mm); + if (pte_none(*pte)) { + pte_kunmap2(pte); pte = NULL; + } end: return pte; } @@ -57,32 +60,32 @@ return pte; } -static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) +static inline void copy_one_pte(pte_t * src, pte_t * dst) { - int error = 0; 
pte_t pte; if (!pte_none(*src)) { pte = ptep_get_and_clear(src); - if (!dst) { - /* No dest? We must put it back. */ - dst = src; - error++; - } set_pte(dst, pte); } - return error; } static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned long new_addr) { int error = 0; - pte_t * src; + pte_t * src, * dst; spin_lock(&mm->page_table_lock); src = get_one_pte(mm, old_addr); - if (src) - error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr)); + if (src) { + dst = alloc_one_pte(mm, new_addr); + if (dst) { + copy_one_pte(src, dst); + pte_kunmap(dst); + } else + error = 1; + pte_kunmap2(src); + } spin_unlock(&mm->page_table_lock); return error; } diff -urN pte-highref/mm/swapfile.c pte-high/mm/swapfile.c --- pte-highref/mm/swapfile.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/swapfile.c Fri Feb 22 18:51:25 2002 @@ -402,7 +402,7 @@ unsigned long address, unsigned long size, unsigned long offset, swp_entry_t entry, struct page* page) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*dir)) @@ -412,7 +412,7 @@ pmd_clear(dir); return; } - pte = pte_offset(dir, address); + pte_orig = pte = pte_offset_atomic(dir, address); offset += address & PMD_MASK; address &= ~PMD_MASK; end = address + size; @@ -423,6 +423,7 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); } /* mmlist_lock and vma->vm_mm->page_table_lock are held */ diff -urN pte-highref/mm/vmalloc.c pte-high/mm/vmalloc.c --- pte-highref/mm/vmalloc.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/vmalloc.c Fri Feb 22 18:51:25 2002 @@ -21,7 +21,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long end; if (pmd_none(*pmd)) @@ -31,7 +31,7 @@ pmd_clear(pmd); return; } - pte = pte_offset(pmd, address); + pte_orig = pte = pte_offset_atomic(pmd, address); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) @@ -51,6 +51,7 @@ } 
printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); } while (address < end); + pte_kunmap(pte_orig); } static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size) @@ -126,10 +127,13 @@ if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { + int err; pte_t * pte = pte_alloc(&init_mm, pmd, address); if (!pte) return -ENOMEM; - if (alloc_area_pte(pte, address, end - address, gfp_mask, prot)) + err = alloc_area_pte(pte, address, end - address, gfp_mask, prot); + pte_kunmap(pte); + if (err) return -ENOMEM; address = (address + PMD_SIZE) & PMD_MASK; pmd++; diff -urN pte-highref/mm/vmscan.c pte-high/mm/vmscan.c --- pte-highref/mm/vmscan.c Fri Feb 22 18:51:15 2002 +++ pte-high/mm/vmscan.c Fri Feb 22 18:51:25 2002 @@ -170,7 +170,7 @@ /* mm->page_table_lock is held. mmap_sem is not held */ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) { - pte_t * pte; + pte_t * pte, * pte_orig; unsigned long pmd_end; if (pmd_none(*dir)) @@ -181,7 +181,7 @@ return count; } - pte = pte_offset(dir, address); + pte_orig = pte = pte_offset_atomic(dir, address); pmd_end = (address + PMD_SIZE) & PMD_MASK; if (end > pmd_end) @@ -202,6 +202,7 @@ address += PAGE_SIZE; pte++; } while (address && (address < end)); + pte_kunmap(pte_orig); mm->swap_address = address; return count; }