From: William Lee Irwin III Here's highpmd. This allocates L2 pagetables from highmem, decreasing the per-process lowmem overhead on CONFIG_HIGHMEM64G from 20KB to 8KB. Some attempts were made to update non-i386 architectures to the new API's, though they're entirely untested. It's been tested for a while in -wli on i386 machines, both lowmem and highmem boxen. arch/alpha/mm/remap.c | 2 arch/arm/mach-arc/mm.c | 11 +- arch/arm/mm/consistent.c | 2 arch/arm/mm/ioremap.c | 2 arch/arm/mm/minicache.c | 2 arch/arm/mm/mm-armv.c | 12 +- arch/arm26/mm/mm-memc.c | 4 arch/cris/mm/ioremap.c | 2 arch/i386/Kconfig | 10 ++ arch/i386/kernel/vm86.c | 9 + arch/i386/mm/fault.c | 14 ++ arch/i386/mm/hugetlbpage.c | 20 +++- arch/i386/mm/init.c | 21 +--- arch/i386/mm/ioremap.c | 2 arch/i386/mm/pageattr.c | 6 - arch/i386/mm/pgtable.c | 23 ++-- arch/ia64/mm/hugetlbpage.c | 4 arch/ia64/mm/init.c | 4 arch/m68k/kernel/head.S | 2 arch/m68k/mm/kmap.c | 2 arch/m68k/sun3x/dvma.c | 2 arch/mips/mm/ioremap.c | 2 arch/parisc/kernel/pci-dma.c | 2 arch/parisc/mm/ioremap.c | 2 arch/ppc64/mm/init.c | 2 arch/s390/mm/ioremap.c | 2 arch/sh/mm/ioremap.c | 6 - arch/sparc/mm/generic.c | 8 - arch/sparc/mm/srmmu.c | 4 arch/sparc/mm/sun4c.c | 4 arch/sparc64/mm/generic.c | 8 - arch/sparc64/mm/hugetlbpage.c | 14 +- arch/x86_64/ia32/syscall32.c | 7 + arch/x86_64/mm/ioremap.c | 2 drivers/char/drm/drm_memory.h | 2 fs/exec.c | 7 + include/asm-alpha/pgalloc.h | 28 +++-- include/asm-alpha/pgtable.h | 12 ++ include/asm-arm/pgalloc.h | 3 include/asm-arm/pgtable.h | 5 + include/asm-arm26/pgalloc.h | 3 include/asm-arm26/pgtable.h | 10 +- include/asm-cris/pgalloc.h | 3 include/asm-cris/pgtable.h | 6 + include/asm-h8300/pgtable.h | 5 + include/asm-i386/highmem.h | 4 include/asm-i386/kmap_types.h | 18 ++- include/asm-i386/pgalloc.h | 3 include/asm-i386/pgtable-2level.h | 14 +- include/asm-i386/pgtable-3level.h | 28 +++++ include/asm-i386/pgtable.h | 2 include/asm-ia64/pgalloc.h | 17 ++- include/asm-ia64/pgtable.h | 11 +- 
include/asm-m68k/motorola_pgalloc.h | 23 +++- include/asm-m68k/motorola_pgtable.h | 7 + include/asm-m68k/sun3_pgalloc.h | 3 include/asm-m68knommu/pgtable.h | 7 + include/asm-mips/pgalloc.h | 3 include/asm-mips/pgtable.h | 6 + include/asm-mips64/pgalloc.h | 17 ++- include/asm-mips64/pgtable.h | 12 ++ include/asm-parisc/pgalloc.h | 22 +++- include/asm-parisc/pgtable.h | 11 +- include/asm-ppc/pgalloc.h | 3 include/asm-ppc/pgtable.h | 9 + include/asm-ppc64/pgalloc.h | 17 ++- include/asm-ppc64/pgtable.h | 11 +- include/asm-s390/pgalloc.h | 22 +++- include/asm-s390/pgtable.h | 7 + include/asm-sh/pgalloc.h | 3 include/asm-sh/pgtable-2level.h | 9 + include/asm-sparc/pgalloc.h | 17 ++- include/asm-sparc/pgtable.h | 10 +- include/asm-sparc64/pgalloc.h | 15 ++- include/asm-sparc64/pgtable.h | 10 +- include/asm-um/pgalloc.h | 3 include/asm-um/pgtable.h | 6 + include/asm-v850/pgtable.h | 5 + include/asm-x86_64/pgalloc.h | 19 ++- include/asm-x86_64/pgtable.h | 10 +- include/linux/mm.h | 14 +- mm/fremap.c | 5 - mm/memory.c | 172 ++++++++++++++++++++++++++---------- mm/mprotect.c | 3 mm/mremap.c | 17 ++- mm/msync.c | 3 mm/slab.c | 2 mm/swapfile.c | 3 mm/vmalloc.c | 4 89 files changed, 644 insertions(+), 266 deletions(-) diff -puN arch/alpha/mm/remap.c~highpmd arch/alpha/mm/remap.c --- 25/arch/alpha/mm/remap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/alpha/mm/remap.c 2003-07-04 22:24:13.000000000 -0700 @@ -73,7 +73,7 @@ __alpha_remap_area_pages(unsigned long a spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/arm26/mm/mm-memc.c~highpmd arch/arm26/mm/mm-memc.c --- 25/arch/arm26/mm/mm-memc.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm26/mm/mm-memc.c 2003-07-04 22:24:13.000000000 -0700 @@ -81,7 +81,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm goto no_pgd; /* - * This lock is here just to satisfy 
pmd_alloc and pte_lock + * This lock is here just to satisfy pmd_alloc_kernel() and pte_lock * FIXME: I bet we could avoid taking it pretty much altogether */ spin_lock(&mm->page_table_lock); @@ -90,7 +90,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm * On ARM, first page must always be allocated since it contains * the machine vectors. */ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pmd = pmd_alloc_kernel(mm, new_pgd, 0); if (!new_pmd) goto no_pmd; diff -puN arch/arm/mach-arc/mm.c~highpmd arch/arm/mach-arc/mm.c --- 25/arch/arm/mach-arc/mm.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm/mach-arc/mm.c 2003-07-04 22:24:13.000000000 -0700 @@ -66,7 +66,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm goto no_pgd; /* - * This lock is here just to satisfy pmd_alloc and pte_lock + * This lock is here just to satisfy pmd_alloc_map() and pte_lock */ spin_lock(&mm->page_table_lock); @@ -74,13 +74,15 @@ pgd_t *get_pgd_slow(struct mm_struct *mm * On ARM, first page must always be allocated since it contains * the machine vectors. 
*/ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pmd = pmd_alloc_map(mm, new_pgd, 0); if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, new_pmd, 0); - if (!new_pte) + new_pte = pte_alloc_map(mm, new_pgd, &new_pmd, 0); + if (!new_pte) { + pmd_unmap(new_pmd); goto no_pte; + } init_pgd = pgd_offset_k(0); init_pmd = pmd_offset(init_pgd, 0); @@ -88,6 +90,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm set_pte(new_pte, *init_pte); pte_unmap_nested(init_pte); pte_unmap(new_pte); + pmd_unmap(new_pmd); /* * most of the page table entries are zeroed diff -puN arch/arm/mm/consistent.c~highpmd arch/arm/mm/consistent.c --- 25/arch/arm/mm/consistent.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm/mm/consistent.c 2003-07-04 22:24:13.000000000 -0700 @@ -325,7 +325,7 @@ static int __init consistent_init(void) do { pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc_kernel(&init_mm, pgd, CONSISTENT_BASE); if (!pmd) { printk(KERN_ERR "consistent_init: out of pmd tables\n"); return -ENOMEM; diff -puN arch/arm/mm/ioremap.c~highpmd arch/arm/mm/ioremap.c --- 25/arch/arm/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -95,7 +95,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/arm/mm/minicache.c~highpmd arch/arm/mm/minicache.c --- 25/arch/arm/mm/minicache.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm/mm/minicache.c 2003-07-04 22:24:13.000000000 -0700 @@ -57,7 +57,7 @@ static int __init minicache_init(void) pmd_t *pmd; pgd = pgd_offset_k(minicache_address); - pmd = pmd_alloc(&init_mm, pgd, minicache_address); + pmd = pmd_alloc_kernel(&init_mm, pgd, minicache_address); if (!pmd) BUG(); minicache_pte = 
pte_alloc_kernel(&init_mm, pmd, minicache_address); diff -puN arch/arm/mm/mm-armv.c~highpmd arch/arm/mm/mm-armv.c --- 25/arch/arm/mm/mm-armv.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/arm/mm/mm-armv.c 2003-07-04 22:24:13.000000000 -0700 @@ -131,7 +131,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm if (vectors_base() == 0) { /* - * This lock is here just to satisfy pmd_alloc and pte_lock + * This lock is here just to satisfy pmd_alloc_map() and pte_lock */ spin_lock(&mm->page_table_lock); @@ -139,20 +139,22 @@ pgd_t *get_pgd_slow(struct mm_struct *mm * On ARM, first page must always be allocated since it * contains the machine vectors. */ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pmd = pmd_alloc_map(mm, new_pgd, 0); if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, new_pmd, 0); - if (!new_pte) + new_pte = pte_alloc_map(mm, new_pgd, &new_pmd, 0); + if (!new_pte) { + pmd_unmap(new_pmd); goto no_pte; + } init_pmd = pmd_offset(init_pgd, 0); init_pte = pte_offset_map_nested(init_pmd, 0); set_pte(new_pte, *init_pte); pte_unmap_nested(init_pte); pte_unmap(new_pte); - + pmd_unmap(new_pmd); spin_unlock(&mm->page_table_lock); } diff -puN arch/cris/mm/ioremap.c~highpmd arch/cris/mm/ioremap.c --- 25/arch/cris/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/cris/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -78,7 +78,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/i386/Kconfig~highpmd arch/i386/Kconfig --- 25/arch/i386/Kconfig~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/Kconfig 2003-07-04 22:24:13.000000000 -0700 @@ -723,6 +723,16 @@ config HIGHPTE low memory. Setting this option will put user-space page table entries in high memory. 
+config HIGHPMD + bool "Allocate 2nd-level pagetables from highmem" + depends on HIGHMEM64G && HIGHPTE + help + The VM uses one lowmem-allocated pmd entry for each pagetable + page of physical memory allocated, and preallocates them all + for 12KB of per-process lowmem overhead. For systems with + extreme amounts of highmem, this cannot be tolerated. Setting + this option will put userspace 2nd-level pagetables in highmem. + config MATH_EMULATION bool "Math emulation" ---help--- diff -puN arch/i386/kernel/vm86.c~highpmd arch/i386/kernel/vm86.c --- 25/arch/i386/kernel/vm86.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/kernel/vm86.c 2003-07-04 22:24:13.000000000 -0700 @@ -144,12 +144,14 @@ static void mark_screen_rdonly(struct ta pgd_clear(pgd); goto out; } - pmd = pmd_offset(pgd, 0xA0000); - if (pmd_none(*pmd)) + pmd = pmd_offset_map(pgd, 0xA0000); + if (pmd_none(*pmd)) { + pmd_unmap(pmd); goto out; - if (pmd_bad(*pmd)) { + } else if (pmd_bad(*pmd)) { pmd_ERROR(*pmd); pmd_clear(pmd); + pmd_unmap(pmd); goto out; } pte = mapped = pte_offset_map(pmd, 0xA0000); @@ -159,6 +161,7 @@ static void mark_screen_rdonly(struct ta pte++; } pte_unmap(mapped); + pmd_unmap(pmd); out: spin_unlock(&tsk->mm->page_table_lock); preempt_enable(); diff -puN arch/i386/mm/fault.c~highpmd arch/i386/mm/fault.c --- 25/arch/i386/mm/fault.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/fault.c 2003-07-04 22:24:13.000000000 -0700 @@ -253,6 +253,13 @@ no_context: printk(" printing eip:\n"); printk("%08lx\n", regs->eip); asm("movl %%cr3,%0":"=r" (page)); +#ifdef CONFIG_HIGHPMD /* Oh boy. Error reporting is going to blow major goats. */ + printk(KERN_ALERT "%%cr3 = 0x%lx\n", page); + /* Mask off flag bits. It should end up 32B-aligned. 
*/ + page &= ~(PTRS_PER_PGD*sizeof(pgd_t) - 1); + printk(KERN_ALERT "*pdpte = 0x%Lx\n", + pgd_val(((pgd_t *)__va(page))[address >> PGDIR_SHIFT])); +#else /* !CONFIG_HIGHPMD */ page = ((unsigned long *) __va(page))[address >> 22]; printk(KERN_ALERT "*pde = %08lx\n", page); /* @@ -268,7 +275,8 @@ no_context: page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; printk(KERN_ALERT "*pte = %08lx\n", page); } -#endif +#endif /* !CONFIG_HIGHPTE */ +#endif /* CONFIG_HIGHPMD */ die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); @@ -336,8 +344,8 @@ vmalloc_fault: * and redundant with the set_pmd() on non-PAE. */ - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); + pmd = pmd_offset_kernel(pgd, address); + pmd_k = pmd_offset_kernel(pgd_k, address); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); diff -puN arch/i386/mm/hugetlbpage.c~highpmd arch/i386/mm/hugetlbpage.c --- 25/arch/i386/mm/hugetlbpage.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/hugetlbpage.c 2003-07-04 22:24:13.000000000 -0700 @@ -87,8 +87,8 @@ static pte_t *huge_pte_alloc(struct mm_s pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - pmd = pmd_alloc(mm, pgd, addr); - return (pte_t *) pmd; + pmd = pmd_alloc_map(mm, pgd, addr); + return (pte_t *)pmd; } static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) @@ -97,8 +97,8 @@ static pte_t *huge_pte_offset(struct mm_ pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - pmd = pmd_offset(pgd, addr); - return (pte_t *) pmd; + pmd = pmd_offset_map_nested(pgd, addr); + return (pte_t *)pmd; } static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access) @@ -145,6 +145,8 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); + pmd_unmap(dst_pte); + pmd_unmap_nested(src_pte); dst->rss += (HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } @@ -182,6 
+184,7 @@ follow_hugetlb_page(struct mm_struct *mm get_page(page); pages[i] = page; + pmd_unmap_nested(pte); } if (vmas) @@ -271,6 +274,7 @@ follow_huge_pmd(struct mm_struct *mm, un page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); get_page(page); } + pmd_unmap(pmd); return page; } #endif @@ -314,6 +318,7 @@ void unmap_hugepage_range(struct vm_area page = pte_page(*pte); huge_page_release(page); pte_clear(pte); + pmd_unmap_nested(pte); } mm->rss -= (end - start) >> PAGE_SHIFT; flush_tlb_range(vma, start, end); @@ -348,8 +353,10 @@ int hugetlb_prefault(struct address_spac ret = -ENOMEM; goto out; } - if (!pte_none(*pte)) + if (!pte_none(*pte)) { + pmd_unmap(pte); continue; + } idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); @@ -358,16 +365,19 @@ int hugetlb_prefault(struct address_spac page = alloc_hugetlb_page(); if (!page) { ret = -ENOMEM; + pmd_unmap(pte); goto out; } ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); unlock_page(page); if (ret) { free_huge_page(page); + pmd_unmap(pte); goto out; } } set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); + pmd_unmap(pte); } out: spin_unlock(&mm->page_table_lock); diff -puN arch/i386/mm/init.c~highpmd arch/i386/mm/init.c --- 25/arch/i386/mm/init.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/init.c 2003-07-04 22:24:13.000000000 -0700 @@ -59,10 +59,10 @@ static pmd_t * __init one_md_table_init( #ifdef CONFIG_X86_PAE pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) + if (pmd_table != pmd_offset_kernel(pgd, 0)) BUG(); #else - pmd_table = pmd_offset(pgd, 0); + pmd_table = pmd_offset_kernel(pgd, 0); #endif return pmd_table; @@ -113,7 +113,7 @@ static void __init page_table_range_init if (pgd_none(*pgd)) one_md_table_init(pgd); - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr 
!= end); pmd++, pmd_idx++) { if (pmd_none(*pmd)) one_page_table_init(pmd); @@ -194,7 +194,7 @@ pte_t *kmap_pte; pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + pte_offset_kernel(pmd_offset_kernel(pgd_offset_k(vaddr), (vaddr)), (vaddr)) void __init kmap_init(void) { @@ -218,7 +218,7 @@ void __init permanent_kmaps_init(pgd_t * page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; } @@ -513,20 +513,9 @@ void __init mem_init(void) } kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; void __init pgtable_cache_init(void) { - if (PTRS_PER_PMD > 1) { - pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - pmd_ctor, - NULL); - if (!pmd_cache) - panic("pgtable_cache_init(): cannot create pmd cache"); - } pgd_cache = kmem_cache_create("pgd", PTRS_PER_PGD*sizeof(pgd_t), 0, diff -puN arch/i386/mm/ioremap.c~highpmd arch/i386/mm/ioremap.c --- 25/arch/i386/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -82,7 +82,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/i386/mm/pageattr.c~highpmd arch/i386/mm/pageattr.c --- 25/arch/i386/mm/pageattr.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/pageattr.c 2003-07-04 22:24:13.000000000 -0700 @@ -23,7 +23,7 @@ static inline pte_t *lookup_address(unsi pmd_t *pmd; if (pgd_none(*pgd)) return NULL; - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_kernel(pgd, address); if (pmd_none(*pmd)) return NULL; if 
(pmd_large(*pmd)) @@ -79,7 +79,7 @@ static void set_pmd_pte(pte_t *kpte, uns pgd_t *pgd; pmd_t *pmd; pgd = (pgd_t *)page_address(page) + pgd_index(address); - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_kernel(pgd, address); set_pte_atomic((pte_t *)pmd, pte); } spin_unlock_irqrestore(&pgd_lock, flags); @@ -92,7 +92,7 @@ static void set_pmd_pte(pte_t *kpte, uns static inline void revert_page(struct page *kpte_page, unsigned long address) { pte_t *linear = (pte_t *) - pmd_offset(pgd_offset(&init_mm, address), address); + pmd_offset_kernel(pgd_offset_k(address), address); set_pmd_pte(linear, address, pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); diff -puN arch/i386/mm/pgtable.c~highpmd arch/i386/mm/pgtable.c --- 25/arch/i386/mm/pgtable.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/i386/mm/pgtable.c 2003-07-04 22:24:13.000000000 -0700 @@ -70,7 +70,7 @@ static void set_pte_pfn(unsigned long va BUG(); return; } - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); if (pmd_none(*pmd)) { BUG(); return; @@ -110,7 +110,7 @@ void set_pmd_pfn(unsigned long vaddr, un printk ("set_pmd_pfn: pgd_none\n"); return; /* BUG(); */ } - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); set_pmd(pmd, pfn_pmd(pfn, flags)); /* * It's enough to flush this one mapping. 
@@ -152,11 +152,6 @@ struct page *pte_alloc_one(struct mm_str return pte; } -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) -{ - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -} - /* * List of all pgd's needed for non-PAE so it can invalidate entries * in both cached and uncached pgd's; not needed for PAE since the @@ -212,16 +207,22 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; for (i = 0; i < USER_PTRS_PER_PGD; ++i) { - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + struct page *pmd; +#ifdef CONFIG_HIGHPMD + pmd = alloc_page(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT); +#else + pmd = alloc_page(GFP_KERNEL|__GFP_REPEAT); +#endif if (!pmd) goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); + clear_highpage(pmd); + set_pgd(&pgd[i], __pgd(1ULL | (u64)page_to_pfn(pmd) << PAGE_SHIFT)); } return pgd; out_oom: for (i--; i >= 0; i--) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + __free_page(pgd_page(pgd[i])); kmem_cache_free(pgd_cache, pgd); return NULL; } @@ -233,7 +234,7 @@ void pgd_free(pgd_t *pgd) /* in the PAE case user pgd entries are overwritten before usage */ if (PTRS_PER_PMD > 1) for (i = 0; i < USER_PTRS_PER_PGD; ++i) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + __free_page(pgd_page(pgd[i])); /* in the non-PAE case, clear_page_tables() clears user pgd entries */ kmem_cache_free(pgd_cache, pgd); } diff -puN arch/ia64/mm/hugetlbpage.c~highpmd arch/ia64/mm/hugetlbpage.c --- 25/arch/ia64/mm/hugetlbpage.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/ia64/mm/hugetlbpage.c 2003-07-04 22:24:13.000000000 -0700 @@ -60,9 +60,9 @@ huge_pte_alloc (struct mm_struct *mm, un pte_t *pte = NULL; pgd = pgd_offset(mm, taddr); - pmd = pmd_alloc(mm, pgd, taddr); + pmd = pmd_alloc_map(mm, pgd, taddr); if (pmd) - pte = pte_alloc_map(mm, pmd, taddr); + pte = pte_alloc_map(mm, pgd, &pmd, taddr); return pte; } diff -puN arch/ia64/mm/init.c~highpmd arch/ia64/mm/init.c --- 
25/arch/ia64/mm/init.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/ia64/mm/init.c 2003-07-04 22:24:13.000000000 -0700 @@ -286,10 +286,10 @@ put_kernel_page (struct page *page, unsi spin_lock(&init_mm.page_table_lock); { - pmd = pmd_alloc(&init_mm, pgd, address); + pmd = pmd_alloc_kernel(&init_mm, pgd, address); if (!pmd) goto out; - pte = pte_alloc_map(&init_mm, pmd, address); + pte = pte_alloc_map(&init_mm, pgd, &pmd, address); if (!pte) goto out; if (!pte_none(*pte)) { diff -puN arch/m68k/kernel/head.S~highpmd arch/m68k/kernel/head.S --- 25/arch/m68k/kernel/head.S~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/m68k/kernel/head.S 2003-07-04 22:24:13.000000000 -0700 @@ -110,7 +110,7 @@ * * These routines are used by other mmu routines to get a pointer into * a table, if necessary a new table is allocated. These routines are working - * basically like pmd_alloc() and pte_alloc() in . The root + * basically like pmd_alloc_map() and pte_alloc_map() in . The root * table needs of course only to be allocated once in mmu_get_root_table_entry, * so that here also some mmu specific initialization is done. 
The second page * at the start of the kernel (the first page is unmapped later) is used for diff -puN arch/m68k/mm/kmap.c~highpmd arch/m68k/mm/kmap.c --- 25/arch/m68k/mm/kmap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/m68k/mm/kmap.c 2003-07-04 22:24:13.000000000 -0700 @@ -189,7 +189,7 @@ void *__ioremap(unsigned long physaddr, printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr); #endif pgd_dir = pgd_offset_k(virtaddr); - pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr); + pmd_dir = pmd_alloc_kernel(&init_mm, pgd_dir, virtaddr); if (!pmd_dir) { printk("ioremap: no mem for pmd_dir\n"); return NULL; diff -puN arch/m68k/sun3x/dvma.c~highpmd arch/m68k/sun3x/dvma.c --- 25/arch/m68k/sun3x/dvma.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/m68k/sun3x/dvma.c 2003-07-04 22:24:13.000000000 -0700 @@ -102,7 +102,7 @@ inline int dvma_map_cpu(unsigned long ka pmd_t *pmd; unsigned long end2; - if((pmd = pmd_alloc(&init_mm, pgd, vaddr)) == NULL) { + if((pmd = pmd_alloc_kernel(&init_mm, pgd, vaddr)) == NULL) { ret = -ENOMEM; goto out; } diff -puN arch/mips/mm/ioremap.c~highpmd arch/mips/mm/ioremap.c --- 25/arch/mips/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/mips/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -81,7 +81,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/parisc/kernel/pci-dma.c~highpmd arch/parisc/kernel/pci-dma.c --- 25/arch/parisc/kernel/pci-dma.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/parisc/kernel/pci-dma.c 2003-07-04 22:24:13.000000000 -0700 @@ -133,7 +133,7 @@ static inline int map_uncached_pages(uns do { pmd_t *pmd; - pmd = pmd_alloc(NULL, dir, vaddr); + pmd = pmd_alloc_kernel(NULL, dir, vaddr); if (!pmd) return -ENOMEM; if (map_pmd_uncached(pmd, vaddr, end - vaddr, &paddr)) diff 
-puN arch/parisc/mm/ioremap.c~highpmd arch/parisc/mm/ioremap.c --- 25/arch/parisc/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/parisc/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -77,7 +77,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(dir, address); + pmd = pmd_alloc_kernel(dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/ppc64/mm/init.c~highpmd arch/ppc64/mm/init.c --- 25/arch/ppc64/mm/init.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/ppc64/mm/init.c 2003-07-04 22:24:13.000000000 -0700 @@ -211,7 +211,7 @@ static void map_io_page(unsigned long ea if (mem_init_done) { spin_lock(&ioremap_mm.page_table_lock); pgdp = pgd_offset_i(ea); - pmdp = pmd_alloc(&ioremap_mm, pgdp, ea); + pmdp = pmd_alloc_kernel(&ioremap_mm, pgdp, ea); ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea); pa = absolute_to_phys(pa); diff -puN arch/s390/mm/ioremap.c~highpmd arch/s390/mm/ioremap.c --- 25/arch/s390/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/s390/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -83,7 +83,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN arch/sh/mm/ioremap.c~highpmd arch/sh/mm/ioremap.c --- 25/arch/sh/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sh/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -45,7 +45,7 @@ static inline void remap_area_pte(pte_t } while (address && (address < end)); } -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, +static inline int remap_area_pmd(pgd_t *pgd, pmd_t * pmd, unsigned long address, unsigned long size, unsigned long phys_addr, unsigned long flags) { unsigned long end; @@ -83,11 +83,11 @@ int remap_area_pages(unsigned long addre 
spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_map(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; - if (remap_area_pmd(pmd, address, end - address, + if (remap_area_pmd(dir, pmd, address, end - address, phys_addr + address, flags)) break; error = 0; diff -puN arch/sparc64/mm/generic.c~highpmd arch/sparc64/mm/generic.c --- 25/arch/sparc64/mm/generic.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sparc64/mm/generic.c 2003-07-04 22:24:13.000000000 -0700 @@ -85,7 +85,7 @@ static inline void io_remap_pte_range(pt } while (address < end); } -static inline int io_remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, +static inline int io_remap_pmd_range(pgd_t *pgd, pmd_t * pmd, unsigned long address, unsigned long size, unsigned long offset, pgprot_t prot, int space) { unsigned long end; @@ -96,7 +96,7 @@ static inline int io_remap_pmd_range(pmd end = PGDIR_SIZE; offset -= address; do { - pte_t * pte = pte_alloc_map(current->mm, pmd, address); + pte_t * pte = pte_alloc_map(current->mm, pgd, &pmd, address); if (!pte) return -ENOMEM; io_remap_pte_range(pte, address, end - address, address + offset, prot, space); @@ -122,11 +122,11 @@ int io_remap_page_range(struct vm_area_s spin_lock(&mm->page_table_lock); while (from < end) { - pmd_t *pmd = pmd_alloc(current->mm, dir, from); + pmd_t *pmd = pmd_alloc_map(current->mm, dir, from); error = -ENOMEM; if (!pmd) break; - error = io_remap_pmd_range(pmd, from, end - from, offset + from, prot, space); + error = io_remap_pmd_range(pgd, pmd, from, end - from, offset + from, prot, space); if (error) break; from = (from + PGDIR_SIZE) & PGDIR_MASK; diff -puN arch/sparc64/mm/hugetlbpage.c~highpmd arch/sparc64/mm/hugetlbpage.c --- 25/arch/sparc64/mm/hugetlbpage.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sparc64/mm/hugetlbpage.c 2003-07-04 22:24:13.000000000 -0700 @@ -107,9 +107,11 @@ static pte_t 
*huge_pte_alloc_map(struct pgd = pgd_offset(mm, addr); if (pgd) { - pmd = pmd_alloc(mm, pgd, addr); - if (pmd) - pte = pte_alloc_map(mm, pmd, addr); + pmd = pmd_alloc_map(mm, pgd, addr); + if (pmd) { + pte = pte_alloc_map(mm, pgd, &pmd, addr); + pmd_unmap(pmd); + } } return pte; } @@ -122,9 +124,11 @@ static pte_t *huge_pte_offset_map(struct pgd = pgd_offset(mm, addr); if (pgd) { - pmd = pmd_offset(pgd, addr); - if (pmd) + pmd = pmd_offset_map(pgd, addr); + if (pmd) { pte = pte_offset_map(pmd, addr); + pmd_unmap(pmd); + } } return pte; } diff -puN arch/sparc/mm/generic.c~highpmd arch/sparc/mm/generic.c --- 25/arch/sparc/mm/generic.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sparc/mm/generic.c 2003-07-04 22:24:13.000000000 -0700 @@ -67,7 +67,7 @@ static inline void io_remap_pte_range(pt } while (address < end); } -static inline int io_remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, +static inline int io_remap_pmd_range(pgd_t *pgd, pmd_t * pmd, unsigned long address, unsigned long size, unsigned long offset, pgprot_t prot, int space) { unsigned long end; @@ -78,7 +78,7 @@ static inline int io_remap_pmd_range(pmd end = PGDIR_SIZE; offset -= address; do { - pte_t * pte = pte_alloc_map(current->mm, pmd, address); + pte_t * pte = pte_alloc_map(current->mm, pgd, &pmd, address); if (!pte) return -ENOMEM; io_remap_pte_range(pte, address, end - address, address + offset, prot, space); @@ -103,11 +103,11 @@ int io_remap_page_range(struct vm_area_s spin_lock(&mm->page_table_lock); while (from < end) { - pmd_t *pmd = pmd_alloc(current->mm, dir, from); + pmd_t *pmd = pmd_alloc_map(current->mm, dir, from); error = -ENOMEM; if (!pmd) break; - error = io_remap_pmd_range(pmd, from, end - from, offset + from, prot, space); + error = io_remap_pmd_range(pgd, pmd, from, end - from, offset + from, prot, space); if (error) break; from = (from + PGDIR_SIZE) & PGDIR_MASK; diff -puN arch/sparc/mm/srmmu.c~highpmd arch/sparc/mm/srmmu.c --- 
25/arch/sparc/mm/srmmu.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sparc/mm/srmmu.c 2003-07-04 22:24:13.000000000 -0700 @@ -2180,7 +2180,7 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(__pgd_page, srmmu_pgd_page, BTFIXUPCALL_NORM); BTFIXUPSET_SETHI(none_mask, 0xF0000000); @@ -2212,7 +2212,7 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(pte_alloc_one_kernel, srmmu_pte_alloc_one_kernel, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pte_alloc_one, srmmu_pte_alloc_one, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_pmd_fast, srmmu_pmd_free, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_alloc_one, srmmu_pmd_alloc_one, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(__pmd_alloc_one, srmmu_pmd_alloc_one, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_pgd_fast, srmmu_free_pgd_fast, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(get_pgd_fast, srmmu_get_pgd_fast, BTFIXUPCALL_NORM); diff -puN arch/sparc/mm/sun4c.c~highpmd arch/sparc/mm/sun4c.c --- 25/arch/sparc/mm/sun4c.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/sparc/mm/sun4c.c 2003-07-04 22:24:13.000000000 -0700 @@ -2211,7 +2211,7 @@ void __init ld_mmu_sun4c(void) BTFIXUPSET_CALL(pte_alloc_one_kernel, sun4c_pte_alloc_one_kernel, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pte_alloc_one, sun4c_pte_alloc_one, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_pmd_fast, sun4c_free_pmd_fast, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(__pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); BTFIXUPSET_CALL(free_pgd_fast, sun4c_free_pgd_fast, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(get_pgd_fast, sun4c_get_pgd_fast, BTFIXUPCALL_NORM); @@ -2252,5 +2252,5 @@ void __init ld_mmu_sun4c(void) /* These should _never_ get called with two level tables. 
*/ BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0); + BTFIXUPSET_CALL(__pgd_page, sun4c_pgd_page, BTFIXUPCALL_RETO0); } diff -puN arch/x86_64/ia32/syscall32.c~highpmd arch/x86_64/ia32/syscall32.c --- 25/arch/x86_64/ia32/syscall32.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/x86_64/ia32/syscall32.c 2003-07-04 22:24:13.000000000 -0700 @@ -29,12 +29,15 @@ char *syscall32_page; and let it be handled by generic VM */ int map_syscall32(struct mm_struct *mm, unsigned long address) { + pgd_t *pgd; + pmd_t *pmd; pte_t *pte; int err = 0; down_read(&mm->mmap_sem); spin_lock(&mm->page_table_lock); - pmd_t *pmd = pmd_alloc(mm, pgd_offset(mm, address), address); - if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { + pgd = pgd_offset(mm, address); + pmd = pmd_alloc_map(mm, pgd, address); + if (pmd && (pte = pte_alloc_map(mm, pgd, &pmd, address)) != NULL) { if (pte_none(*pte)) { set_pte(pte, mk_pte(virt_to_page(syscall32_page), diff -puN arch/x86_64/mm/ioremap.c~highpmd arch/x86_64/mm/ioremap.c --- 25/arch/x86_64/mm/ioremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/arch/x86_64/mm/ioremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -82,7 +82,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -puN drivers/char/drm/drm_memory.h~highpmd drivers/char/drm/drm_memory.h --- 25/drivers/char/drm/drm_memory.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/drivers/char/drm/drm_memory.h 2003-07-04 22:24:13.000000000 -0700 @@ -123,7 +123,7 @@ static inline unsigned long drm_follow_page (void *vaddr) { pgd_t *pgd = pgd_offset_k((unsigned long) vaddr); - pmd_t *pmd = pmd_offset(pgd, (unsigned long) vaddr); + pmd_t *pmd = pmd_offset_kernel(pgd, (unsigned long)vaddr); pte_t *ptep = 
pte_offset_kernel(pmd, (unsigned long) vaddr); return pte_pfn(*ptep) << PAGE_SHIFT; } diff -puN fs/exec.c~highpmd fs/exec.c --- 25/fs/exec.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/fs/exec.c 2003-07-04 22:24:13.000000000 -0700 @@ -305,10 +305,10 @@ void put_dirty_page(struct task_struct * if (!pte_chain) goto out_sig; spin_lock(&tsk->mm->page_table_lock); - pmd = pmd_alloc(tsk->mm, pgd, address); + pmd = pmd_alloc_map(tsk->mm, pgd, address); if (!pmd) goto out; - pte = pte_alloc_map(tsk->mm, pmd, address); + pte = pte_alloc_map(tsk->mm, pgd, &pmd, address); if (!pte) goto out; if (!pte_none(*pte)) { @@ -320,6 +320,7 @@ void put_dirty_page(struct task_struct * set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); + pmd_unmap(pmd); tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); @@ -327,6 +328,8 @@ void put_dirty_page(struct task_struct * pte_chain_free(pte_chain); return; out: + if (pmd) + pmd_unmap(pmd); spin_unlock(&tsk->mm->page_table_lock); out_sig: __free_page(page); diff -puN include/asm-alpha/pgalloc.h~highpmd include/asm-alpha/pgalloc.h --- 25/include/asm-alpha/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-alpha/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -24,9 +24,9 @@ pmd_populate_kernel(struct mm_struct *mm } static inline void -pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +pgd_populate(struct mm_struct *mm, pgd_t *pgd, struct page *pmd) { - pgd_set(pgd, pmd); + pgd_set(pgd, page_address(pmd)); } extern pgd_t *pgd_alloc(struct mm_struct *mm); @@ -37,19 +37,29 @@ pgd_free(pgd_t *pgd) free_page((unsigned long)pgd); } -static inline pmd_t * +static inline struct page * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - if (ret) - clear_page(ret); - return ret; + struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (page) + 
clear_highpage(page); + return page; +} + +static inline pmd_t * +pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) +{ + struct page *page = pmd_alloc_one(mm, addr); + if (page) + return page_address(page); + else + return NULL; } static inline void -pmd_free(pmd_t *pmd) +pmd_free(struct page *pmd) { - free_page((unsigned long)pmd); + __free_page(pmd); } extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); diff -puN include/asm-alpha/pgtable.h~highpmd include/asm-alpha/pgtable.h --- 25/include/asm-alpha/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-alpha/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -229,9 +229,11 @@ pmd_page_kernel(pmd_t pmd) #define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & _PFN_MASK) >> 32)) #endif -extern inline unsigned long pgd_page(pgd_t pgd) +extern inline unsigned long __pgd_page(pgd_t pgd) { return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) + extern inline int pte_none(pte_t pte) { return !pte_val(pte); } extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } extern inline void pte_clear(pte_t *ptep) { pte_val(*ptep) = 0; } @@ -280,9 +282,15 @@ extern inline pte_t pte_mkyoung(pte_t pt /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. 
*/ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) { diff -puN include/asm-arm26/pgalloc.h~highpmd include/asm-arm26/pgalloc.h --- 25/include/asm-arm26/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-arm26/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -55,7 +55,8 @@ pmd_populate_kernel(struct mm_struct *mm * is thrown away. It just cant be zero. -IM */ -#define pmd_alloc_one(mm,addr) ((pmd_t *)2); BUG() +#define pmd_alloc_one(mm,addr) ((struct page *)2); BUG() +#define pmd_alloc_one_kernel(mm,addr) ((pmd_t *)2); BUG() #define pmd_free(pmd) do { } while (0) #define pgd_populate(mm,pmd,pte) (0) diff -puN include/asm-arm26/pgtable.h~highpmd include/asm-arm26/pgtable.h --- 25/include/asm-arm26/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-arm26/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -99,7 +99,7 @@ extern struct page *empty_zero_page; * on arm26 we have no 2nd level page table. we simulate this by removing the * PMD. * - * pgd_none is 0 to prevernt pmd_alloc() calling __pmd_alloc(). This causes it + * pgd_none is 0 to prevernt pmd_alloc_map() calling __pmd_alloc(). This causes it * to return pmd_offset(pgd,addr) which is a pointer to the pgd (IOW, a no-op). 
* * however, to work this way, whilst we are allocating 32 pgds, containing 32 @@ -134,7 +134,7 @@ extern struct page *empty_zero_page; #define _PMD_PRESENT (0x01) -/* These definitions allow us to optimise out stuff like pmd_alloc() */ +/* These definitions allow us to optimise out stuff like pmd_alloc_map() */ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) #define pgd_present(pgd) (1) @@ -189,6 +189,12 @@ extern struct page *empty_zero_page; #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + #define _PAGE_PRESENT 0x01 #define _PAGE_READONLY 0x02 diff -puN include/asm-arm/pgalloc.h~highpmd include/asm-arm/pgalloc.h --- 25/include/asm-arm/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-arm/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -16,7 +16,8 @@ /* * Since we have only two-level page tables, these are trivial */ -#define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(pmd) do { } while (0) #define pgd_populate(mm,pmd,pte) BUG() diff -puN include/asm-arm/pgtable.h~highpmd include/asm-arm/pgtable.h --- 25/include/asm-arm/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-arm/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -125,6 +125,11 @@ extern struct page *empty_zero_page; /* Find an entry in the second-level page table..
*/ #define pmd_offset(dir, addr) ((pmd_t *)(dir)) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) /* Find an entry in the third-level page table.. */ #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff -puN include/asm-cris/pgalloc.h~highpmd include/asm-cris/pgalloc.h --- 25/include/asm-cris/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-cris/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -102,7 +102,8 @@ static __inline__ void pte_free_slow(pte */ #define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free_slow(x) do { } while (0) #define pmd_free_fast(x) do { } while (0) #define pmd_free(x) do { } while (0) diff -puN include/asm-cris/pgtable.h~highpmd include/asm-cris/pgtable.h --- 25/include/asm-cris/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-cris/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -470,6 +470,12 @@ static inline pmd_t * pmd_offset(pgd_t * return (pmd_t *) dir; } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. 
*/ static inline pte_t * pte_offset(pmd_t * dir, unsigned long address) { diff -puN include/asm-h8300/pgtable.h~highpmd include/asm-h8300/pgtable.h --- 25/include/asm-h8300/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-h8300/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -15,6 +15,11 @@ typedef pte_t *pte_addr_t; #define pgd_clear(pgdp) #define kern_addr_valid(addr) (1) #define pmd_offset(a, b) ((void *)0) +#define pmd_offset_kernel(a,b) pmd_offset(a,b) +#define pmd_offset_map(a,b) pmd_offset(a,b) +#define pmd_offset_map_nested(a,b) pmd_offset(a,b) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) #define PAGE_NONE __pgprot(0) /* these mean nothing to NO_MM */ #define PAGE_SHARED __pgprot(0) /* these mean nothing to NO_MM */ diff -puN include/asm-i386/highmem.h~highpmd include/asm-i386/highmem.h --- 25/include/asm-i386/highmem.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/highmem.h 2003-07-04 22:24:13.000000000 -0700 @@ -41,9 +41,9 @@ extern void kmap_init(void); * chunk of RAM. 
*/ #if NR_CPUS <= 32 -#define PKMAP_BASE (0xff800000UL) +#define PKMAP_BASE (0xff400000UL) #else -#define PKMAP_BASE (0xff600000UL) +#define PKMAP_BASE (0xfe800000UL) #endif #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 diff -puN include/asm-i386/kmap_types.h~highpmd include/asm-i386/kmap_types.h --- 25/include/asm-i386/kmap_types.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/kmap_types.h 2003-07-04 22:24:13.000000000 -0700 @@ -17,14 +17,16 @@ D(3) KM_USER0, D(4) KM_USER1, D(5) KM_BIO_SRC_IRQ, D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_PTE2, -D(10) KM_IRQ0, -D(11) KM_IRQ1, -D(12) KM_SOFTIRQ0, -D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(7) KM_PMD0, +D(8) KM_PMD1, +D(9) KM_PTE0, +D(10) KM_PTE1, +D(11) KM_PTE2, +D(12) KM_IRQ0, +D(13) KM_IRQ1, +D(14) KM_SOFTIRQ0, +D(15) KM_SOFTIRQ1, +D(16) KM_TYPE_NR }; #undef D diff -puN include/asm-i386/pgalloc.h~highpmd include/asm-i386/pgalloc.h --- 25/include/asm-i386/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -45,7 +45,8 @@ static inline void pte_free(struct page * (In the PAE case we free the pmds as part of the pgd.) 
*/ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -puN include/asm-i386/pgtable-2level.h~highpmd include/asm-i386/pgtable-2level.h --- 25/include/asm-i386/pgtable-2level.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/pgtable-2level.h 2003-07-04 22:24:13.000000000 -0700 @@ -48,13 +48,15 @@ static inline int pgd_present(pgd_t pgd) #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) + +#define pmd_offset_map(pgd, addr) ({ (pmd_t *)(pgd); }) +#define pmd_offset_map_nested(pgd, addr) pmd_offset_map(pgd, addr) +#define pmd_offset_kernel(pgd, addr) pmd_offset_map(pgd, addr) + +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) #define pte_page(x) pfn_to_page(pte_pfn(x)) diff -puN include/asm-i386/pgtable-3level.h~highpmd include/asm-i386/pgtable-3level.h --- 25/include/asm-i386/pgtable-3level.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/pgtable-3level.h 2003-07-04 22:24:13.000000000 -0700 @@ -64,12 +64,32 @@ static inline void set_pte(pte_t *ptep, */ static inline void pgd_clear (pgd_t * pgd) { } -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return pgd_val(pgd) >> PAGE_SHIFT; +} + +#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) 
+ +#define pmd_offset_kernel(pgd, addr) \ + ((pmd_t *)__va(pgd_val(*(pgd)) & PAGE_MASK) + pmd_index(addr)) /* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ - pmd_index(address)) +#ifdef CONFIG_HIGHPMD +#define __pmd_offset(pgd, addr, type) \ + ((pmd_t *)kmap_atomic(pgd_page(*(pgd)), type) + pmd_index(addr)) +#define __pmd_unmap(pmd, type) kunmap_atomic(pmd, type) +#else +#define __pmd_offset(pgd, addr, type) \ + ((pmd_t *)__va(pgd_val(*(pgd)) & PAGE_MASK) + pmd_index(addr)) +#define __pmd_unmap(pmd, type) do { } while (0) +#endif + +#define pmd_offset_map(pgd, addr) __pmd_offset(pgd, addr, KM_PMD0) +#define pmd_offset_map_nested(pgd, addr) __pmd_offset(pgd, addr, KM_PMD1) + +#define pmd_unmap(pmd) __pmd_unmap(pmd, KM_PMD0) +#define pmd_unmap_nested(pmd) __pmd_unmap(pmd, KM_PMD1) static inline pte_t ptep_get_and_clear(pte_t *ptep) { diff -puN include/asm-i386/pgtable.h~highpmd include/asm-i386/pgtable.h --- 25/include/asm-i386/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-i386/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -33,11 +33,9 @@ extern unsigned long empty_zero_page[1024]; extern pgd_t swapper_pg_dir[1024]; extern kmem_cache_t *pgd_cache; -extern kmem_cache_t *pmd_cache; extern spinlock_t pgd_lock; extern struct list_head pgd_list; -void pmd_ctor(void *, kmem_cache_t *, unsigned long); void pgd_ctor(void *, kmem_cache_t *, unsigned long); void pgd_dtor(void *, kmem_cache_t *, unsigned long); void pgtable_cache_init(void); diff -puN include/asm-ia64/pgalloc.h~highpmd include/asm-ia64/pgalloc.h --- 25/include/asm-ia64/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ia64/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -71,9 +71,9 @@ pgd_free (pgd_t *pgd) } static inline void -pgd_populate (struct mm_struct *mm, pgd_t *pgd_entry, pmd_t *pmd) +pgd_populate (struct mm_struct *mm, pgd_t *pgd_entry, struct page *pmd) { -
pgd_val(*pgd_entry) = __pa(pmd); + pgd_val(*pgd_entry) = __pa(page_address(pmd)); } @@ -90,8 +90,8 @@ pmd_alloc_one_fast (struct mm_struct *mm return (pmd_t *)ret; } -static inline pmd_t* -pmd_alloc_one (struct mm_struct *mm, unsigned long addr) +static inline pmd_t * +pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); @@ -100,9 +100,16 @@ pmd_alloc_one (struct mm_struct *mm, uns return pmd; } +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + return pmd ? virt_to_page(pmd) : NULL; +} + static inline void -pmd_free (pmd_t *pmd) +pmd_free(struct page *page) { + pmd_t *pmd = page_address(page); *(unsigned long *)pmd = (unsigned long) pmd_quicklist; pmd_quicklist = (unsigned long *) pmd; ++pgtable_cache_size; diff -puN include/asm-ia64/pgtable.h~highpmd include/asm-ia64/pgtable.h --- 25/include/asm-ia64/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ia64/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -257,7 +257,8 @@ ia64_phys_addr_valid (unsigned long addr #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) /* * The following have defined behavior only work if pte_present() is true. @@ -326,7 +327,13 @@ pgd_offset (struct mm_struct *mm, unsign /* Find an entry in the second-level page table.. 
*/ #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *)__pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) /* * Find an entry in the third-level page table. This looks more complicated than it diff -puN include/asm-m68k/motorola_pgalloc.h~highpmd include/asm-m68k/motorola_pgalloc.h --- 25/include/asm-m68k/motorola_pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-m68k/motorola_pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -63,19 +63,28 @@ static inline void __pte_free_tlb(struct } -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pmd_t *pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { return get_pointer_table(); } -static inline int pmd_free(pmd_t *pmd) +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return free_pointer_table(pmd); + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; } -static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +static inline int pmd_free(struct page *pmd) { - return free_pointer_table(pmd); + return free_pointer_table(page_address(pmd)); +} + +static inline int __pmd_free_tlb(struct mmu_gather *tlb, struct page *pmd) +{ + return free_pointer_table(page_address(pmd)); } @@ -100,9 +109,9 @@ static inline void pmd_populate(struct m pmd_set(pmd, page_address(page)); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, struct page *pmd) { - pgd_set(pgd,
page_address(pmd)); } #endif /* _MOTOROLA_PGALLOC_H */ diff -puN include/asm-m68k/motorola_pgtable.h~highpmd include/asm-m68k/motorola_pgtable.h --- 25/include/asm-m68k/motorola_pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-m68k/motorola_pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -115,6 +115,7 @@ extern inline void pgd_set(pgd_t * pgdp, #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK)) #define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK)) #define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) #define pte_none(pte) (!pte_val(pte)) @@ -203,6 +204,12 @@ extern inline pmd_t * pmd_offset(pgd_t * return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD-1)); } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. 
*/ extern inline pte_t * pte_offset_kernel(pmd_t * pmdp, unsigned long address) { diff -puN include/asm-m68knommu/pgtable.h~highpmd include/asm-m68knommu/pgtable.h --- 25/include/asm-m68knommu/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-m68knommu/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -21,7 +21,12 @@ typedef pte_t *pte_addr_t; #define pgd_bad(pgd) (0) #define pgd_clear(pgdp) #define kern_addr_valid(addr) (1) -#define pmd_offset(a, b) ((void *)0) +#define pmd_offset(a, b) ((void *)0) +#define pmd_offset_kernel(a, b) pmd_offset(a, b) +#define pmd_offset_map(a, b) pmd_offset(a, b) +#define pmd_offset_map_nested(a, b) pmd_offset(a, b) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) #define PAGE_NONE __pgprot(0) #define PAGE_SHARED __pgprot(0) diff -puN include/asm-m68k/sun3_pgalloc.h~highpmd include/asm-m68k/sun3_pgalloc.h --- 25/include/asm-m68k/sun3_pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-m68k/sun3_pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -18,7 +18,8 @@ extern const char bad_pmd_string[]; -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,address) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm,address) ({ BUG(); ((pmd_t *)2); }) static inline void pte_free_kernel(pte_t * pte) diff -puN include/asm-mips64/pgalloc.h~highpmd include/asm-mips64/pgalloc.h --- 25/include/asm-mips64/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-mips64/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -28,7 +28,7 @@ static inline void pmd_populate(struct m set_pmd(pmd, __pmd((PAGE_OFFSET + page_to_pfn(pte)) << PAGE_SHIFT)); } -#define pgd_populate(mm, pgd, pmd) set_pgd(pgd, __pgd(pmd)) +#define pgd_populate(mm, pgd, pmd) set_pgd(pgd, __pgd(page_address(pmd))) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -88,7 +88,7 @@ static inline void pte_free(struct page #define 
__pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) #define __pmd_free_tlb(tlb,x) do { } while (0) -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pmd_t *pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { pmd_t *pmd; @@ -98,9 +98,18 @@ static inline pmd_t *pmd_alloc_one(struc return pmd; } -static inline void pmd_free(pmd_t *pmd) +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - free_pages((unsigned long)pmd, PMD_ORDER); + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + +static inline void pmd_free(struct page *pmd) +{ + __free_pages(pmd, PMD_ORDER); } extern pte_t kptbl[(PAGE_SIZE << PGD_ORDER)/sizeof(pte_t)]; diff -puN include/asm-mips64/pgtable.h~highpmd include/asm-mips64/pgtable.h --- 25/include/asm-mips64/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-mips64/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -155,11 +155,13 @@ extern pmd_t empty_bad_pmd_table[2*PAGE_ #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) #define pmd_page_kernel(pmd) pmd_val(pmd) -static inline unsigned long pgd_page(pgd_t pgd) +static inline unsigned long __pgd_page(pgd_t pgd) { return pgd_val(pgd); } +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) + static inline int pte_none(pte_t pte) { return !(pte_val(pte) & ~_PAGE_GLOBAL); @@ -397,10 +399,16 @@ static inline pte_t pte_modify(pte_t pte /* Find an entry in the second-level page table.. 
*/ static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + + return (pmd_t *)__pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while(0) +#define pmd_unmap_nested(pmd) do { } while(0) + /* Find an entry in the third-level page table.. */ #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff -puN include/asm-mips/pgalloc.h~highpmd include/asm-mips/pgalloc.h --- 25/include/asm-mips/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-mips/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -96,7 +96,8 @@ static inline void pte_free(struct page * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. */ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) diff -puN include/asm-mips/pgtable.h~highpmd include/asm-mips/pgtable.h --- 25/include/asm-mips/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-mips/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -374,6 +374,12 @@ static inline pmd_t *pmd_offset(pgd_t *d return (pmd_t *) dir; } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. 
*/ #define __pte_offset(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff -puN include/asm-parisc/pgalloc.h~highpmd include/asm-parisc/pgalloc.h --- 25/include/asm-parisc/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-parisc/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -28,12 +28,12 @@ static inline void pgd_free(pgd_t *pgd) /* Three Level Page Table Support for pmd's */ -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, struct page *pmd) { - pgd_val(*pgd) = _PAGE_TABLE + __pa((unsigned long)pmd); + pgd_val(*pgd) = _PAGE_TABLE + __pa(page_address(pmd)); } -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pmd_t *pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); if (pmd) @@ -41,9 +41,18 @@ static inline pmd_t *pmd_alloc_one(struc return pmd; } -static inline void pmd_free(pmd_t *pmd) +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - free_page((unsigned long)pmd); + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + +static inline void pmd_free(struct page *pmd) +{ + __free_page(pmd); } #else @@ -55,7 +64,8 @@ static inline void pmd_free(pmd_t *pmd) * inside the pgd, so has no extra memory associated with it. 
*/ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm, addr) pmd_alloc_one(mm, addr) #define pmd_free(x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -puN include/asm-parisc/pgtable.h~highpmd include/asm-parisc/pgtable.h --- 25/include/asm-parisc/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-parisc/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -242,7 +242,8 @@ extern unsigned long *empty_zero_page; #ifdef __LP64__ -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define __pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) /* For 64 bit we have three level tables */ @@ -339,11 +340,17 @@ extern inline pte_t pte_modify(pte_t pte #ifdef __LP64__ #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +((pmd_t *)__pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. 
*/ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ diff -puN include/asm-ppc64/pgalloc.h~highpmd include/asm-ppc64/pgalloc.h --- 25/include/asm-ppc64/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ppc64/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -26,10 +26,10 @@ pgd_free(pgd_t *pgd) free_page((unsigned long)pgd); } -#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) +#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, page_address(PMD)) static inline pmd_t * -pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { pmd_t *pmd; @@ -39,10 +39,19 @@ pmd_alloc_one(struct mm_struct *mm, unsi return pmd; } +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + static inline void -pmd_free(pmd_t *pmd) +pmd_free(struct page *pmd) { - free_page((unsigned long)pmd); + __free_page(pmd); } #define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) diff -puN include/asm-ppc64/pgtable.h~highpmd include/asm-ppc64/pgtable.h --- 25/include/asm-ppc64/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ppc64/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -190,7 +190,8 @@ extern unsigned long empty_zero_page[PAG #define pgd_bad(pgd) ((pgd_val(pgd)) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) (__bpn_to_ba(pgd_val(pgd))) +#define __pgd_page(pgd) (__bpn_to_ba(pgd_val(pgd))) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -203,12 +204,18 @@ extern unsigned long empty_zero_page[PAG /* Find an entry in the second-level page table.. 
*/ #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *)__pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) /* Find an entry in the third-level page table.. */ #define pte_offset_kernel(dir,addr) \ ((pte_t *) pmd_page_kernel(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) do { } while(0) diff -puN include/asm-ppc/pgalloc.h~highpmd include/asm-ppc/pgalloc.h --- 25/include/asm-ppc/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ppc/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -15,7 +15,8 @@ extern void pgd_free(pgd_t *pgd); * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. 
*/ -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,address) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -puN include/asm-ppc/pgtable.h~highpmd include/asm-ppc/pgtable.h --- 25/include/asm-ppc/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-ppc/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -370,8 +370,9 @@ static inline int pgd_bad(pgd_t pgd) { static inline int pgd_present(pgd_t pgd) { return 1; } #define pgd_clear(xp) do { } while (0) -#define pgd_page(pgd) \ +#define __pgd_page(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) /* * The following only work if pte_present() is true. @@ -503,6 +504,12 @@ static inline pmd_t * pmd_offset(pgd_t * return (pmd_t *) dir; } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. */ #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff -puN include/asm-s390/pgalloc.h~highpmd include/asm-s390/pgalloc.h --- 25/include/asm-s390/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-s390/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -61,12 +61,13 @@ static inline void pgd_free(pgd_t *pgd) * We use pmd cache only on s390x, so these are dummy routines. This * code never triggers because the pgd will always be present. 
*/ -#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm,address) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() #else /* __s390x__ */ -static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) +static inline pmd_t * pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) { pmd_t *pmd; int i; @@ -79,16 +80,25 @@ static inline pmd_t * pmd_alloc_one(stru return pmd; } -static inline void pmd_free (pmd_t *pmd) +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - free_pages((unsigned long) pmd, 2); + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + +static inline void pmd_free(struct page *pmd) +{ + __free_pages(pmd, 2); } #define __pmd_free_tlb(tlb,pmd) pmd_free(pmd) -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, struct page *pmd) { - pgd_val(*pgd) = _PGD_ENTRY | __pa(pmd); + pgd_val(*pgd) = _PGD_ENTRY | __pa(page_address(pmd)); } #endif /* __s390x__ */ diff -puN include/asm-s390/pgtable.h~highpmd include/asm-s390/pgtable.h --- 25/include/asm-s390/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-s390/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -613,6 +613,7 @@ static inline pte_t mk_pte_phys(unsigned /* to find an entry in a page-table-directory */ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) +#define pgd_page(pgd) virt_to_page(pgd_page_kernel(pgd)) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) @@ -634,6 +635,12 @@ extern inline pmd_t * 
pmd_offset(pgd_t * #endif /* __s390x__ */ +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. */ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pte_offset_kernel(pmd, address) \ diff -puN include/asm-sh/pgalloc.h~highpmd include/asm-sh/pgalloc.h --- 25/include/asm-sh/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-sh/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -94,7 +94,8 @@ static inline void pte_free(struct page * inside the pgd, so has no extra memory associated with it. */ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -puN include/asm-sh/pgtable-2level.h~highpmd include/asm-sh/pgtable-2level.h --- 25/include/asm-sh/pgtable-2level.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-sh/pgtable-2level.h 2003-07-04 22:24:13.000000000 -0700 @@ -48,14 +48,21 @@ static inline void pgd_clear (pgd_t * pg #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ +#define __pgd_page(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define 
pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + #define pte_pfn(x) ((unsigned long)(((x).pte >> PAGE_SHIFT))) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff -puN include/asm-sparc64/pgalloc.h~highpmd include/asm-sparc64/pgalloc.h --- 25/include/asm-sparc64/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-sparc64/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -132,7 +132,7 @@ static __inline__ void free_pgd_slow(pgd #define DCACHE_COLOR(address) 0 #endif -#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) +#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, page_address(PMD)) static __inline__ pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address) { @@ -153,7 +153,7 @@ static __inline__ pmd_t *pmd_alloc_one_f return (pmd_t *)ret; } -static __inline__ pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +static __inline__ pmd_t *pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; @@ -166,6 +166,15 @@ static __inline__ pmd_t *pmd_alloc_one(s return pmd; } +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + static __inline__ void free_pmd_fast(pmd_t *pmd) { unsigned long color = DCACHE_COLOR((unsigned long)pmd); @@ -222,7 +231,7 @@ static __inline__ void free_pte_slow(pte #define pte_free_kernel(pte) free_pte_fast(pte) #define pte_free(pte) free_pte_fast(page_address(pte)) -#define pmd_free(pmd) free_pmd_fast(pmd) +#define pmd_free(pmd) free_pmd_fast(page_address(pmd)) #define pgd_free(pgd) free_pgd_fast(pgd) #define pgd_alloc(mm) get_pgd_fast() diff -puN include/asm-sparc64/pgtable.h~highpmd include/asm-sparc64/pgtable.h --- 25/include/asm-sparc64/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 
-0700 +++ 25-akpm/include/asm-sparc64/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -228,7 +228,8 @@ static inline pte_t pte_modify(pte_t ori (pgd_val(*(pgdp)) = (__pa((unsigned long) (pmdp)) >> 11UL)) #define __pmd_page(pmd) ((unsigned long) __va((pmd_val(pmd)<<11UL))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) -#define pgd_page(pgd) ((unsigned long) __va((pgd_val(pgd)<<11UL))) +#define __pgd_page(pgd) ((unsigned long) __va((pgd_val(pgd)<<11UL))) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) #define pte_none(pte) (!pte_val(pte)) #define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pte_clear(pte) (pte_val(*(pte)) = 0UL) @@ -270,8 +271,13 @@ static inline pte_t pte_modify(pte_t ori #define pgd_offset_k(address) pgd_offset(&init_mm, address) /* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *)__pgd_page(*(dir)) + \ ((address >> PMD_SHIFT) & (REAL_PTRS_PER_PMD-1))) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) /* Find an entry in the third-level page table.. 
*/ #define pte_index(dir, address) ((pte_t *) __pmd_page(*(dir)) + \ diff -puN include/asm-sparc/pgalloc.h~highpmd include/asm-sparc/pgalloc.h --- 25/include/asm-sparc/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-sparc/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -38,15 +38,24 @@ BTFIXUPDEF_CALL(void, free_pgd_fast, pgd BTFIXUPDEF_CALL(void, pgd_set, pgd_t *, pmd_t *) #define pgd_set(pgdp,pmdp) BTFIXUP_CALL(pgd_set)(pgdp,pmdp) -#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) +#define pgd_populate(MM, PGD, PMD) pgd_set(PGD, page_address(PMD)) -BTFIXUPDEF_CALL(pmd_t *, pmd_alloc_one, struct mm_struct *, unsigned long) -#define pmd_alloc_one(mm, address) BTFIXUP_CALL(pmd_alloc_one)(mm, address) +BTFIXUPDEF_CALL(pmd_t *, __pmd_alloc_one, struct mm_struct *, unsigned long) +#define pmd_alloc_one_kernel(mm, address) BTFIXUP_CALL(__pmd_alloc_one)(mm, address) + +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *) #define free_pmd_fast(pmd) BTFIXUP_CALL(free_pmd_fast)(pmd) -#define pmd_free(pmd) free_pmd_fast(pmd) +#define pmd_free(pmd) free_pmd_fast(page_address(pmd)) #define __pmd_free_tlb(tlb, pmd) pmd_free(pmd) BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *) diff -puN include/asm-sparc/pgtable.h~highpmd include/asm-sparc/pgtable.h --- 25/include/asm-sparc/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-sparc/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -202,10 +202,11 @@ extern unsigned long empty_zero_page; /* */ BTFIXUPDEF_CALL_CONST(struct page *, pmd_page, pmd_t) -BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page, pgd_t) +BTFIXUPDEF_CALL_CONST(unsigned long, __pgd_page, pgd_t) #define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd) -#define pgd_page(pgd) BTFIXUP_CALL(pgd_page)(pgd) +#define 
__pgd_page(pgd) BTFIXUP_CALL(__pgd_page)(pgd) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) BTFIXUPDEF_SETHI(none_mask) BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t) @@ -352,6 +353,11 @@ extern __inline__ pte_t pte_modify(pte_t /* Find an entry in the second-level page table.. */ BTFIXUPDEF_CALL(pmd_t *, pmd_offset, pgd_t *, unsigned long) #define pmd_offset(dir,addr) BTFIXUP_CALL(pmd_offset)(dir,addr) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) /* Find an entry in the third-level page table.. */ BTFIXUPDEF_CALL(pte_t *, pte_offset_kernel, pmd_t *, unsigned long) diff -puN include/asm-um/pgalloc.h~highpmd include/asm-um/pgalloc.h --- 25/include/asm-um/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-um/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -42,7 +42,8 @@ static inline void pte_free(struct page * inside the pgd, so has no extra memory associated with it. 
*/ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((struct page *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -puN include/asm-um/pgtable.h~highpmd include/asm-um/pgtable.h --- 25/include/asm-um/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-um/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -374,6 +374,12 @@ static inline pmd_t * pmd_offset(pgd_t * return (pmd_t *) dir; } +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) + /* Find an entry in the third-level page table.. */ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ diff -puN include/asm-v850/pgtable.h~highpmd include/asm-v850/pgtable.h --- 25/include/asm-v850/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-v850/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -13,6 +13,11 @@ typedef pte_t *pte_addr_t; #define pgd_clear(pgdp) ((void)0) #define pmd_offset(a, b) ((void *)0) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) #define kern_addr_valid(addr) (1) diff -puN include/asm-x86_64/pgalloc.h~highpmd include/asm-x86_64/pgalloc.h --- 25/include/asm-x86_64/pgalloc.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-x86_64/pgalloc.h 2003-07-04 22:24:13.000000000 -0700 @@ -10,7 +10,7 @@ #define 
pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) #define pgd_populate(mm, pgd, pmd) \ - set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd))) + set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(page_address(pmd)))) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { @@ -22,18 +22,25 @@ extern __inline__ pmd_t *get_pmd(void) return (pmd_t *)get_zeroed_page(GFP_KERNEL); } -extern __inline__ void pmd_free(pmd_t *pmd) +extern __inline__ void pmd_free(struct page *pmd) { - if ((unsigned long)pmd & (PAGE_SIZE-1)) - BUG(); - free_page((unsigned long)pmd); + __free_page(pmd); } -static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } +static inline struct page *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + pmd_t *pmd = pmd_alloc_one_kernel(mm, addr); + if (pmd) + return virt_to_page(pmd); + else + return NULL; +} + static inline pgd_t *pgd_alloc (struct mm_struct *mm) { return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); diff -puN include/asm-x86_64/pgtable.h~highpmd include/asm-x86_64/pgtable.h --- 25/include/asm-x86_64/pgtable.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/asm-x86_64/pgtable.h 2003-07-04 22:24:13.000000000 -0700 @@ -98,8 +98,9 @@ static inline void set_pml4(pml4_t *dst, pml4_val(*dst) = pml4_val(val); } -#define pgd_page(pgd) \ +#define __pgd_page(pgd) \ ((unsigned long) __va(pgd_val(pgd) & PHYSICAL_PAGE_MASK)) +#define pgd_page(pgd) virt_to_page(__pgd_page(pgd)) #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0)) #define pte_same(a, b) ((a).pte == (b).pte) @@ -332,8 +333,13 @@ static inline pgd_t *current_pgd_offset_ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_offset(dir, address) ((pmd_t *) 
pgd_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *)__pgd_page(*(dir)) + \ pmd_index(address)) +#define pmd_offset_kernel(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map(pgd, addr) pmd_offset(pgd, addr) +#define pmd_offset_map_nested(pgd, addr) pmd_offset(pgd, addr) +#define pmd_unmap(pmd) do { } while (0) +#define pmd_unmap_nested(pmd) do { } while (0) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) diff -puN include/linux/mm.h~highpmd include/linux/mm.h --- 25/include/linux/mm.h~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/include/linux/mm.h 2003-07-04 22:24:13.000000000 -0700 @@ -426,8 +426,9 @@ extern void invalidate_mmap_range(struct loff_t const holelen); extern int vmtruncate(struct inode * inode, loff_t offset); extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); +pmd_t *FASTCALL(__pmd_alloc_kernel(struct mm_struct *mm, pgd_t *pmd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); +pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmd, unsigned long address)); extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); @@ -488,12 +489,11 @@ static inline int set_page_dirty(struct * inlining and the symmetry break with pte_alloc_map() that does all * of this out-of-line. 
*/ -static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - if (pgd_none(*pgd)) - return __pmd_alloc(mm, pgd, address); - return pmd_offset(pgd, address); -} +#define pmd_alloc_map(mm, pgd, addr) \ + (pgd_none(*(pgd))? __pmd_alloc(mm,pgd,addr): pmd_offset_map(pgd,addr)) + +#define pmd_alloc_kernel(mm, pgd, addr) \ + (pgd_none(*(pgd))? __pmd_alloc_kernel(mm,pgd,addr): pmd_offset_kernel(pgd,addr)) extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, diff -puN mm/fremap.c~highpmd mm/fremap.c --- 25/mm/fremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/fremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -67,11 +67,11 @@ int install_page(struct mm_struct *mm, s pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, addr); + pmd = pmd_alloc_map(mm, pgd, addr); if (!pmd) goto err_unlock; - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_map(mm, pgd, &pmd, addr); if (!pte) goto err_unlock; @@ -82,6 +82,7 @@ int install_page(struct mm_struct *mm, s set_pte(pte, mk_pte(page, prot)); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); + pmd_unmap(pmd); if (flush) flush_tlb_page(vma, addr); update_mmu_cache(vma, addr, *pte); diff -puN mm/memory.c~highpmd mm/memory.c --- 25/mm/memory.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/memory.c 2003-07-04 22:24:13.000000000 -0700 @@ -104,6 +104,7 @@ static inline void free_one_pgd(struct m { int j; pmd_t * pmd; + struct page *page; if (pgd_none(*dir)) return; @@ -112,11 +113,13 @@ static inline void free_one_pgd(struct m pgd_clear(dir); return; } - pmd = pmd_offset(dir, 0); + page = pgd_page(*dir); + pmd = pmd_offset_map(dir, 0); pgd_clear(dir); for (j = 0; j < PTRS_PER_PMD ; j++) free_one_pmd(tlb, pmd+j); - pmd_free_tlb(tlb, pmd); + pmd_unmap(pmd); + pmd_free_tlb(tlb, page); } /* @@ -136,30 +139,38 @@ void clear_page_tables(struct 
mmu_gather } while (--nr); } -pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +/* + * error return happens with pmd unmapped + */ +pte_t *pte_alloc_map(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmd, unsigned long addr) { - if (!pmd_present(*pmd)) { + if (!pmd_present(**pmd)) { struct page *new; + pmd_unmap(*pmd); spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); + new = pte_alloc_one(mm, addr); spin_lock(&mm->page_table_lock); - if (!new) + if (!new) { + *pmd = NULL; return NULL; + } + + *pmd = pmd_offset_map(pgd, addr); /* * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ - if (pmd_present(*pmd)) { + if (pmd_present(**pmd)) { pte_free(new); goto out; } - pgtable_add_rmap(new, mm, address); - pmd_populate(mm, pmd, new); + pgtable_add_rmap(new, mm, addr); + pmd_populate(mm, *pmd, new); } out: - return pte_offset_map(pmd, address); + return pte_offset_map(*pmd, addr); } pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address) @@ -199,7 +210,7 @@ out: * variable count and make things faster. -jj * * dst->page_table_lock is held on entry and exit, - * but may be dropped within pmd_alloc() and pte_alloc_map(). + * but may be dropped within pmd_alloc_map() and pte_alloc_map(). 
*/ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) @@ -244,11 +255,10 @@ skip_copy_pmd_range: address = (address continue; } - src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_alloc(dst, dst_pgd, address); + dst_pmd = pmd_alloc_map(dst, dst_pgd, address); if (!dst_pmd) goto nomem; - + src_pmd = pmd_offset_map_nested(src_pgd, address); do { pte_t * src_pte, * dst_pte; @@ -261,15 +271,20 @@ skip_copy_pmd_range: address = (address pmd_clear(src_pmd); skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; - if (address >= end) + if (address >= end) { + pmd_unmap(dst_pmd); + pmd_unmap_nested(src_pmd); goto out; + } goto cont_copy_pmd_range; } - dst_pte = pte_alloc_map(dst, dst_pmd, address); + pmd_unmap_nested(src_pmd); + dst_pte = pte_alloc_map(dst, dst_pgd, &dst_pmd, address); if (!dst_pte) goto nomem; spin_lock(&src->page_table_lock); + src_pmd = pmd_offset_map_nested(src_pgd, address); src_pte = pte_offset_map_nested(src_pmd, address); do { pte_t pte = *src_pte; @@ -336,6 +351,8 @@ skip_copy_pte_range: */ pte_unmap_nested(src_pte); pte_unmap(dst_pte); + pmd_unmap_nested(src_pmd); + pmd_unmap(dst_pmd); spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); pte_chain = pte_chain_alloc(GFP_KERNEL); @@ -343,12 +360,16 @@ skip_copy_pte_range: if (!pte_chain) goto nomem; spin_lock(&src->page_table_lock); + dst_pmd = pmd_offset_map(dst_pgd, address); + src_pmd = pmd_offset_map_nested(src_pgd, address); dst_pte = pte_offset_map(dst_pmd, address); src_pte = pte_offset_map_nested(src_pmd, address); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { + pmd_unmap(dst_pmd); + pmd_unmap_nested(src_pmd); pte_unmap_nested(src_pte); pte_unmap(dst_pte); goto out_unlock; @@ -364,6 +385,8 @@ cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); + pmd_unmap_nested(src_pmd-1); + pmd_unmap(dst_pmd-1); } out_unlock: spin_unlock(&src->page_table_lock); @@ 
-439,7 +462,7 @@ zap_pmd_range(struct mmu_gather *tlb, pg pgd_clear(dir); return; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_map(dir, address); end = address + size; if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) end = ((address + PGDIR_SIZE) & PGDIR_MASK); @@ -448,6 +471,7 @@ zap_pmd_range(struct mmu_gather *tlb, pg address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); + pmd_unmap(pmd - 1); } void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, @@ -629,20 +653,27 @@ follow_page(struct mm_struct *mm, unsign if (pgd_none(*pgd) || pgd_bad(*pgd)) goto out; - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); if (pmd_none(*pmd)) - goto out; - if (pmd_huge(*pmd)) - return follow_huge_pmd(mm, address, pmd, write); - if (pmd_bad(*pmd)) - goto out; + goto out_unmap; + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + goto out_unmap; + } + if (pmd_huge(*pmd)) { + struct page *page = follow_huge_pmd(mm, address, pmd, write); + pmd_unmap(pmd); + return page; + } ptep = pte_offset_map(pmd, address); if (!ptep) - goto out; + goto out_unmap; pte = *ptep; pte_unmap(ptep); + pmd_unmap(pmd); if (pte_present(pte)) { if (!write || (pte_write(pte) && pte_dirty(pte))) { pfn = pte_pfn(pte); @@ -653,6 +684,9 @@ follow_page(struct mm_struct *mm, unsign out: return NULL; +out_unmap: + pmd_unmap(pmd); + goto out; } /* @@ -711,7 +745,7 @@ int get_user_pages(struct task_struct *t pgd = pgd_offset_k(pg); if (!pgd) return i ? : -EFAULT; - pmd = pmd_offset(pgd, pg); + pmd = pmd_offset_kernel(pgd, pg); if (!pmd) return i ? 
: -EFAULT; pte = pte_offset_kernel(pmd, pg); @@ -803,8 +837,8 @@ static void zeromap_pte_range(pte_t * pt } while (address && (address < end)); } -static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t prot) +static inline int zeromap_pmd_range(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmd, + unsigned long address, unsigned long size, pgprot_t prot) { unsigned long end; @@ -813,13 +847,13 @@ static inline int zeromap_pmd_range(stru if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc_map(mm, pmd, address); + pte_t *pte = pte_alloc_map(mm, pgd, pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); pte_unmap(pte); address = (address + PMD_SIZE) & PMD_MASK; - pmd++; + (*pmd)++; } while (address && (address < end)); return 0; } @@ -839,13 +873,14 @@ int zeromap_page_range(struct vm_area_st spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(mm, dir, address); + pmd_t *pmd = pmd_alloc_map(mm, dir, address); error = -ENOMEM; if (!pmd) break; - error = zeromap_pmd_range(mm, pmd, address, end - address, prot); + error = zeromap_pmd_range(mm, dir, &pmd, address, end - address, prot); if (error) break; + pmd_unmap(pmd - 1); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); @@ -880,8 +915,9 @@ static inline void remap_pte_range(pte_t } while (address && (address < end)); } -static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t prot) +static inline int remap_pmd_range(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmd, + unsigned long address, unsigned long size, + unsigned long phys_addr, pgprot_t prot) { unsigned long base, end; @@ -892,13 +928,13 @@ static inline int remap_pmd_range(struct end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc_map(mm, pmd, base + address); + pte_t *pte = 
pte_alloc_map(mm, pgd, pmd, base + address); if (!pte) return -ENOMEM; remap_pte_range(pte, base + address, end - address, address + phys_addr, prot); pte_unmap(pte); address = (address + PMD_SIZE) & PMD_MASK; - pmd++; + (*pmd)++; } while (address && (address < end)); return 0; } @@ -920,13 +956,14 @@ int remap_page_range(struct vm_area_stru spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(mm, dir, from); + pmd_t *pmd = pmd_alloc_map(mm, dir, from); error = -ENOMEM; if (!pmd) break; - error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); + error = remap_pmd_range(mm, dir, &pmd, from, end - from, phys_addr + from, prot); if (error) break; + pmd_unmap(pmd - 1); from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (from && (from < end)); @@ -996,6 +1033,7 @@ static int do_wp_page(struct mm_struct * * data, but for the moment just pretend this is OOM. */ pte_unmap(page_table); + pmd_unmap(pmd); printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address); goto oom; @@ -1010,11 +1048,13 @@ static int do_wp_page(struct mm_struct * establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); pte_unmap(page_table); + pmd_unmap(pmd); ret = VM_FAULT_MINOR; goto out; } } pte_unmap(page_table); + pmd_unmap(pmd); /* * Ok, we need to copy. Oh, well.. 
@@ -1034,6 +1074,7 @@ static int do_wp_page(struct mm_struct * * Re-check the pte - we dropped the lock */ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) @@ -1047,6 +1088,7 @@ static int do_wp_page(struct mm_struct * new_page = old_page; } pte_unmap(page_table); + pmd_unmap(pmd); page_cache_release(new_page); page_cache_release(old_page); ret = VM_FAULT_MINOR; @@ -1215,6 +1257,7 @@ static int do_swap_page(struct mm_struct struct pte_chain *pte_chain = NULL; pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { @@ -1226,12 +1269,14 @@ static int do_swap_page(struct mm_struct * we released the page table lock. */ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, orig_pte)) ret = VM_FAULT_OOM; else ret = VM_FAULT_MINOR; pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); goto out; } @@ -1254,9 +1299,11 @@ static int do_swap_page(struct mm_struct * released the page table lock. 
*/ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (!pte_same(*page_table, orig_pte)) { pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); unlock_page(page); page_cache_release(page); @@ -1282,6 +1329,7 @@ static int do_swap_page(struct mm_struct /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); + pmd_unmap(pmd); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); out: @@ -1307,11 +1355,13 @@ do_anonymous_page(struct mm_struct *mm, pte_chain = pte_chain_alloc(GFP_ATOMIC); if (!pte_chain) { pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) goto no_mem; spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, addr), addr); page_table = pte_offset_map(pmd, addr); } @@ -1322,6 +1372,7 @@ do_anonymous_page(struct mm_struct *mm, if (write_access) { /* Allocate our own private page. 
*/ pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); @@ -1330,9 +1381,11 @@ do_anonymous_page(struct mm_struct *mm, clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, addr), addr); page_table = pte_offset_map(pmd, addr); if (!pte_none(*page_table)) { + pmd_unmap(pmd); pte_unmap(page_table); page_cache_release(page); spin_unlock(&mm->page_table_lock); @@ -1348,6 +1401,7 @@ do_anonymous_page(struct mm_struct *mm, set_pte(page_table, entry); /* ignores ZERO_PAGE */ pte_chain = page_add_rmap(page, page_table, pte_chain); + pmd_unmap(pmd); pte_unmap(page_table); /* No need to invalidate - it was non-present before */ @@ -1390,6 +1444,7 @@ do_no_page(struct mm_struct *mm, struct return do_anonymous_page(mm, vma, page_table, pmd, write_access, address); pte_unmap(page_table); + pmd_unmap(pmd); mapping = vma->vm_file->f_dentry->d_inode->i_mapping; sequence = atomic_read(&mapping->truncate_count); @@ -1434,6 +1489,7 @@ retry: page_cache_release(new_page); goto retry; } + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); /* @@ -1456,9 +1512,11 @@ retry: set_pte(page_table, entry); pte_chain = page_add_rmap(new_page, page_table, pte_chain); pte_unmap(page_table); + pmd_unmap(pmd); } else { /* One of our sibling threads was faster, back out. 
*/ pte_unmap(page_table); + pmd_unmap(pmd); page_cache_release(new_page); spin_unlock(&mm->page_table_lock); ret = VM_FAULT_MINOR; @@ -1502,6 +1560,7 @@ static int do_file_page(struct mm_struct pgoff = pte_to_pgoff(*pte); pte_unmap(pte); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0); @@ -1562,6 +1621,7 @@ static inline int handle_pte_fault(struc entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); pte_unmap(pte); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); return VM_FAULT_MINOR; } @@ -1588,10 +1648,10 @@ int handle_mm_fault(struct mm_struct *mm * and the SMP-safe atomic PTE updates. */ spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, address); + pmd = pmd_alloc_map(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc_map(mm, pmd, address); + pte_t *pte = pte_alloc_map(mm, pgd, &pmd, address); if (pte) return handle_pte_fault(mm, vma, address, write_access, pte, pmd); } @@ -1610,10 +1670,33 @@ int handle_mm_fault(struct mm_struct *mm */ pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { + struct page *page; + + spin_unlock(&mm->page_table_lock); + page = pmd_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!page) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pmd_free(page); + goto out; + } + pgd_populate(mm, pgd, page); +out: + return pmd_offset_map(pgd, address); +} + +pmd_t *__pmd_alloc_kernel(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ pmd_t *new; spin_unlock(&mm->page_table_lock); - new = pmd_alloc_one(mm, address); + new = pmd_alloc_one_kernel(mm, address); spin_lock(&mm->page_table_lock); if (!new) return NULL; @@ -1623,12 +1706,12 @@ pmd_t *__pmd_alloc(struct mm_struct *mm, * entry, as somebody else could have populated it.. 
*/ if (pgd_present(*pgd)) { - pmd_free(new); + pmd_free(virt_to_page(new)); goto out; } - pgd_populate(mm, pgd, new); + pgd_populate(mm, pgd, virt_to_page(new)); out: - return pmd_offset(pgd, address); + return pmd_offset_kernel(pgd, address); } int make_pages_present(unsigned long addr, unsigned long end) @@ -1660,7 +1743,7 @@ struct page * vmalloc_to_page(void * vma pte_t *ptep, pte; if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); + pmd = pmd_offset_map(pgd, addr); if (!pmd_none(*pmd)) { preempt_disable(); ptep = pte_offset_map(pmd, addr); @@ -1670,6 +1753,7 @@ struct page * vmalloc_to_page(void * vma pte_unmap(ptep); preempt_enable(); } + pmd_unmap(pmd); } return page; } diff -puN mm/mprotect.c~highpmd mm/mprotect.c --- 25/mm/mprotect.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/mprotect.c 2003-07-04 22:24:13.000000000 -0700 @@ -73,7 +73,7 @@ change_pmd_range(pgd_t *pgd, unsigned lo pgd_clear(pgd); return; } - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) @@ -83,6 +83,7 @@ change_pmd_range(pgd_t *pgd, unsigned lo address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); } static void diff -puN mm/mremap.c~highpmd mm/mremap.c --- 25/mm/mremap.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/mremap.c 2003-07-04 22:24:13.000000000 -0700 @@ -38,7 +38,7 @@ static pte_t *get_one_pte_map_nested(str goto end; } - pmd = pmd_offset(pgd, addr); + pmd = pmd_offset_map_nested(pgd, addr); if (pmd_none(*pmd)) goto end; if (pmd_bad(*pmd)) { @@ -53,6 +53,7 @@ static pte_t *get_one_pte_map_nested(str pte = NULL; } end: + pmd_unmap_nested(pmd); return pte; } @@ -61,12 +62,15 @@ static inline int page_table_present(str { pgd_t *pgd; pmd_t *pmd; + int ret; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) return 0; - pmd = pmd_offset(pgd, addr); - return pmd_present(*pmd); + pmd = pmd_offset_map(pgd, addr); + 
ret = pmd_present(*pmd); + pmd_unmap(pmd); + return ret != 0; } #else #define page_table_present(mm, addr) (1) @@ -74,12 +78,15 @@ static inline int page_table_present(str static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) { + pgd_t *pgd; pmd_t *pmd; pte_t *pte = NULL; - pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); + pgd = pgd_offset(mm, addr); + pmd = pmd_alloc_map(mm, pgd, addr); if (pmd) - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_map(mm, pgd, &pmd, addr); + pmd_unmap(pmd); return pte; } diff -puN mm/msync.c~highpmd mm/msync.c --- 25/mm/msync.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/msync.c 2003-07-04 22:24:13.000000000 -0700 @@ -82,7 +82,7 @@ static inline int filemap_sync_pmd_range pgd_clear(pgd); return 0; } - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); if ((address & PGDIR_MASK) != (end & PGDIR_MASK)) end = (address & PGDIR_MASK) + PGDIR_SIZE; error = 0; @@ -91,6 +91,7 @@ static inline int filemap_sync_pmd_range address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); return error; } diff -puN mm/slab.c~highpmd mm/slab.c --- 25/mm/slab.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/slab.c 2003-07-04 22:24:13.000000000 -0700 @@ -2717,7 +2717,7 @@ void ptrinfo(unsigned long addr) printk("No pgd.\n"); break; } - pmd = pmd_offset(pgd, addr); + pmd = pmd_offset_kernel(pgd, addr); if (pmd_none(*pmd)) { printk("No pmd.\n"); break; diff -puN mm/swapfile.c~highpmd mm/swapfile.c --- 25/mm/swapfile.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/swapfile.c 2003-07-04 22:24:13.000000000 -0700 @@ -448,7 +448,7 @@ static int unuse_pgd(struct vm_area_stru pgd_clear(dir); return 0; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_map(dir, address); offset = address & PGDIR_MASK; address &= ~PGDIR_MASK; end = address + size; @@ -463,6 +463,7 @@ static int unuse_pgd(struct vm_area_stru address = 
(address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); return 0; } diff -puN mm/vmalloc.c~highpmd mm/vmalloc.c --- 25/mm/vmalloc.c~highpmd 2003-07-04 22:24:13.000000000 -0700 +++ 25-akpm/mm/vmalloc.c 2003-07-04 22:24:13.000000000 -0700 @@ -70,7 +70,7 @@ static void unmap_area_pmd(pgd_t *dir, u return; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_kernel(dir, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) @@ -159,7 +159,7 @@ int map_vm_area(struct vm_struct *area, dir = pgd_offset_k(address); spin_lock(&init_mm.page_table_lock); do { - pmd_t *pmd = pmd_alloc(&init_mm, dir, address); + pmd_t *pmd = pmd_alloc_kernel(&init_mm, dir, address); if (!pmd) { err = -ENOMEM; break; _