From: Andi Kleen

IA64 support for 4level pagetables.  Tested and working with 3 levels.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
---

 25-akpm/arch/ia64/mm/fault.c           |    2 -
 25-akpm/arch/ia64/mm/hugetlbpage.c     |   36 +++++++++++++++++++--------------
 25-akpm/arch/ia64/mm/init.c            |   18 +++++++++++++---
 25-akpm/include/asm-ia64/mmu_context.h |    2 -
 25-akpm/include/asm-ia64/page.h        |    4 +++
 25-akpm/include/asm-ia64/pgalloc.h     |   16 +------------
 25-akpm/include/asm-ia64/pgtable.h     |   21 +++++++------------
 25-akpm/include/asm-ia64/tlb.h         |    6 +++++
 8 files changed, 58 insertions(+), 47 deletions(-)

diff -puN arch/ia64/mm/fault.c~4level-ia64-support arch/ia64/mm/fault.c
--- 25/arch/ia64/mm/fault.c~4level-ia64-support	2004-11-30 01:08:17.908865344 -0800
+++ 25-akpm/arch/ia64/mm/fault.c	2004-11-30 01:08:17.921863368 -0800
@@ -54,7 +54,7 @@ mapped_kernel_page_is_present (unsigned
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
-	pgd = pgd_offset_k(address);
+	pgd = pml4_pgd_offset_k(pml4_offset_k(address), address);
 	if (pgd_none(*pgd) || pgd_bad(*pgd))
 		return 0;
diff -puN arch/ia64/mm/hugetlbpage.c~4level-ia64-support arch/ia64/mm/hugetlbpage.c
--- 25/arch/ia64/mm/hugetlbpage.c~4level-ia64-support	2004-11-30 01:08:17.909865192 -0800
+++ 25-akpm/arch/ia64/mm/hugetlbpage.c	2004-11-30 01:08:17.922863216 -0800
@@ -28,11 +28,13 @@ static pte_t *
 huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
+	pml4_t *pml4;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
 
-	pgd = pgd_offset(mm, taddr);
+	pml4 = pml4_offset(mm, taddr);
+	pgd = pml4_pgd_offset(pml4, taddr);
 	pmd = pmd_alloc(mm, pgd, taddr);
 	if (pmd)
 		pte = pte_alloc_map(mm, pmd, taddr);
@@ -43,11 +45,13 @@ static pte_t *
 huge_pte_offset (struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long taddr = htlbpage_to_page(addr);
+	pml4_t *pml4;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
 
-	pgd = pgd_offset(mm, taddr);
+	pml4 = pml4_offset(mm, taddr);
+	pgd = pml4_pgd_offset(pml4, taddr);
 	if (pgd_present(*pgd)) {
 		pmd = pmd_offset(pgd, taddr);
 		if (pmd_present(*pmd))
@@ -187,7 +191,7 @@ void hugetlb_free_pgtables(struct mmu_ga
 {
 	unsigned long first = start & HUGETLB_PGDIR_MASK;
 	unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
-	unsigned long start_index, end_index;
+	unsigned long start_pml4_index, start_pgd_index;
 	struct mm_struct *mm = tlb->mm;
 
 	if (!prev) {
@@ -211,23 +215,25 @@ void hugetlb_free_pgtables(struct mmu_ga
 			if (last > next->vm_start)
 				last = next->vm_start;
 		}
-		if (prev->vm_end > first)
+		if (prev->vm_end > first) {
 			first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
+			/* mm version checks for TASK_SIZE here.  Needed too? -AK */
+		}
 		break;
 	}
 no_mmaps:
-	if (last < first)	/* for arches with discontiguous pgd indices */
+	if (last < first)	/* for arches with discontiguous indices */
 		return;
-	/*
-	 * If the PGD bits are not consecutive in the virtual address, the
-	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
-	 */
-
-	start_index = pgd_index(htlbpage_to_page(first));
-	end_index = pgd_index(htlbpage_to_page(last));
-
-	if (end_index > start_index) {
-		clear_page_tables(tlb, start_index, end_index - start_index);
+	start_pml4_index = pml4_index(htlbpage_to_page(first));
+	start_pgd_index = pgd_index(htlbpage_to_page(first));
+	if (start_pml4_index == 0 && start_pgd_index < FIRST_USER_PGD_NR) {
+		start_pgd_index = FIRST_USER_PGD_NR;
+		first = start_pgd_index * PGDIR_SIZE;
+	}
+	if (pml4_index(htlbpage_to_page(last)) > start_pml4_index ||
+	    pgd_index(htlbpage_to_page(last)) > start_pgd_index) {
+		clear_page_range(tlb, first, last);
+		flush_tlb_pgtables(mm, first & PML4_MASK, last & PML4_MASK);
 	}
 }
diff -puN arch/ia64/mm/init.c~4level-ia64-support arch/ia64/mm/init.c
--- 25/arch/ia64/mm/init.c~4level-ia64-support	2004-11-30 01:08:17.911864888 -0800
+++ 25-akpm/arch/ia64/mm/init.c	2004-11-30 01:08:17.923863064 -0800
@@ -244,8 +244,7 @@ put_kernel_page (struct page *page, unsi
 		printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
 		       page_address(page));
 
-	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */
-
+	pgd = pml4_pgd_offset_k(pml4_offset_k(address), address);
 	spin_lock(&init_mm.page_table_lock);
 	{
 		pmd = pmd_alloc(&init_mm, pgd, address);
@@ -392,7 +391,7 @@ create_mem_map_page_table (u64 start, u6
 	node = paddr_to_nid(__pa(start));
 
 	for (address = start_page; address < end_page; address += PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
+		pgd = pml4_pgd_offset_k(pml4_offset_k(address), address);
 		if (pgd_none(*pgd))
 			pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
 		pmd = pmd_offset(pgd, address);
@@ -602,3 +601,16 @@ mem_init (void)
 	ia32_mem_init();
 #endif
 }
+
+pgd_t *__pgd_alloc (struct mm_struct *mm, pml4_t *dummy, unsigned long address)
+{
+	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
+	pgd_t *pgd = pgd_alloc_one_fast(mm);
+
+	if (unlikely(pgd == NULL)) {
+		pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+		if (likely(pgd != NULL))
+			clear_page(pgd);
+	}
+	return pgd;
+}
diff -puN include/asm-ia64/mmu_context.h~4level-ia64-support include/asm-ia64/mmu_context.h
--- 25/include/asm-ia64/mmu_context.h~4level-ia64-support	2004-11-30 01:08:17.912864736 -0800
+++ 25-akpm/include/asm-ia64/mmu_context.h	2004-11-30 01:08:17.923863064 -0800
@@ -160,7 +160,7 @@ activate_mm (struct mm_struct *prev, str
 	 * We may get interrupts here, but that's OK because interrupt handlers cannot
 	 * touch user-space.
 	 */
-	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
+	ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pml4));
 	activate_context(next);
 }
diff -puN include/asm-ia64/page.h~4level-ia64-support include/asm-ia64/page.h
--- 25/include/asm-ia64/page.h~4level-ia64-support	2004-11-30 01:08:17.914864432 -0800
+++ 25-akpm/include/asm-ia64/page.h	2004-11-30 01:08:17.924862912 -0800
@@ -146,6 +146,7 @@ get_order (unsigned long size)
 }
 
 # endif /* __KERNEL__ */
+
 #endif /* !__ASSEMBLY__ */
 
 #ifdef STRICT_MM_TYPECHECKS
@@ -193,4 +194,7 @@ get_order (unsigned long size)
 		(((current->personality & READ_IMPLIES_EXEC) != 0)	\
 		 ? VM_EXEC : 0))
 
+
+#include
+
 #endif /* _ASM_IA64_PAGE_H */
diff -puN include/asm-ia64/pgalloc.h~4level-ia64-support include/asm-ia64/pgalloc.h
--- 25/include/asm-ia64/pgalloc.h~4level-ia64-support	2004-11-30 01:08:17.915864280 -0800
+++ 25-akpm/include/asm-ia64/pgalloc.h	2004-11-30 01:08:17.924862912 -0800
@@ -54,20 +54,6 @@ pgd_alloc_one_fast (struct mm_struct *mm
 	return (pgd_t *) ret;
 }
 
-static inline pgd_t*
-pgd_alloc (struct mm_struct *mm)
-{
-	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-	pgd_t *pgd = pgd_alloc_one_fast(mm);
-
-	if (unlikely(pgd == NULL)) {
-		pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
-		if (likely(pgd != NULL))
-			clear_page(pgd);
-	}
-	return pgd;
-}
-
 static inline void
 pgd_free (pgd_t *pgd)
 {
@@ -174,4 +160,6 @@ pte_free_kernel (pte_t *pte)
 
 extern void check_pgt_cache (void);
 
+#include
+
 #endif /* _ASM_IA64_PGALLOC_H */
diff -puN include/asm-ia64/pgtable.h~4level-ia64-support include/asm-ia64/pgtable.h
--- 25/include/asm-ia64/pgtable.h~4level-ia64-support	2004-11-30 01:08:17.917863976 -0800
+++ 25-akpm/include/asm-ia64/pgtable.h	2004-11-30 01:08:17.925862760 -0800
@@ -92,7 +92,7 @@
 #define PGDIR_SIZE		(__IA64_UL(1) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD		(__IA64_UL(1) << (PAGE_SHIFT-3))
-#define USER_PTRS_PER_PGD	(5*PTRS_PER_PGD/8)	/* regions 0-4 are user regions */
+#define USER_PTRS_IN_LAST_PML4	(5*PTRS_PER_PGD/8)	/* regions 0-4 are user regions */
 #define FIRST_USER_PGD_NR	0
 
 /*
@@ -308,23 +308,16 @@ pgd_index (unsigned long address)
 	return (region << (PAGE_SHIFT - 6)) | l1index;
 }
 
-/* The offset in the 1-level directory is given by the 3 region bits
-   (61..63) and the level-1 bits.  */
-static inline pgd_t*
-pgd_offset (struct mm_struct *mm, unsigned long address)
-{
-	return mm->pgd + pgd_index(address);
-}
+/* In the kernel's mapped region we have a full 43 bit space available
+   and completely ignore the region number (since we know it's in region
+   number 5). */
+#define pgd_index_k(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 
-/* In the kernel's mapped region we completely ignore the region number
-   (since we know it's in region number 5). */
-#define pgd_offset_k(addr) \
-	(init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
 
 /* Look up a pgd entry in the gate area.  On IA-64, the gate-area
    resides in the kernel-mapped segment, hence we use pgd_offset_k()
    here.  */
-#define pgd_offset_gate(mm, addr)	pgd_offset_k(addr)
+#define pml4_offset_gate(mm, addr)	pml4_pgd_offset_k(pml4_offset_k(addr), addr)
 
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(dir,addr) \
@@ -562,4 +555,6 @@ do {						\
 #define __HAVE_ARCH_PGD_OFFSET_GATE
 
 #include
+#include
+
 #endif /* _ASM_IA64_PGTABLE_H */
diff -puN include/asm-ia64/tlb.h~4level-ia64-support include/asm-ia64/tlb.h
--- 25/include/asm-ia64/tlb.h~4level-ia64-support	2004-11-30 01:08:17.918863824 -0800
+++ 25-akpm/include/asm-ia64/tlb.h	2004-11-30 01:08:17.925862760 -0800
@@ -236,4 +236,10 @@ do {							\
 	__pmd_free_tlb(tlb, ptep);			\
 } while (0)
 
+#define pgd_free_tlb(tlb, pgdp)			\
+do {							\
+	tlb->need_flush = 1;				\
+	__pgd_free_tlb(tlb, pgdp);			\
+} while (0)
+
 #endif /* _ASM_IA64_TLB_H */
_
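
A note on the index arithmetic the patch leans on: ia64 uses virtual-address
bits 61..63 as a region number, and only part of the remaining bits index the
level-1 directory, so top-level indices are discontiguous across regions.
That is why hugetlb_free_pgtables() above stops computing an index count for
clear_page_tables() and instead hands virtual ranges to clear_page_range().
The standalone userspace sketch below illustrates the arithmetic; it is an
illustration only: PAGE_SHIFT=14 (16KB pages) and the sample addresses are
assumed, and the pgd_index() body is reconstructed around the single
"return (region << (PAGE_SHIFT - 6)) | l1index;" line visible in the
pgtable.h hunk.

	/*
	 * Standalone illustration (not part of the patch).  Shows why
	 * ia64 top-level indices are discontiguous: bits 61..63 pick a
	 * region, and only 8 further bits (with 16KB pages) index into
	 * the level-1 directory.
	 */
	#include <stdio.h>

	#define PAGE_SHIFT	14	/* 16KB pages (assumed example config) */
	#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT - 3))
	#define PGDIR_SHIFT	(PMD_SHIFT + (PAGE_SHIFT - 3))
	#define PTRS_PER_PGD	(1UL << (PAGE_SHIFT - 3))

	/* user-region lookup: region bits are OR'ed in above the level-1 bits */
	static unsigned long pgd_index(unsigned long address)
	{
		unsigned long region = address >> 61;
		unsigned long l1index = (address >> PGDIR_SHIFT) &
					((PTRS_PER_PGD >> 3) - 1);

		return (region << (PAGE_SHIFT - 6)) | l1index;
	}

	/* kernel-region lookup (pgd_index_k above): the region is known to
	   be 5, so the full PTRS_PER_PGD index space is usable */
	static unsigned long pgd_index_k(unsigned long address)
	{
		return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	}

	int main(void)
	{
		unsigned long r0 = (0UL << 61) | (1UL << PGDIR_SHIFT);
		unsigned long r3 = (3UL << 61) | (1UL << PGDIR_SHIFT);
		unsigned long r5 = (5UL << 61) | (1UL << PGDIR_SHIFT);

		printf("region 0: pgd_index   = %lu\n", pgd_index(r0));	  /* 1 */
		printf("region 3: pgd_index   = %lu\n", pgd_index(r3));	  /* 769 */
		printf("region 5: pgd_index_k = %lu\n", pgd_index_k(r5)); /* 1 */
		return 0;
	}

Note how the index jumps by 256 per region (1 vs 769), leaving unused holes
in between.  With only 3 levels configured, the pml4 level is presumably
folded away by the (stripped) generic includes, so pml4_offset() and
pml4_pgd_offset() in the hunks above would reduce to returning mm->pml4 and
indexing it with pgd_index(); none of those definitions appear in this diff.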