Signed-off-by: Andrea Arcangeli diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -427,6 +427,24 @@ config NR_CPUS This is purely to save memory - each supported CPU requires memory in the static kernel configuration. +config PAGE_SHIFT + int "Software PAGE_SHIFT" + range 12 23 + default "12" + help + The PAGE_SIZE is defined as 2^PAGE_SHIFT. + The valid range of the PAGE_SHIFT is between HARD_PAGE_SHIFT and + HARD_PAGE_SHIFT+HARD_MAX_ORDER (12 + 11 = 23). The larger + PAGE_SHIFT is, the faster the kernel should be, + but the more memory will be wasted. For example, with + a PAGE_SHIFT of 12, if you read a 1-byte file, 4k + of pagecache will be allocated, while with a PAGE_SHIFT + of 23 8MBytes of pagecache will be allocated even if only 1 byte + of data is stored in the 8MByte softpage. + + Low memory systems should use the default shift of 12. + If unsure, use the default shift of 12. + config HOTPLUG_CPU bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -206,7 +206,7 @@ elf_core_copy_task_xfpregs(struct task_s extern int force_personality32; -#define ELF_EXEC_PAGESIZE PAGE_SIZE +#define ELF_EXEC_PAGESIZE HARD_PAGE_SIZE #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) #define ELF_PLATFORM ("i686") #define SET_PERSONALITY(ex, ibcs2) \ @@ -294,7 +294,7 @@ int ia32_setup_arg_pages(struct linux_bi struct mm_struct *mm = current->mm; int i, ret; - stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE; + stack_base = stack_top - MAX_ARG_PAGES * HARD_PAGE_SIZE; mm->arg_start = bprm->p + stack_base; bprm->p += stack_base; @@ -309,7 +309,7 @@ int ia32_setup_arg_pages(struct linux_bi down_write(&mm->mmap_sem); { mpnt->vm_mm = mm; - mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; + 
mpnt->vm_start = HARD_PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = stack_top; if (executable_stack == EXSTACK_ENABLE_X) mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; @@ -333,7 +333,7 @@ int ia32_setup_arg_pages(struct linux_bi bprm->page[i] = NULL; install_arg_page(mpnt, page, stack_base); } - stack_base += PAGE_SIZE; + stack_base += HARD_PAGE_SIZE; } up_write(&mm->mmap_sem); diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -243,7 +243,7 @@ sys32_mmap(struct mmap_arg_struct __user mm = current->mm; down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>HARD_PAGE_SHIFT); if (file) fput(file); diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -699,7 +699,7 @@ void __init init_apic_mappings(void) * one for the IO-APIC. */ if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = (unsigned long) alloc_bootmem_pages(HARD_PAGE_SIZE); apic_phys = __pa(apic_phys); } else apic_phys = mp_lapic_addr; @@ -710,7 +710,7 @@ void __init init_apic_mappings(void) /* Put local APIC into the resource map. 
*/ lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + lapic_resource.end = lapic_resource.start + HARD_PAGE_SIZE - 1; insert_resource(&iomem_resource, &lapic_resource); /* @@ -729,7 +729,7 @@ void __init init_apic_mappings(void) if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; } else { - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long) alloc_bootmem_pages(HARD_PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -278,10 +278,10 @@ early_idt_ripmsg: early_idt_ripmsg: .asciz "RIP %s\n" -.balign PAGE_SIZE +.balign HARD_PAGE_SIZE #define NEXT_PAGE(name) \ - .balign PAGE_SIZE; \ + .balign HARD_PAGE_SIZE; \ ENTRY(name) /* Automate the creation of 1 to 1 mapping pmd entries */ @@ -361,7 +361,7 @@ ENTRY(phys_base) */ .section .data.page_aligned, "aw" - .align PAGE_SIZE + .align HARD_PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) 
*/ @@ -385,7 +385,7 @@ gdt_end: /* GDTs of other CPUs are now dynamically allocated */ /* zero the remaining page */ - .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 + .fill HARD_PAGE_SIZE / 8 - GDT_ENTRIES,8,0 .section .bss, "aw", @nobits .align L1_CACHE_BYTES @@ -393,6 +393,6 @@ ENTRY(idt_table) .skip 256 * 16 .section .bss.page_aligned, "aw", @nobits - .align PAGE_SIZE + .align HARD_PAGE_SIZE ENTRY(empty_zero_page) - .skip PAGE_SIZE + .skip HARD_PAGE_SIZE diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -125,7 +125,7 @@ static __attribute_const__ struct io_api static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mpc_apicaddr & ~HARD_PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c --- a/arch/x86_64/kernel/ldt.c +++ b/arch/x86_64/kernel/ldt.c @@ -40,7 +40,7 @@ static int alloc_ldt(mm_context_t *pc, u return 0; oldsize = pc->size; mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) + if (mincount*LDT_ENTRY_SIZE > HARD_PAGE_SIZE) newldt = vmalloc(mincount*LDT_ENTRY_SIZE); else newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); @@ -72,7 +72,7 @@ static int alloc_ldt(mm_context_t *pc, u #endif } if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) + if (oldsize*LDT_ENTRY_SIZE > HARD_PAGE_SIZE) vfree(oldldt); else kfree(oldldt); @@ -116,7 +116,7 @@ void destroy_context(struct mm_struct *m void destroy_context(struct mm_struct *mm) { if (mm->context.size) { - if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) + if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > HARD_PAGE_SIZE) vfree(mm->context.ldt); else kfree(mm->context.ldt); diff --git a/arch/x86_64/kernel/machine_kexec.c 
b/arch/x86_64/kernel/machine_kexec.c --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -15,7 +15,7 @@ #include #include -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +#define PAGE_ALIGNED __attribute__ ((__aligned__(HARD_PAGE_SIZE))) static u64 kexec_pgd[512] PAGE_ALIGNED; static u64 kexec_pud0[512] PAGE_ALIGNED; static u64 kexec_pmd0[512] PAGE_ALIGNED; @@ -28,7 +28,7 @@ static void init_level2_page(pmd_t *leve { unsigned long end_addr; - addr &= PAGE_MASK; + addr &= HARD_PAGE_MASK; end_addr = addr + PUD_SIZE; while (addr < end_addr) { set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); @@ -43,7 +43,7 @@ static int init_level3_page(struct kimag int result; result = 0; - addr &= PAGE_MASK; + addr &= HARD_PAGE_MASK; end_addr = addr + PGDIR_SIZE; while ((addr < last_addr) && (addr < end_addr)) { struct page *page; @@ -76,7 +76,7 @@ static int init_level4_page(struct kimag int result; result = 0; - addr &= PAGE_MASK; + addr &= HARD_PAGE_MASK; end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); while ((addr < last_addr) && (addr < end_addr)) { struct page *page; @@ -186,8 +186,8 @@ NORET_TYPE void machine_kexec(struct kim /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, PAGE_SIZE); + control_page = page_address(image->control_code_page) + HARD_PAGE_SIZE; + memcpy(control_page, relocate_kernel, HARD_PAGE_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -45,7 +45,7 @@ void *module_alloc(unsigned long size) if (!size) return NULL; - size = PAGE_ALIGN(size); + size = HARD_PAGE_ALIGN(size); if (size > MODULES_LEN) return NULL; diff --git a/arch/x86_64/kernel/mpparse.c 
b/arch/x86_64/kernel/mpparse.c --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -539,9 +539,9 @@ static int __init smp_scan_config (unsig || (mpf->mpf_specification == 4)) ) { smp_found_config = 1; - reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); + reserve_bootmem_generic(virt_to_phys(mpf), HARD_PAGE_SIZE); if (mpf->mpf_physptr) - reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE); + reserve_bootmem_generic(mpf->mpf_physptr, HARD_PAGE_SIZE); mpf_found = mpf; return 1; } diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S --- a/arch/x86_64/kernel/relocate_kernel.S +++ b/arch/x86_64/kernel/relocate_kernel.S @@ -15,7 +15,7 @@ */ #define PTR(x) (x << 3) -#define PAGE_ALIGNED (1 << PAGE_SHIFT) +#define PAGE_ALIGNED (1 << HARD_PAGE_SHIFT) #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ .text diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -250,7 +250,7 @@ void __cpuinit cpu_init (void) panic("Cannot allocate exception stack %ld %d\n", v, cpu); } - estacks += PAGE_SIZE << order[v]; + estacks += HARD_PAGE_SIZE << order[v]; orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; } diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -43,7 +43,7 @@ asmlinkage long sys_mmap(unsigned long a struct file * file; error = -EINVAL; - if (off & ~PAGE_MASK) + if (off & ~HARD_PAGE_MASK) goto out; error = -EBADF; @@ -55,7 +55,7 @@ asmlinkage long sys_mmap(unsigned long a goto out; } down_write(&current->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); + error = do_mmap_pgoff(file, addr, len, prot, flags, off >> HARD_PAGE_SHIFT); up_write(&current->mm->mmap_sem); if (file) @@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp return -ENOMEM; if (addr) { 
- addr = PAGE_ALIGN(addr); + addr = HARD_PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (end - len >= addr && (!vma || addr + len <= vma->vm_start)) diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -687,6 +687,7 @@ asmlinkage void __kprobes do_general_pro "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, regs->rip, regs->rsp, error_code); + for(;;); force_sig(SIGSEGV, tsk); return; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -52,7 +52,7 @@ SECTIONS RODATA - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ + . = ALIGN(HARD_PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA @@ -61,7 +61,7 @@ SECTIONS _edata = .; /* End of data section */ - . = ALIGN(PAGE_SIZE); + . = ALIGN(HARD_PAGE_SIZE); . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) @@ -123,13 +123,13 @@ SECTIONS *(.data.init_task) }:data.init - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { *(.data.page_aligned) } /* might get freed after init */ - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __smp_alt_begin = .; __smp_alt_instructions = .; .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { @@ -145,10 +145,10 @@ SECTIONS .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { *(.smp_altinstr_replacement) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __smp_alt_end = .; - . = ALIGN(4096); /* Init code and data */ + . 
= ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; @@ -188,23 +188,23 @@ SECTIONS .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __init_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __nosave_begin = .; .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __nosave_end = .; __bss_start = .; /* BSS */ diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S @@ -2,11 +2,12 @@ #include #include +#include ALIGN copy_page_c: CFI_STARTPROC - movl $4096/8,%ecx + movl $PAGE_SIZE/8,%ecx rep movsq ret CFI_ENDPROC @@ -28,7 +29,7 @@ ENTRY(copy_page) movq %r13,2*8(%rsp) CFI_REL_OFFSET r13, 2*8 - movl $(4096/64)-5,%ecx + movl $(PAGE_SIZE/64)-5,%ecx .p2align 4 .Loop64: dec %rcx diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -501,6 +501,7 @@ bad_area_nosemaphore: tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); + for (;;); } tsk->thread.cr2 = address; @@ -541,7 +542,7 @@ no_context: flags = oops_begin(); - if (address < PAGE_SIZE) + if (address < HARD_PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else printk(KERN_ALERT "Unable to handle kernel paging request"); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -105,8 +105,8 @@ static __init void *spp_getpage(void) if (after_bootmem) ptr = (void *) get_zeroed_page(GFP_ATOMIC); else - ptr = alloc_bootmem_pages(PAGE_SIZE); - if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) + ptr = alloc_bootmem_pages(HARD_PAGE_SIZE); + if (!ptr || ((unsigned long)ptr & ~HARD_PAGE_MASK)) panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); Dprintk("spp_getpage %p\n", ptr); @@ -146,7 +146,7 @@ static __init void set_pte_phys(unsigned return; } } - new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); + new_pte = pfn_pte(phys >> HARD_PAGE_SHIFT, prot); pte = pte_offset_kernel(pmd, vaddr); if (!pte_none(*pte) && @@ -323,8 +323,8 @@ static void __init find_early_table_spac puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + - round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + tables = round_up(puds * sizeof(pud_t), HARD_PAGE_SIZE) + + round_up(pmds * sizeof(pmd_t), HARD_PAGE_SIZE); /* RED-PEN putting page tables only on node 0 could cause a hotspot and fill up ZONE_DMA. 
The page tables @@ -528,7 +528,7 @@ void __init mem_init(void) pci_iommu_alloc(); /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); + memset(empty_zero_page, 0, HARD_PAGE_SIZE); reservedpages = 0; @@ -694,7 +694,7 @@ int kern_addr_valid(unsigned long addr) pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) return 0; - return pfn_valid(pte_pfn(*pte)); + return pfn_valid(pte_pfn(*pte) >> (PAGE_SHIFT-HARD_PAGE_SHIFT)); } #ifdef CONFIG_SYSCTL @@ -738,7 +738,7 @@ __initcall(x8664_sysctl_init); static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << HARD_PAGE_SHIFT), .vm_page_prot = PAGE_READONLY_EXEC, .vm_flags = VM_READ | VM_EXEC }; diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -54,9 +54,9 @@ static struct page *split_large_page(uns address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { - pbase[i] = pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : ref_prot); + for (i = 0; i < (PTRS_PER_PTE<<(PAGE_SHIFT-HARD_PAGE_SHIFT)); i++, addr += HARD_PAGE_SIZE) { + pbase[i] = pfn_pte(addr >> HARD_PAGE_SHIFT, + addr >= address && addr < address+PAGE_SIZE ? 
prot : ref_prot); } return base; } @@ -116,7 +116,7 @@ static void revert_page(unsigned long ad BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); BUG_ON(pmd_val(*pmd) & _PAGE_PSE); - pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; + pfn = (__pa(address) & LARGE_PAGE_MASK) >> HARD_PAGE_SHIFT; large_pte = pfn_pte(pfn, ref_prot); large_pte = pte_mkhuge(large_pte); set_pte((pte_t *)pmd, large_pte); @@ -129,12 +129,16 @@ __change_page_attr(unsigned long address pte_t *kpte; struct page *kpte_page; pgprot_t ref_prot2; + unsigned long hardpfn; kpte = lookup_address(address); if (!kpte) return 0; kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); if (pgprot_val(prot) != pgprot_val(ref_prot)) { if (!pte_huge(*kpte)) { - set_pte(kpte, pfn_pte(pfn, prot)); + for (hardpfn = pfn << (PAGE_SHIFT-HARD_PAGE_SHIFT); + hardpfn < ((pfn+1) << (PAGE_SHIFT-HARD_PAGE_SHIFT)); hardpfn++) { + set_pte(kpte, pfn_pte(hardpfn, prot)); + } } else { /* * split_large_page will take the reference for this @@ -145,12 +149,18 @@ __change_page_attr(unsigned long address split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; - set_pte(kpte, mk_pte(split, ref_prot2)); +#if LARGE_PAGE_SIZE < PAGE_SIZE +#error "a single set_pte isn't enough" +#endif + set_pte(kpte, mk_pte(split, 0, ref_prot2)); kpte_page = split; } page_private(kpte_page)++; } else if (!pte_huge(*kpte)) { - set_pte(kpte, pfn_pte(pfn, ref_prot)); + for (hardpfn = pfn << (PAGE_SHIFT-HARD_PAGE_SHIFT); + hardpfn < ((pfn+1) << (PAGE_SHIFT-HARD_PAGE_SHIFT)); hardpfn++) { + set_pte(kpte, pfn_pte(hardpfn, ref_prot)); + } BUG_ON(page_private(kpte_page) == 0); page_private(kpte_page)--; } else @@ -190,6 +200,8 @@ int change_page_attr_addr(unsigned long kernel_map = 1; } + BUG_ON(address & ~PAGE_MASK); + down_write(&init_mm.mmap_sem); for (i = 0; i < numpages; i++, address += PAGE_SIZE) { unsigned long pfn = __pa(address) >> PAGE_SHIFT; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c --- 
a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -57,10 +57,10 @@ static int elf_core_dump(long signr, str #define elf_core_dump NULL #endif -#if ELF_EXEC_PAGESIZE > PAGE_SIZE +#if ELF_EXEC_PAGESIZE > HARD_PAGE_SIZE #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE #else -#define ELF_MIN_ALIGN PAGE_SIZE +#define ELF_MIN_ALIGN HARD_PAGE_SIZE #endif #ifndef ELF_CORE_EFLAGS @@ -254,8 +254,8 @@ create_elf_tables(struct linux_binprm *b size_t len; if (__put_user((elf_addr_t)p, argv++)) return -EFAULT; - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void __user *)p, HARD_PAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > HARD_PAGE_SIZE*MAX_ARG_PAGES) return 0; p += len; } @@ -266,8 +266,8 @@ create_elf_tables(struct linux_binprm *b size_t len; if (__put_user((elf_addr_t)p, envp++)) return -EFAULT; - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void __user *)p, HARD_PAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > HARD_PAGE_SIZE*MAX_ARG_PAGES) return 0; p += len; } @@ -507,7 +507,7 @@ out: #define INTERPRETER_ELF 2 #ifndef STACK_RND_MASK -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ +#define STACK_RND_MASK (0x7ff >> (HARD_PAGE_SHIFT - 12)) /* 8MB of VA */ #endif static unsigned long randomize_stack_top(unsigned long stack_top) @@ -517,12 +517,12 @@ static unsigned long randomize_stack_top if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { random_variable = get_random_int() & STACK_RND_MASK; - random_variable <<= PAGE_SHIFT; + random_variable <<= HARD_PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP - return PAGE_ALIGN(stack_top) + random_variable; + return HARD_PAGE_ALIGN(stack_top) + random_variable; #else - return PAGE_ALIGN(stack_top) - random_variable; + return HARD_PAGE_ALIGN(stack_top) - random_variable; #endif } @@ -1006,7 +1006,7 @@ static int load_elf_binary(struct 
linux_ Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ down_write(&current->mm->mmap_sem); - error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, + error = do_mmap(NULL, 0, HARD_PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); up_write(&current->mm->mmap_sem); } @@ -1171,8 +1171,8 @@ static int dump_seek(struct file *file, return 0; while (off > 0) { unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; + if (n > HARD_PAGE_SIZE) + n = HARD_PAGE_SIZE; if (!dump_write(file, buf, n)) return 0; off -= n; @@ -1698,16 +1698,16 @@ static int elf_core_dump(long signr, str for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { + addr += HARD_PAGE_SIZE) { struct page *page; struct vm_area_struct *vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, &page, &vma) <= 0) { - DUMP_SEEK(PAGE_SIZE); + DUMP_SEEK(HARD_PAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - if (!dump_seek(file, PAGE_SIZE)) { + if (!dump_seek(file, HARD_PAGE_SIZE)) { page_cache_release(page); goto end_coredump; } @@ -1716,9 +1716,9 @@ static int elf_core_dump(long signr, str flush_cache_page(vma, addr, page_to_pfn(page)); kaddr = kmap(page); - if ((size += PAGE_SIZE) > limit || + if ((size += HARD_PAGE_SIZE) > limit || !dump_write(file, kaddr, - PAGE_SIZE)) { + HARD_PAGE_SIZE)) { kunmap(page); page_cache_release(page); goto end_coredump; diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c @@ -239,8 +239,8 @@ static int copy_strings(int argc, char _ int offset, bytes_to_copy; struct page *page; - offset = pos % PAGE_SIZE; - i = pos/PAGE_SIZE; + offset = pos % HARD_PAGE_SIZE; + i = pos/HARD_PAGE_SIZE; page = bprm->page[i]; new = 0; if (!page) { @@ -261,12 +261,12 @@ static int copy_strings(int argc, char _ } if (new && offset) memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; + bytes_to_copy = HARD_PAGE_SIZE - offset; if (bytes_to_copy > len) { bytes_to_copy = len; if (new) memset(kaddr+offset+len, 
0, - PAGE_SIZE-offset-len); + HARD_PAGE_SIZE-offset-len); } err = copy_from_user(kaddr+offset, str, bytes_to_copy); if (err) { @@ -328,8 +328,9 @@ void install_arg_page(struct vm_area_str } inc_mm_counter(mm, anon_rss); lru_cache_add_active(page); - set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( - page, vma->vm_page_prot)))); + set_pte_at(mm, address, pte, + pte_mkdirty(pte_mkwrite(mk_pte(page, address, + vma->vm_page_prot)))); page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); @@ -369,42 +370,42 @@ int setup_arg_pages(struct linux_binprm } /* Now move them within their pages */ - offset = bprm->p % PAGE_SIZE; + offset = bprm->p % HARD_PAGE_SIZE; to = kmap(bprm->page[0]); for (j = 1; j < i; j++) { - memmove(to, to + offset, PAGE_SIZE - offset); + memmove(to, to + offset, HARD_PAGE_SIZE - offset); from = kmap(bprm->page[j]); - memcpy(to + PAGE_SIZE - offset, from, offset); + memcpy(to + HARD_PAGE_SIZE - offset, from, offset); kunmap(bprm->page[j - 1]); to = from; } - memmove(to, to + offset, PAGE_SIZE - offset); + memmove(to, to + offset, HARD_PAGE_SIZE - offset); kunmap(bprm->page[j - 1]); /* Limit stack size to 1GB */ stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; if (stack_base > (1 << 30)) stack_base = 1 << 30; - stack_base = PAGE_ALIGN(stack_top - stack_base); + stack_base = HARD_PAGE_ALIGN(stack_top - stack_base); /* Adjust bprm->p to point to the end of the strings. 
*/ - bprm->p = stack_base + PAGE_SIZE * i - offset; + bprm->p = stack_base + HARD_PAGE_SIZE * i - offset; mm->arg_start = stack_base; - arg_size = i << PAGE_SHIFT; + arg_size = i << HARD_PAGE_SHIFT; /* zero pages that were copied above */ while (i < MAX_ARG_PAGES) bprm->page[i++] = NULL; #else - stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE); - stack_base = PAGE_ALIGN(stack_base); + stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*HARD_PAGE_SIZE); + stack_base = HARD_PAGE_ALIGN(stack_base); bprm->p += stack_base; mm->arg_start = bprm->p; - arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start); + arg_size = stack_top - (HARD_PAGE_MASK & (unsigned long) mm->arg_start); #endif - arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE; + arg_size += EXTRA_STACK_VM_PAGES * HARD_PAGE_SIZE; if (bprm->loader) bprm->loader += stack_base; @@ -449,7 +450,7 @@ int setup_arg_pages(struct linux_binprm bprm->page[i] = NULL; install_arg_page(mpnt, page, stack_base); } - stack_base += PAGE_SIZE; + stack_base += HARD_PAGE_SIZE; } up_write(&mm->mmap_sem); @@ -1011,8 +1012,8 @@ void remove_arg_zero(struct linux_binprm char *kaddr; struct page *page; - offset = bprm->p & ~PAGE_MASK; - index = bprm->p >> PAGE_SHIFT; + offset = bprm->p & ~HARD_PAGE_MASK; + index = bprm->p >> HARD_PAGE_SHIFT; page = bprm->page[index]; kaddr = kmap_atomic(page, KM_USER0); @@ -1024,12 +1025,12 @@ void remove_arg_zero(struct linux_binprm /* discard that character... 
*/ bprm->p++; offset++; - } while (offset < PAGE_SIZE && ch != '\0'); + } while (offset < HARD_PAGE_SIZE && ch != '\0'); kunmap_atomic(kaddr, KM_USER0); /* free the old page */ - if (offset == PAGE_SIZE) { + if (offset == HARD_PAGE_SIZE) { __free_page(page); bprm->page[index] = NULL; } @@ -1062,7 +1063,7 @@ int search_binary_handler(struct linux_b fput(bprm->file); bprm->file = NULL; - loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + loader = HARD_PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); file = open_exec("/sbin/loader"); retval = PTR_ERR(file); @@ -1169,7 +1170,7 @@ int do_execve(char * filename, sched_exec(); - bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + bprm->p = HARD_PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); bprm->file = file; bprm->filename = filename; diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h --- a/include/asm-x86_64/fixmap.h +++ b/include/asm-x86_64/fixmap.h @@ -33,7 +33,7 @@ enum fixed_addresses { VSYSCALL_LAST_PAGE, - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> HARD_PAGE_SHIFT) - 1, VSYSCALL_HPET, FIX_HPET_BASE, FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ @@ -53,15 +53,15 @@ extern void __set_fixmap (enum fixed_add #define set_fixmap_nocache(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) -#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOP (VSYSCALL_END-HARD_PAGE_SIZE) +#define FIXADDR_SIZE (__end_of_fixed_addresses << HARD_PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) /* Only covers 32bit vsyscalls currently. Need another set for 64bit. 
*/ #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +#define FIXADDR_USER_END (FIXADDR_USER_START + HARD_PAGE_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << HARD_PAGE_SHIFT)) extern void __this_fixmap_does_not_exist(void); diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -3,11 +3,15 @@ #include -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 +#define PAGE_SHIFT CONFIG_PAGE_SHIFT #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) + +/* PAGE_SHIFT determines the page size */ +#define HARD_PAGE_SHIFT 12 +#define HARD_PAGE_SIZE (_AC(1,UL) << HARD_PAGE_SHIFT) +#define HARD_PAGE_MASK (~(HARD_PAGE_SIZE-1)) +#define PHYSICAL_PAGE_MASK (~(HARD_PAGE_SIZE-1) & __PHYSICAL_MASK) #define THREAD_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) @@ -60,6 +64,7 @@ typedef struct { unsigned long pgd; } pg #define PTE_MASK PHYSICAL_PAGE_MASK typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long hardpfn; } hardpfn_t; extern unsigned long phys_base; @@ -95,6 +100,7 @@ extern unsigned long phys_base; /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +#define HARD_PAGE_ALIGN(addr) (((addr)+HARD_PAGE_SIZE-1)&HARD_PAGE_MASK) /* See Documentation/x86_64/mm.txt for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 46 diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -28,7 +28,7 @@ extern void clear_kernel_mapping(unsigne * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. 
*/ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +extern unsigned long empty_zero_page[HARD_PAGE_SIZE/sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) #endif /* !__ASSEMBLY__ */ @@ -250,12 +250,12 @@ static inline unsigned long pmd_bad(pmd_ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) +#define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> HARD_PAGE_SHIFT) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; - pte_val(pte) = (page_nr << PAGE_SHIFT); + pte_val(pte) = (page_nr << HARD_PAGE_SHIFT); pte_val(pte) |= pgprot_val(pgprot); pte_val(pte) &= __supported_pte_mask; return pte; @@ -328,7 +328,7 @@ static inline int pmd_large(pmd_t pte) { * Level 4 access. */ #define pgd_page_vaddr(pgd) ((unsigned long) __va((unsigned long)pgd_val(pgd) & PTE_MASK)) -#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)) +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> HARD_PAGE_SHIFT)) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address)) @@ -338,14 +338,14 @@ static inline int pmd_large(pmd_t pte) { /* PUD - Level3 access */ /* to find an entry in a page-table-directory. 
*/ #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) -#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> HARD_PAGE_SHIFT)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) /* PMD - Level 2 access */ #define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> HARD_PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \ @@ -353,17 +353,20 @@ static inline int pmd_large(pmd_t pte) { #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) -#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) - -#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) +#define pfn_pmd(nr,prot) (__pmd(((nr) << HARD_PAGE_SHIFT) | pgprot_val(prot))) +#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> HARD_PAGE_SHIFT) + +#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> HARD_PAGE_SHIFT) +#define pgoff_to_pte(off) ((pte_t) { ((off) << HARD_PAGE_SHIFT) | _PAGE_FILE }) #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT /* PTE - Level 1 access. 
*/ /* page, protection -> pte */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +#define hardpfn_offset_to_index(addr) (((addr) & ~PAGE_MASK) >> HARD_PAGE_SHIFT) +#define page_to_hardpfn(page, offset) ((page_to_pfn(page) << (PAGE_SHIFT-HARD_PAGE_SHIFT)) + \ + hardpfn_offset_to_index(offset)) +#define mk_pte(page, offset, pgprot) pfn_pte(page_to_hardpfn((page), (offset)), (pgprot)) #define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) /* Change flags of a PTE */ @@ -376,7 +379,7 @@ static inline pte_t pte_modify(pte_t pte } #define pte_index(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + (((address) >> HARD_PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ pte_index(address)) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -68,9 +68,9 @@ extern void reserve_bootmem(unsigned lon #define alloc_bootmem_low(x) \ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, HARD_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) + __alloc_bootmem_low(x, HARD_PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long free_all_bootmem(void); @@ -94,9 +94,9 @@ extern void free_bootmem_node(pg_data_t #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, HARD_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) + __alloc_bootmem_low_node(pgdat, x, HARD_PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef 
CONFIG_HAVE_ARCH_ALLOC_REMAP diff --git a/include/linux/mm.h b/include/linux/mm.h --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1078,10 +1078,10 @@ static inline unsigned long do_mmap(stru unsigned long flag, unsigned long offset) { unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) + if ((offset + HARD_PAGE_ALIGN(len)) < offset) goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); + if (!(offset & ~HARD_PAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> HARD_PAGE_SHIFT); out: return ret; } @@ -1147,7 +1147,7 @@ static inline struct vm_area_struct * fi static inline unsigned long vma_pages(struct vm_area_struct *vma) { - return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + return (vma->vm_end - vma->vm_start) >> HARD_PAGE_SHIFT; } pgprot_t vm_get_page_prot(unsigned long vm_flags); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -18,10 +18,11 @@ /* Free memory management - zoned buddy allocator. 
*/ #ifndef CONFIG_FORCE_MAX_ZONEORDER -#define MAX_ORDER 11 -#else -#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER -#endif +#define HARD_MAX_ORDER 11 +#else +#define HARD_MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER +#endif +#define MAX_ORDER (HARD_MAX_ORDER - (PAGE_SHIFT-HARD_PAGE_SHIFT)) #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) struct free_area { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -148,7 +148,7 @@ static inline pgoff_t linear_page_index( static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { - pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT; + pgoff_t pgoff = (address - vma->vm_start) >> HARD_PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); } diff --git a/lib/ioremap.c b/lib/ioremap.c --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -17,7 +17,7 @@ static int ioremap_pte_range(pmd_t *pmd, pte_t *pte; unsigned long pfn; - pfn = phys_addr >> PAGE_SHIFT; + pfn = phys_addr >> HARD_PAGE_SHIFT; pte = pte_alloc_kernel(pmd, addr); if (!pte) return -ENOMEM; @@ -25,7 +25,7 @@ static int ioremap_pte_range(pmd_t *pmd, BUG_ON(!pte_none(*pte)); set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += HARD_PAGE_SIZE, addr != end); return 0; } diff --git a/mm/filemap.c b/mm/filemap.c --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1328,6 +1328,10 @@ static int fastcall page_cache_read(stru } while (ret == AOP_TRUNCATED_PAGE); + if (ret) { + printk("page_cache_read ret %d\n", ret); + for(;;); + } return ret; } @@ -1358,12 +1362,18 @@ struct page *filemap_nopage(struct vm_ar unsigned long size, pgoff; int did_readaround = 0, majmin = VM_FAULT_MINOR; - pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + pgoff = ((address-area->vm_start) >> HARD_PAGE_SHIFT) + area->vm_pgoff; + printk("address %lx vm_start %lx vm_pgoff %lu 
pgoff %lu\n", + address, area->vm_start, area->vm_pgoff, pgoff); + pgoff >>= PAGE_SHIFT-HARD_PAGE_SHIFT; retry_all: size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) + printk("pgoff %ld size %ld\n", pgoff, size); + if (pgoff >= size) { + printk("pgoff %ld size %ld\n", pgoff, size); goto outside_data_content; + } /* If we don't want any read-ahead, don't bother */ if (VM_RandomReadHint(area)) @@ -1445,8 +1455,11 @@ outside_data_content: * An external ptracer can access pages that normally aren't * accessible.. */ - if (area->vm_mm == current->mm) + if (area->vm_mm == current->mm) { + printk("sigbus\n"); + for(;;); return NOPAGE_SIGBUS; + } /* Fall through to the non-read-ahead case */ no_cached_page: /* diff --git a/mm/fremap.c b/mm/fremap.c --- a/mm/fremap.c +++ b/mm/fremap.c @@ -79,7 +79,7 @@ int install_page(struct mm_struct *mm, s inc_mm_counter(mm, file_rss); flush_icache_page(vma, page); - pte_val = mk_pte(page, prot); + pte_val = mk_pte(page, addr, prot); set_pte_at(mm, addr, pte, pte_val); page_add_file_rmap(page); update_mmu_cache(vma, addr, pte_val); diff --git a/mm/hugetlb.c b/mm/hugetlb.c --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -309,10 +309,9 @@ static pte_t make_huge_pte(struct vm_are pte_t entry; if (writable) { - entry = - pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + entry = pte_mkwrite(pte_mkdirty(mk_pte(page, 0, vma->vm_page_prot))); } else { - entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = pte_wrprotect(mk_pte(page, 0, vma->vm_page_prot)); } entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); diff --git a/mm/memory.c b/mm/memory.c --- a/mm/memory.c +++ b/mm/memory.c @@ -36,6 +36,8 @@ * (Gerhard.Wichert@pdb.siemens.de) * * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) + * + * Jun 2007 - Added CONFIG_PAGE_SHIFT (Andrea Arcangeli) */ #include <linux/kernel_stat.h> @@ -379,7 +381,7 @@ static inline int is_cow_mapping(unsigne * and the vm_pgoff will point to the first PFN mapped: 
thus every * page that is a raw mapping will always honor the rule * - * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) + * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> HARD_PAGE_SHIFT) * * and if that isn't true, the page has been COW'ed (in which case it * _does_ have a "struct page" associated with it even if it is in a @@ -390,7 +392,7 @@ struct page *vm_normal_page(struct vm_ar unsigned long pfn = pte_pfn(pte); if (unlikely(vma->vm_flags & VM_PFNMAP)) { - unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long off = (addr - vma->vm_start) >> HARD_PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) @@ -403,7 +405,7 @@ struct page *vm_normal_page(struct vm_ar * in the meantime we check that we get a valid pfn, * and that the resulting page looks ok. */ - if (unlikely(!pfn_valid(pfn))) { + if (unlikely(!pfn_valid(pfn << (PAGE_SHIFT-HARD_PAGE_SHIFT)))) { print_bad_pte(vma, pte, addr); return NULL; } @@ -1118,7 +1120,7 @@ static int zeromap_pte_range(struct mm_s arch_enter_lazy_mmu_mode(); do { struct page *page = ZERO_PAGE(addr); - pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); + pte_t zero_pte = pte_wrprotect(mk_pte(page, addr, prot)); if (unlikely(!pte_none(*pte))) { err = -EEXIST; @@ -1235,7 +1237,7 @@ static int insert_page(struct mm_struct get_page(page); inc_mm_counter(mm, file_rss); page_add_file_rmap(page); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); + set_pte_at(mm, addr, pte, mk_pte(page, addr, prot)); retval = 0; out_unlock: @@ -1342,7 +1344,7 @@ static int remap_pte_range(struct mm_str BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, pfn_pte(pfn, prot)); pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += HARD_PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return 0; @@ -1355,14 +1357,14 @@ static inline int remap_pmd_range(struct pmd_t *pmd; unsigned long next; - pfn -= addr >> 
PAGE_SHIFT; + pfn -= addr >> HARD_PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); if (remap_pte_range(mm, pmd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) + pfn + (addr >> HARD_PAGE_SHIFT), prot)) return -ENOMEM; } while (pmd++, addr = next, addr != end); return 0; @@ -1375,14 +1377,14 @@ static inline int remap_pud_range(struct pud_t *pud; unsigned long next; - pfn -= addr >> PAGE_SHIFT; + pfn -= addr >> HARD_PAGE_SHIFT; pud = pud_alloc(mm, pgd, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); if (remap_pmd_range(mm, pud, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) + pfn + (addr >> HARD_PAGE_SHIFT), prot)) return -ENOMEM; } while (pud++, addr = next, addr != end); return 0; @@ -1425,6 +1427,7 @@ int remap_pfn_range(struct vm_area_struc * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ + pfn <<= PAGE_SHIFT-HARD_PAGE_SHIFT; if (is_cow_mapping(vma->vm_flags)) { if (addr != vma->vm_start || end != vma->vm_end) return -EINVAL; @@ -1434,13 +1437,13 @@ int remap_pfn_range(struct vm_area_struc vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; BUG_ON(addr >= end); - pfn -= addr >> PAGE_SHIFT; + pfn -= addr >> HARD_PAGE_SHIFT; pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); err = remap_pud_range(mm, pgd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot); + pfn + (addr >> HARD_PAGE_SHIFT), prot); if (err) break; } while (pgd++, addr = next, addr != end); @@ -1590,7 +1593,7 @@ static inline void cow_user_page(struct */ if (unlikely(!src)) { void *kaddr = kmap_atomic(dst, KM_USER0); - void __user *uaddr = (void __user *)(va & PAGE_MASK); + void __user *uaddr = (void __user *)(va & HARD_PAGE_MASK); /* * This really shouldn't fail, because the page is there @@ -1598,8 +1601,8 @@ static inline void cow_user_page(struct * in which case we just give up and fill the 
result with * zeroes. */ - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) - memset(kaddr, 0, PAGE_SIZE); + if (__copy_from_user_inatomic(kaddr, uaddr, HARD_PAGE_SIZE)) + memset(kaddr, 0, HARD_PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0); flush_dcache_page(dst); return; @@ -1635,6 +1638,7 @@ static int do_wp_page(struct mm_struct * int reuse = 0, ret = VM_FAULT_MINOR; struct page *dirty_page = NULL; + printk("do_wp_page %lx\n", address); old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) goto gotten; @@ -1733,7 +1737,9 @@ gotten: } else inc_mm_counter(mm, anon_rss); flush_cache_page(vma, address, pte_pfn(orig_pte)); - entry = mk_pte(new_page, vma->vm_page_prot); + entry = mk_pte(new_page, + address-vma->vm_start + (vma->vm_pgoff<<HARD_PAGE_SHIFT), + vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); lazy_mmu_prot_update(entry); /* @@ -1874,7 +1880,7 @@ restart: continue; vba = vma->vm_pgoff; - vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; + vea = vba + ((vma->vm_end - vma->vm_start) >> HARD_PAGE_SHIFT) - 1; /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ zba = details->first_index; if (zba < vba) @@ -1884,8 +1890,8 @@ restart: zea = vea; if (unmap_mapping_range_vma(vma, - ((zba - vba) << PAGE_SHIFT) + vma->vm_start, - ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, + ((zba - vba) << HARD_PAGE_SHIFT) + vma->vm_start, + ((zea - vba + 1) << HARD_PAGE_SHIFT) + vma->vm_start, details) < 0) goto restart; } @@ -1932,13 +1938,13 @@ void unmap_mapping_range(struct address_ loff_t const holebegin, loff_t const holelen, int even_cows) { struct zap_details details; - pgoff_t hba = holebegin >> PAGE_SHIFT; - pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; + pgoff_t hba = holebegin >> HARD_PAGE_SHIFT; + pgoff_t hlen = (holelen + HARD_PAGE_SIZE - 1) >> HARD_PAGE_SHIFT; /* Check for overflow. 
*/ if (sizeof(holelen) > sizeof(hlen)) { long long holeend = - (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; + (holebegin + holelen + HARD_PAGE_SIZE - 1) >> HARD_PAGE_SHIFT; if (holeend & ~(long long)ULONG_MAX) hlen = ULONG_MAX - hba + 1; } @@ -2002,7 +2008,7 @@ int vmtruncate(struct inode * inode, lof if (IS_SWAPFILE(inode)) goto out_busy; i_size_write(inode, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + unmap_mapping_range(mapping, offset + HARD_PAGE_SIZE - 1, 0, 1); truncate_inode_pages(mapping, offset); goto out_truncate; @@ -2090,7 +2096,7 @@ void swapin_readahead(swp_entry_t entry, /* * Find the next applicable VMA for the NUMA policy. */ - addr += PAGE_SIZE; + addr += HARD_PAGE_SIZE; if (addr == 0) vma = NULL; if (vma) { @@ -2126,6 +2132,7 @@ static int do_swap_page(struct mm_struct pte_t pte; int ret = VM_FAULT_MINOR; + printk("do_swap_page %lx write_access %d\n", address, write_access); if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) goto out; @@ -2176,7 +2183,7 @@ static int do_swap_page(struct mm_struct /* The page isn't present yet, go ahead with the fault. */ inc_mm_counter(mm, anon_rss); - pte = mk_pte(page, vma->vm_page_prot); + pte = mk_pte(page, address, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; @@ -2225,6 +2232,7 @@ static int do_anonymous_page(struct mm_s spinlock_t *ptl; pte_t entry; +// printk("do_anonymous_page %lx write_access %d\n", address, write_access); if (write_access) { /* Allocate our own private page. 
*/ pte_unmap(page_table); @@ -2235,7 +2243,9 @@ static int do_anonymous_page(struct mm_s if (!page) goto oom; - entry = mk_pte(page, vma->vm_page_prot); + entry = mk_pte(page, + address-vma->vm_start + (vma->vm_pgoff << HARD_PAGE_SHIFT), + vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); page_table = pte_offset_map_lock(mm, pmd, address, &ptl); @@ -2248,7 +2258,7 @@ static int do_anonymous_page(struct mm_s /* Map the ZERO_PAGE - vm_page_prot is readonly */ page = ZERO_PAGE(address); page_cache_get(page); - entry = mk_pte(page, vma->vm_page_prot); + entry = mk_pte(page, 0, vma->vm_page_prot); ptl = pte_lockptr(mm, pmd); spin_lock(ptl); @@ -2299,6 +2309,7 @@ static int do_no_page(struct mm_struct * int anon = 0; struct page *dirty_page = NULL; + printk("do_no_page %lx write_access %d pgoff %lu\n", address, write_access, vma->vm_pgoff); pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); @@ -2308,7 +2319,7 @@ static int do_no_page(struct mm_struct * smp_rmb(); /* serializes i_size against truncate_count */ } retry: - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + new_page = vma->vm_ops->nopage(vma, address & HARD_PAGE_MASK, &ret); /* * No smp_rmb is needed here as long as there's a full * spin_lock/unlock sequence inside the ->nopage callback @@ -2317,6 +2328,7 @@ retry: * after the next truncate_count read. */ + printk("new_page %p\n", new_page); /* no page was available -- either SIGBUS, OOM or REFAULT */ if (unlikely(new_page == NOPAGE_SIGBUS)) return VM_FAULT_SIGBUS; @@ -2383,7 +2395,9 @@ retry: /* Only go through if we didn't race with anybody else... 
*/ if (pte_none(*page_table)) { flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); + entry = mk_pte(new_page, + address-vma->vm_start+(vma->vm_pgoff << HARD_PAGE_SHIFT), + vma->vm_page_prot); if (write_access) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte_at(mm, address, page_table, entry); @@ -2414,6 +2428,7 @@ unlock: set_page_dirty_balance(dirty_page); put_page(dirty_page); } + printk("ret %d\n", ret); return ret; oom: page_cache_release(new_page); @@ -2445,11 +2460,12 @@ static noinline int do_no_pfn(struct mm_ unsigned long pfn; int ret = VM_FAULT_MINOR; + printk("do_no_pfn %lx write_access %d\n", address, write_access); pte_unmap(page_table); BUG_ON(!(vma->vm_flags & VM_PFNMAP)); BUG_ON(is_cow_mapping(vma->vm_flags)); - pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); + pfn = vma->vm_ops->nopfn(vma, address & HARD_PAGE_MASK); if (unlikely(pfn == NOPFN_OOM)) return VM_FAULT_OOM; else if (unlikely(pfn == NOPFN_SIGBUS)) @@ -2486,6 +2502,7 @@ static int do_file_page(struct mm_struct pgoff_t pgoff; int err; + printk("do_file_page %lx write_access %d\n", address, write_access); if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) return VM_FAULT_MINOR; @@ -2499,7 +2516,7 @@ static int do_file_page(struct mm_struct /* We can then assume vm->vm_ops && vma->vm_ops->populate */ pgoff = pte_to_pgoff(orig_pte); - err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, + err = vma->vm_ops->populate(vma, address & HARD_PAGE_MASK, HARD_PAGE_SIZE, vma->vm_page_prot, pgoff, 0); if (err == -ENOMEM) return VM_FAULT_OOM; @@ -2594,6 +2611,7 @@ int __handle_mm_fault(struct mm_struct * count_vm_event(PGFAULT); + //printk("fault %lx write_access %d\n", address, write_access); if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, write_access); @@ -2793,9 +2811,9 @@ int access_process_vm(struct task_struct break; bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = 
PAGE_SIZE-offset; + offset = addr & (HARD_PAGE_SIZE-1); + if (bytes > HARD_PAGE_SIZE-offset) + bytes = HARD_PAGE_SIZE-offset; maddr = kmap(page); if (write) { diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -255,8 +255,8 @@ asmlinkage unsigned long sys_brk(unsigne if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) goto out; - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); + newbrk = HARD_PAGE_ALIGN(brk); + oldbrk = HARD_PAGE_ALIGN(mm->brk); if (oldbrk == newbrk) goto set_brk; @@ -268,7 +268,7 @@ asmlinkage unsigned long sys_brk(unsigne } /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + if (find_vma_intersection(mm, oldbrk, newbrk+HARD_PAGE_SIZE)) goto out; /* Ok, looks good - let it rip. */ @@ -513,7 +513,7 @@ again: remove_next = 1 + (end > next-> * vma expands, overlapping part of the next: * mprotect case 5 shifting the boundary up. */ - adjust_next = (end - next->vm_start) >> PAGE_SHIFT; + adjust_next = (end - next->vm_start) >> HARD_PAGE_SHIFT; anon_vma = next->anon_vma; importer = vma; } else if (end < vma->vm_end) { @@ -522,7 +522,7 @@ again: remove_next = 1 + (end > next-> * split_vma inserting another: so it must be * mprotect case 4 shifting the boundary down. 
*/ - adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT); + adjust_next = - ((vma->vm_end - end) >> HARD_PAGE_SHIFT); anon_vma = next->anon_vma; importer = next; } @@ -583,7 +583,7 @@ again: remove_next = 1 + (end > next-> vma->vm_end = end; vma->vm_pgoff = pgoff; if (adjust_next) { - next->vm_start += adjust_next << PAGE_SHIFT; + next->vm_start += adjust_next << HARD_PAGE_SHIFT; next->vm_pgoff += adjust_next; } @@ -699,7 +699,7 @@ can_vma_merge_after(struct vm_area_struc if (is_mergeable_vma(vma, file, vm_flags) && is_mergeable_anon_vma(anon_vma, vma->anon_vma)) { pgoff_t vm_pglen; - vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + vm_pglen = (vma->vm_end - vma->vm_start) >> HARD_PAGE_SHIFT; if (vma->vm_pgoff + vm_pglen == vm_pgoff) return 1; } @@ -741,7 +741,7 @@ struct vm_area_struct *vma_merge(struct struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy) { - pgoff_t pglen = (end - addr) >> PAGE_SHIFT; + pgoff_t pglen = (end - addr) >> HARD_PAGE_SHIFT; struct vm_area_struct *area, *next; /* @@ -833,7 +833,7 @@ struct anon_vma *find_mergeable_anon_vma mpol_equal(vma_policy(vma), vma_policy(near)) && can_vma_merge_before(near, vm_flags, NULL, vma->vm_file, vma->vm_pgoff + - ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))) + ((vma->vm_end - vma->vm_start) >> HARD_PAGE_SHIFT))) return near->anon_vma; try_prev: /* @@ -903,6 +903,7 @@ unsigned long do_mmap_pgoff(struct file int accountable = 1; unsigned long charged = 0, reqprot = prot; + printk("mmap len %ld pgoff %ld\n", len, pgoff); /* * Does the application expect PROT_READ to imply PROT_EXEC? * @@ -921,12 +922,12 @@ unsigned long do_mmap_pgoff(struct file return error; /* Careful about overflows.. */ - len = PAGE_ALIGN(len); + len = HARD_PAGE_ALIGN(len); if (!len || len > TASK_SIZE) return -ENOMEM; /* offset overflow? */ - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) + if ((pgoff + (len >> HARD_PAGE_SHIFT)) < pgoff) return -EOVERFLOW; /* Too many mappings? 
*/ @@ -937,7 +938,7 @@ unsigned long do_mmap_pgoff(struct file * that it represents a valid section of the address space. */ addr = get_unmapped_area(file, addr, len, pgoff, flags); - if (addr & ~PAGE_MASK) + if (addr & ~HARD_PAGE_MASK) return addr; /* Do simple checking here so the lower-level routines won't have @@ -955,10 +956,10 @@ unsigned long do_mmap_pgoff(struct file /* mlock MCL_FUTURE? */ if (vm_flags & VM_LOCKED) { unsigned long locked, lock_limit; - locked = len >> PAGE_SHIFT; + locked = len >> HARD_PAGE_SHIFT; locked += mm->locked_vm; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; + lock_limit >>= HARD_PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } @@ -1016,7 +1017,7 @@ unsigned long do_mmap_pgoff(struct file /* * Set pgoff according to addr for anon_vma. */ - pgoff = addr >> PAGE_SHIFT; + pgoff = addr >> HARD_PAGE_SHIFT; break; default: return -EINVAL; @@ -1038,7 +1039,7 @@ munmap_back: } /* Check against address space limit. */ - if (!may_expand_vm(mm, len >> PAGE_SHIFT)) + if (!may_expand_vm(mm, len >> HARD_PAGE_SHIFT)) return -ENOMEM; if (accountable && (!(flags & MAP_NORESERVE) || @@ -1144,10 +1145,10 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; - vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); + mm->total_vm += len >> HARD_PAGE_SHIFT; + vm_stat_account(mm, vm_flags, file, len >> HARD_PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> HARD_PAGE_SHIFT; make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -1183,7 +1184,7 @@ EXPORT_SYMBOL(do_mmap_pgoff); * Ugly calling convention alert: * Return value with the low bits set means error value, * ie - * if (ret & ~PAGE_MASK) + * if (ret & ~HARD_PAGE_MASK) * error = ret; * * This function "knows" that -ENOMEM has the bits set. 
@@ -1204,7 +1205,7 @@ arch_get_unmapped_area(struct file *filp return addr; if (addr) { - addr = PAGE_ALIGN(addr); + addr = HARD_PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -1281,7 +1282,7 @@ arch_get_unmapped_area_topdown(struct fi /* requesting a specific address */ if (addr) { - addr = PAGE_ALIGN(addr); + addr = HARD_PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -1378,7 +1379,7 @@ get_unmapped_area(struct file *file, uns if (addr > TASK_SIZE - len) return -ENOMEM; - if (addr & ~PAGE_MASK) + if (addr & ~HARD_PAGE_MASK) return -EINVAL; return addr; @@ -1483,7 +1484,7 @@ static int acct_stack_growth(struct vm_a unsigned long locked; unsigned long limit; locked = mm->locked_vm + grow; - limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> HARD_PAGE_SHIFT; if (locked > limit && !capable(CAP_IPC_LOCK)) return -ENOMEM; } @@ -1538,8 +1539,8 @@ int expand_upwards(struct vm_area_struct * anon_vma lock to serialize against concurrent expand_stacks. * Also guard against wrapping around to address 0. 
*/ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); + if (address < HARD_PAGE_ALIGN(address+4)) + address = HARD_PAGE_ALIGN(address+4); else { anon_vma_unlock(vma); return -ENOMEM; @@ -1551,7 +1552,7 @@ int expand_upwards(struct vm_area_struct unsigned long size, grow; size = address - vma->vm_start; - grow = (address - vma->vm_end) >> PAGE_SHIFT; + grow = (address - vma->vm_end) >> HARD_PAGE_SHIFT; error = acct_stack_growth(vma, size, grow); if (!error) @@ -1573,7 +1574,7 @@ find_extend_vma(struct mm_struct *mm, un { struct vm_area_struct *vma, *prev; - addr &= PAGE_MASK; + addr &= HARD_PAGE_MASK; vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; @@ -1605,7 +1606,7 @@ int expand_stack(struct vm_area_struct * * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ - address &= PAGE_MASK; + address &= HARD_PAGE_MASK; error = 0; /* Somebody else might have raced and expanded it already */ @@ -1613,7 +1614,7 @@ int expand_stack(struct vm_area_struct * unsigned long size, grow; size = vma->vm_end - address; - grow = (vma->vm_start - address) >> PAGE_SHIFT; + grow = (vma->vm_start - address) >> HARD_PAGE_SHIFT; error = acct_stack_growth(vma, size, grow); if (!error) { @@ -1631,7 +1632,7 @@ find_extend_vma(struct mm_struct * mm, u struct vm_area_struct * vma; unsigned long start; - addr &= PAGE_MASK; + addr &= HARD_PAGE_MASK; vma = find_vma(mm,addr); if (!vma) return NULL; @@ -1750,7 +1751,7 @@ int split_vma(struct mm_struct * mm, str new->vm_end = addr; else { new->vm_start = addr; - new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); + new->vm_pgoff += ((addr - vma->vm_start) >> HARD_PAGE_SHIFT); } pol = mpol_copy(vma_policy(vma)); @@ -1768,7 +1769,7 @@ int split_vma(struct mm_struct * mm, str if (new_below) vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + - ((addr - new->vm_start) >> PAGE_SHIFT), new); + ((addr - new->vm_start) >> 
HARD_PAGE_SHIFT), new); else vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); @@ -1785,10 +1786,10 @@ int do_munmap(struct mm_struct *mm, unsi unsigned long end; struct vm_area_struct *vma, *prev, *last; - if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) + if ((start & ~HARD_PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; - if ((len = PAGE_ALIGN(len)) == 0) + if ((len = HARD_PAGE_ALIGN(len)) == 0) return -EINVAL; /* Find the first overlapping VMA */ @@ -1873,10 +1874,10 @@ unsigned long do_brk(unsigned long addr, struct vm_area_struct * vma, * prev; unsigned long flags; struct rb_node ** rb_link, * rb_parent; - pgoff_t pgoff = addr >> PAGE_SHIFT; + pgoff_t pgoff = addr >> HARD_PAGE_SHIFT; int error; - len = PAGE_ALIGN(len); + len = HARD_PAGE_ALIGN(len); if (!len) return addr; @@ -1897,10 +1898,10 @@ unsigned long do_brk(unsigned long addr, */ if (mm->def_flags & VM_LOCKED) { unsigned long locked, lock_limit; - locked = len >> PAGE_SHIFT; + locked = len >> HARD_PAGE_SHIFT; locked += mm->locked_vm; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; + lock_limit >>= HARD_PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } @@ -1923,13 +1924,13 @@ unsigned long do_brk(unsigned long addr, } /* Check against address space limits *after* clearing old maps... */ - if (!may_expand_vm(mm, len >> PAGE_SHIFT)) + if (!may_expand_vm(mm, len >> HARD_PAGE_SHIFT)) return -ENOMEM; if (mm->map_count > sysctl_max_map_count) return -ENOMEM; - if (security_vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> HARD_PAGE_SHIFT)) return -ENOMEM; /* Can we just expand an old private anonymous mapping? 
*/ @@ -1942,7 +1943,7 @@ unsigned long do_brk(unsigned long addr, */ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { - vm_unacct_memory(len >> PAGE_SHIFT); + vm_unacct_memory(len >> HARD_PAGE_SHIFT); return -ENOMEM; } @@ -1955,9 +1956,9 @@ unsigned long do_brk(unsigned long addr, (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> HARD_PAGE_SHIFT; if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> HARD_PAGE_SHIFT; make_pages_present(addr, addr + len); } return addr; @@ -2019,7 +2020,7 @@ int insert_vm_struct(struct mm_struct * */ if (!vma->vm_file) { BUG_ON(vma->anon_vma); - vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; + vma->vm_pgoff = vma->vm_start >> HARD_PAGE_SHIFT; } __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent); if (__vma && __vma->vm_start < vma->vm_end) @@ -2050,7 +2051,7 @@ struct vm_area_struct *copy_vma(struct v * to match new location, to increase its chance of merging. */ if (!vma->vm_file && !vma->anon_vma) - pgoff = addr >> PAGE_SHIFT; + pgoff = addr >> HARD_PAGE_SHIFT; find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, @@ -2094,7 +2095,7 @@ int may_expand_vm(struct mm_struct *mm, unsigned long cur = mm->total_vm; /* pages */ unsigned long lim; - lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> HARD_PAGE_SHIFT; if (cur + npages > lim) return 0; @@ -2111,7 +2112,7 @@ static struct page *special_mapping_nopa address -= vma->vm_start; for (pages = vma->vm_private_data; address > 0 && *pages; ++pages) - address -= PAGE_SIZE; + address -= HARD_PAGE_SIZE; if (*pages) { struct page *page = *pages; @@ -2138,7 +2139,7 @@ static struct vm_operations_struct speci * Called with mm->mmap_sem held for writing. 
* Insert a new vma covering the given region, with the given flags. * Its pages are supplied by the given array of struct page *. - * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. + * The array can be shorter than len >> HARD_PAGE_SHIFT if it's null-terminated. * The region past the last page supplied will always produce SIGBUS. * The array pointer and the pages it points to are assumed to stay alive * for as long as this mapping might exist. @@ -2168,7 +2169,7 @@ int install_special_mapping(struct mm_st return -ENOMEM; } - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> HARD_PAGE_SHIFT; return 0; } diff --git a/mm/swapfile.c b/mm/swapfile.c --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -512,7 +512,7 @@ static void unuse_pte(struct vm_area_str inc_mm_counter(vma->vm_mm, anon_rss); get_page(page); set_pte_at(vma->vm_mm, addr, pte, - pte_mkold(mk_pte(page, vma->vm_page_prot))); + pte_mkold(mk_pte(page, addr, vma->vm_page_prot))); page_add_anon_rmap(page, vma, addr); swap_free(entry); /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -35,7 +35,7 @@ static void vunmap_pte_range(pmd_t *pmd, do { pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); WARN_ON(!pte_none(ptent) && !pte_present(ptent)); - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += HARD_PAGE_SIZE, addr != end); } static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr, @@ -100,9 +100,11 @@ static int vmap_pte_range(pmd_t *pmd, un WARN_ON(!pte_none(*pte)); if (!page) return -ENOMEM; - set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); - (*pages)++; - } while (pte++, addr += PAGE_SIZE, addr != end); + set_pte_at(&init_mm, addr, pte, mk_pte(page, addr, prot)); + addr += HARD_PAGE_SIZE; + if (!hardpfn_offset_to_index(addr)) + (*pages)++; + } while (pte++, addr != end); return 0; }