From: "Mikael Starvik" Memory management patches. * SMP support. * Non-executable stack (on v32). * 4-level page tables. * Added simple Thread Local Storage support. Signed-off-by: Mikael Starvik Signed-off-by: Andrew Morton --- arch/cris/mm/fault.c | 95 ++++++++++++++++++++++++++++++++++------ arch/cris/mm/ioremap.c | 58 +++++++++++++++++------- arch/cris/mm/tlb.c | 25 ++-------- include/asm-cris/arch-v10/mmu.h | 5 +- 4 files changed, 135 insertions(+), 48 deletions(-) diff -puN arch/cris/mm/fault.c~cris-update-9-17-mm arch/cris/mm/fault.c --- 25/arch/cris/mm/fault.c~cris-update-9-17-mm 2005-06-25 14:20:05.000000000 -0700 +++ 25-akpm/arch/cris/mm/fault.c 2005-06-25 14:20:05.000000000 -0700 @@ -6,6 +6,38 @@ * Authors: Bjorn Wesen * * $Log: fault.c,v $ + * Revision 1.20 2005/03/04 08:16:18 starvik + * Merge of Linux 2.6.11. + * + * Revision 1.19 2005/01/14 10:07:59 starvik + * Fixed warning. + * + * Revision 1.18 2005/01/12 08:10:14 starvik + * Readded the change of frametype when handling kernel page fault fixup + * for v10. This is necessary to avoid that the CPU remakes the faulting + * access. + * + * Revision 1.17 2005/01/11 13:53:05 starvik + * Use raw_printk. + * + * Revision 1.16 2004/12/17 11:39:41 starvik + * SMP support. + * + * Revision 1.15 2004/11/23 18:36:18 starvik + * Stack is now non-executable. + * Signal handler trampolines are placed in a reserved page mapped into all + * processes. + * + * Revision 1.14 2004/11/23 07:10:21 starvik + * Moved find_fixup_code to generic code. + * + * Revision 1.13 2004/11/23 07:00:54 starvik + * Actually use the execute permission bit in the MMU. This makes it possible + * to prevent e.g. attacks where executable code is put on the stack. 
+ * + * Revision 1.12 2004/09/29 06:16:04 starvik + * Use instruction_pointer + * * Revision 1.11 2004/05/14 07:58:05 starvik * Merge of changes from 2.4 * @@ -103,6 +135,7 @@ extern int find_fixup_code(struct pt_regs *); extern void die_if_kernel(const char *, struct pt_regs *, long); +extern int raw_printk(const char *fmt, ...); /* debug of low-level TLB reload */ #undef DEBUG @@ -118,7 +151,8 @@ extern void die_if_kernel(const char *, /* current active page directory */ -volatile pgd_t *current_pgd; +volatile DEFINE_PER_CPU(pgd_t *,current_pgd); +unsigned long cris_signal_return_page; /* * This routine handles page faults. It determines the address, @@ -146,8 +180,9 @@ do_page_fault(unsigned long address, str struct vm_area_struct * vma; siginfo_t info; - D(printk("Page fault for %X at %X, prot %d write %d\n", - address, regs->erp, protection, writeaccess)); + D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n", + address, smp_processor_id(), instruction_pointer(regs), + protection, writeaccess)); tsk = current; @@ -175,8 +210,19 @@ do_page_fault(unsigned long address, str !user_mode(regs)) goto vmalloc_fault; + /* When stack execution is not allowed we store the signal + * trampolines in the reserved cris_signal_return_page. + * Handle this in the exact same way as vmalloc (we know + * that the mapping is there and is valid so no need to + * call handle_mm_fault). 
+ */ + if (cris_signal_return_page && + address == cris_signal_return_page && + !protection && user_mode(regs)) + goto vmalloc_fault; + /* we can and should enable interrupts at this point */ - sti(); + local_irq_enable(); mm = tsk->mm; info.si_code = SEGV_MAPERR; @@ -220,7 +266,10 @@ do_page_fault(unsigned long address, str /* first do some preliminary protection checks */ - if (writeaccess) { + if (writeaccess == 2){ + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + } else if (writeaccess == 1) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { @@ -234,7 +283,7 @@ do_page_fault(unsigned long address, str * the fault. */ - switch (handle_mm_fault(mm, vma, address, writeaccess)) { + switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) { case 1: tsk->min_flt++; break; @@ -292,10 +341,10 @@ do_page_fault(unsigned long address, str */ if ((unsigned long) (address) < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel access"); - printk(" at virtual address %08lx\n",address); + raw_printk(KERN_ALERT "Unable to handle kernel access"); + raw_printk(" at virtual address %08lx\n",address); die_if_kernel("Oops", regs, (writeaccess << 1) | protection); @@ -346,10 +395,11 @@ vmalloc_fault: int offset = pgd_index(address); pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; - pgd = (pgd_t *)current_pgd + offset; + pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset; pgd_k = init_mm.pgd + offset; /* Since we're two-level, we don't need to do both @@ -364,8 +414,13 @@ vmalloc_fault: * it exists. 
*/ - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + goto no_context; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto bad_area_nosemaphore; @@ -385,3 +440,19 @@ vmalloc_fault: return; } } + +/* Find fixup code. */ +int +find_fixup_code(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) { + /* Adjust the instruction pointer in the stackframe. */ + instruction_pointer(regs) = fixup->fixup; + arch_fixup(regs); + return 1; + } + + return 0; +} diff -puN arch/cris/mm/ioremap.c~cris-update-9-17-mm arch/cris/mm/ioremap.c --- 25/arch/cris/mm/ioremap.c~cris-update-9-17-mm 2005-06-25 14:20:05.000000000 -0700 +++ 25-akpm/arch/cris/mm/ioremap.c 2005-06-25 14:20:05.000000000 -0700 @@ -14,9 +14,10 @@ #include #include #include +#include extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) + unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -31,9 +32,7 @@ extern inline void remap_area_pte(pte_t printk("remap_area_pte: page already exists\n"); BUG(); } - set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | __READABLE | - __WRITEABLE | _PAGE_GLOBAL | - _PAGE_KERNEL | flags))); + set_pte(pte, mk_pte_phys(phys_addr, prot)); address += PAGE_SIZE; phys_addr += PAGE_SIZE; pte++; @@ -41,7 +40,7 @@ extern inline void remap_area_pte(pte_t } static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) + unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -56,7 +55,7 @@ static inline int remap_area_pmd(pmd_t * pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); if (!pte) return -ENOMEM; - remap_area_pte(pte, address, end - 
address, address + phys_addr, flags); + remap_area_pte(pte, address, end - address, address + phys_addr, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -64,7 +63,7 @@ static inline int remap_area_pmd(pmd_t * } static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) + unsigned long size, pgprot_t prot) { int error; pgd_t * dir; @@ -77,13 +76,19 @@ static int remap_area_pages(unsigned lon BUG(); spin_lock(&init_mm.page_table_lock); do { + pud_t *pud; pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + error = -ENOMEM; + pud = pud_alloc(&init_mm, dir, address); + if (!pud) + break; + pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) break; if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) + phys_addr + address, prot)) break; error = 0; address = (address + PGDIR_SIZE) & PGDIR_MASK; @@ -107,9 +112,9 @@ static int remap_area_pages(unsigned lon * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. 
*/ -void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +void __iomem * __ioremap_prot(unsigned long phys_addr, unsigned long size, pgprot_t prot) { - void * addr; + void __iomem * addr; struct vm_struct * area; unsigned long offset, last_addr; @@ -131,15 +136,36 @@ void * __ioremap(unsigned long phys_addr area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vfree(addr); + addr = (void __iomem *)area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, prot)) { + vfree((void __force *)addr); return NULL; } - return (void *) (offset + (char *)addr); + return (void __iomem *) (offset + (char __iomem *)addr); +} + +void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +{ + return __ioremap_prot(phys_addr, size, + __pgprot(_PAGE_PRESENT | __READABLE | + __WRITEABLE | _PAGE_GLOBAL | + _PAGE_KERNEL | flags)); +} + +/** + * ioremap_nocache - map bus memory into CPU space + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * Must be freed with iounmap. + */ + +void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +{ + return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0); } -void iounmap(void *addr) +void iounmap(volatile void __iomem *addr) { if (addr > high_memory) return vfree((void *) (PAGE_MASK & (unsigned long) addr)); diff -puN arch/cris/mm/tlb.c~cris-update-9-17-mm arch/cris/mm/tlb.c --- 25/arch/cris/mm/tlb.c~cris-update-9-17-mm 2005-06-25 14:20:05.000000000 -0700 +++ 25-akpm/arch/cris/mm/tlb.c 2005-06-25 14:20:05.000000000 -0700 @@ -29,18 +29,6 @@ struct mm_struct *page_id_map[NUM_PAGEID]; static int map_replace_ptr = 1; /* which page_id_map entry to replace next */ -/* - * Initialize the context related info for a new mm_struct - * instance. 
- */ - -int -init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context = NO_CONTEXT; - return 0; -} - /* the following functions are similar to those used in the PPC port */ static inline void @@ -60,12 +48,12 @@ alloc_context(struct mm_struct *mm) */ flush_tlb_mm(old_mm); - old_mm->context = NO_CONTEXT; + old_mm->context.page_id = NO_CONTEXT; } /* insert it into the page_id_map */ - mm->context = map_replace_ptr; + mm->context.page_id = map_replace_ptr; page_id_map[map_replace_ptr] = mm; map_replace_ptr++; @@ -81,7 +69,7 @@ alloc_context(struct mm_struct *mm) void get_mmu_context(struct mm_struct *mm) { - if(mm->context == NO_CONTEXT) + if(mm->context.page_id == NO_CONTEXT) alloc_context(mm); } @@ -96,11 +84,10 @@ get_mmu_context(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { - if(mm->context != NO_CONTEXT) { - D(printk("destroy_context %d (%p)\n", mm->context, mm)); + if(mm->context.page_id != NO_CONTEXT) { + D(printk("destroy_context %d (%p)\n", mm->context.page_id, mm)); flush_tlb_mm(mm); /* TODO this might be redundant ? */ - page_id_map[mm->context] = NULL; - /* mm->context = NO_CONTEXT; redundant.. mm will be freed */ + page_id_map[mm->context.page_id] = NULL; } } diff -puN include/asm-cris/arch-v10/mmu.h~cris-update-9-17-mm include/asm-cris/arch-v10/mmu.h --- 25/include/asm-cris/arch-v10/mmu.h~cris-update-9-17-mm 2005-06-25 14:20:05.000000000 -0700 +++ 25-akpm/include/asm-cris/arch-v10/mmu.h 2005-06-25 14:20:05.000000000 -0700 @@ -7,7 +7,10 @@ /* type used in struct mm to couple an MMU context to an active mm */ -typedef unsigned int mm_context_t; +typedef struct +{ + unsigned int page_id; +} mm_context_t; /* kernel memory segments */ _