From: "Andi Kleen" Collected ioremap fixes - Call change_page_attr correctly - Fix a race during iounmap - Handle mmio holes without mem_map correctly (needed for SRAT patch) - Some cleanups Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton --- 25-akpm/arch/x86_64/mm/ioremap.c | 74 ++++++++++++++++++++------------ 25-akpm/arch/x86_64/mm/pageattr.c | 40 ++++++++++------- 25-akpm/include/asm-x86_64/cacheflush.h | 1 3 files changed, 72 insertions(+), 43 deletions(-) diff -puN arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes arch/x86_64/mm/ioremap.c --- 25/arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes 2004-12-03 20:24:05.098869408 -0800 +++ 25-akpm/arch/x86_64/mm/ioremap.c 2004-12-03 20:24:05.105868344 -0800 @@ -16,7 +16,7 @@ #include #include #include - +#include static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, unsigned long phys_addr, unsigned long flags) @@ -122,7 +122,31 @@ static int remap_area_pages(unsigned lon } /* - * Generic mapping function (not visible outside): + * Fix up the linear direct mapping of the kernel to avoid cache attribute + * conflicts. + */ +static int +ioremap_change_attr(unsigned long phys_addr, unsigned long size, + unsigned long flags) +{ + int err = 0; + if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { + unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long vaddr = (unsigned long) __va(phys_addr); + + /* + * Must use a address here and not struct page because the phys addr + * can be a in hole between nodes and not have an memmap entry. + */ + err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags)); + if (!err) + global_flush_tlb(); + } + return err; +} + +/* + * Generic mapping function */ /* @@ -184,6 +208,11 @@ void __iomem * __ioremap(unsigned long p area->phys_addr = phys_addr; addr = area->addr; if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + return NULL; + } + if (ioremap_change_attr(phys_addr, size, flags) < 0) { + area->flags &= 0xffffff; vunmap(addr); return NULL; } @@ -214,43 +243,34 @@ void __iomem * __ioremap(unsigned long p void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) { - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); - if (!p) - return p; - - if (phys_addr + size <= virt_to_phys(high_memory)) { - struct page *ppage = virt_to_page(__va(phys_addr)); - unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - - BUG_ON(phys_addr+size >= (unsigned long)high_memory); - BUG_ON(phys_addr + size <= phys_addr); - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; + return __ioremap(phys_addr, size, _PAGE_PCD); } void iounmap(volatile void __iomem *addr) { - struct vm_struct *p; + struct vm_struct *p, **pprev; + if (addr <= high_memory) return; - p = remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + + write_lock(&vmlist_lock); + for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev) + if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr)) + break; if (!p) { printk("__iounmap: bad address %p\n", addr); - return; - } - - if ((p->flags >> 24) && p->phys_addr + p->size <= virt_to_phys(high_memory)) { + goto out_unlock; + } + *pprev = p->next; + unmap_vm_area(p); + if ((p->flags >> 24) && + p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { change_page_attr(virt_to_page(__va(p->phys_addr)), 
diff -puN arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes arch/x86_64/mm/ioremap.c
--- 25/arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes	2004-12-03 20:24:05.098869408 -0800
+++ 25-akpm/arch/x86_64/mm/ioremap.c	2004-12-03 20:24:05.105868344 -0800
@@ -16,7 +16,7 @@
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-
+#include <asm/proto.h>
 
 static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	unsigned long phys_addr, unsigned long flags)
@@ -122,7 +122,31 @@ static int remap_area_pages(unsigned lon
 }
 
 /*
- * Generic mapping function (not visible outside):
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+static int
+ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+		    unsigned long flags)
+{
+	int err = 0;
+	if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
+		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long vaddr = (unsigned long) __va(phys_addr);
+
+		/*
+		 * Must use an address here and not a struct page because the phys
+		 * addr can be in a hole between nodes and not have a mem_map entry.
+		 */
+		err = change_page_attr_addr(vaddr, npages, __pgprot(__PAGE_KERNEL|flags));
+		if (!err)
+			global_flush_tlb();
+	}
+	return err;
+}
+
+/*
+ * Generic mapping function
  */
 
 /*
@@ -184,6 +208,11 @@ void __iomem * __ioremap(unsigned long p
 	area->phys_addr = phys_addr;
 	addr = area->addr;
 	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+		return NULL;
+	}
+	if (ioremap_change_attr(phys_addr, size, flags) < 0) {
+		area->flags &= 0xffffff;
 		vunmap(addr);
 		return NULL;
 	}
@@ -214,43 +243,34 @@ void __iomem * __ioremap(unsigned long p
 
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-	if (!p)
-		return p;
-
-	if (phys_addr + size <= virt_to_phys(high_memory)) {
-		struct page *ppage = virt_to_page(__va(phys_addr));
-		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-		BUG_ON(phys_addr+size >= (unsigned long)high_memory);
-		BUG_ON(phys_addr + size <= phys_addr);
-
-		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
-			iounmap(p);
-			p = NULL;
-		}
-		global_flush_tlb();
-	}
-
-	return p;
+	return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 
 void iounmap(volatile void __iomem *addr)
 {
-	struct vm_struct *p;
+	struct vm_struct *p, **pprev;
+
 	if (addr <= high_memory)
 		return;
-	p = remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+
+	write_lock(&vmlist_lock);
+	for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev)
+		if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr))
+			break;
 	if (!p) {
 		printk("__iounmap: bad address %p\n", addr);
-		return;
-	}
-
-	if ((p->flags >> 24) && p->phys_addr + p->size <= virt_to_phys(high_memory)) {
+		goto out_unlock;
+	}
+	*pprev = p->next;
+	unmap_vm_area(p);
+	if ((p->flags >> 24) &&
+	    p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT, PAGE_KERNEL);
 		global_flush_tlb();
 	}
+out_unlock:
+	write_unlock(&vmlist_lock);
 	kfree(p);
 }
 
diff -puN arch/x86_64/mm/pageattr.c~x86_64-collected-ioremap-fixes arch/x86_64/mm/pageattr.c
--- 25/arch/x86_64/mm/pageattr.c~x86_64-collected-ioremap-fixes	2004-12-03 20:24:05.099869256 -0800
+++ 25-akpm/arch/x86_64/mm/pageattr.c	2004-12-03 20:24:05.106868192 -0800
@@ -122,26 +122,27 @@ static void revert_page(unsigned long ad
 }
 
 static int
-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot,
-		   pgprot_t ref_prot)
+__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
+		   pgprot_t ref_prot)
 {
 	pte_t *kpte;
 	struct page *kpte_page;
 	unsigned kpte_flags;
-
 	kpte = lookup_address(address);
 	if (!kpte) return 0;
 	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
 	kpte_flags = pte_val(*kpte);
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) {
 		if ((kpte_flags & _PAGE_PSE) == 0) {
-			set_pte(kpte, mk_pte(page, prot));
+			set_pte(kpte, pfn_pte(pfn, prot));
 		} else {
 			/*
-			 * split_large_page will take the reference for this change_page_attr
-			 * on the split page.
+			 * split_large_page will take the reference for this
+			 * change_page_attr on the split page.
 			 */
-			struct page *split = split_large_page(address, prot, ref_prot);
+			struct page *split;
+
+			split = split_large_page(address, prot, ref_prot);
 			if (!split)
 				return -ENOMEM;
 			set_pte(kpte,mk_pte(split, ref_prot));
@@ -149,7 +150,7 @@ __change_page_attr(unsigned long address
 		}
 		get_page(kpte_page);
 	} else if ((kpte_flags & _PAGE_PSE) == 0) {
-		set_pte(kpte, mk_pte(page, ref_prot));
+		set_pte(kpte, pfn_pte(pfn, ref_prot));
 		__put_page(kpte_page);
 	} else
 		BUG();
@@ -181,31 +182,38 @@ __change_page_attr(unsigned long address
  *
  * Caller must call global_flush_tlb() after this.
  */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
 	int err = 0;
 	int i;
 
 	down_write(&init_mm.mmap_sem);
-	for (i = 0; i < numpages; !err && i++, page++) {
-		unsigned long address = (unsigned long)page_address(page);
-		err = __change_page_attr(address, page, prot, PAGE_KERNEL);
+	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
+		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+
+		err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
 		if (err)
 			break;
 		/* Handle kernel mapping too which aliases part of the
 		 * lowmem */
 		/* Disabled right now. Fixme */
-		if (0 && page_to_phys(page) < KERNEL_TEXT_SIZE) {
+		if (0 && __pa(address) < KERNEL_TEXT_SIZE) {
 			unsigned long addr2;
-			addr2 = __START_KERNEL_map + page_to_phys(page);
-			err = __change_page_attr(addr2, page, prot,
-						 PAGE_KERNEL_EXEC);
+			addr2 = __START_KERNEL_map + __pa(address);
+			err = __change_page_attr(addr2, pfn, prot, PAGE_KERNEL_EXEC);
 		}
 	}
 	up_write(&init_mm.mmap_sem);
 	return err;
 }
 
+/* Don't call this for MMIO areas that may not have a mem_map entry */
+int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	return change_page_attr_addr(addr, numpages, prot);
+}
+
 void global_flush_tlb(void)
 {
 	struct deferred_page *df, *next_df;

diff -puN include/asm-x86_64/cacheflush.h~x86_64-collected-ioremap-fixes include/asm-x86_64/cacheflush.h
--- 25/include/asm-x86_64/cacheflush.h~x86_64-collected-ioremap-fixes	2004-12-03 20:24:05.101868952 -0800
+++ 25-akpm/include/asm-x86_64/cacheflush.h	2004-12-03 20:24:05.106868192 -0800
@@ -25,5 +25,6 @@
 
 void global_flush_tlb(void);
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
+int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 
 #endif /* _X8664_CACHEFLUSH_H */
_