diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/mm/shmem.c 2.4.23aa2/mm/shmem.c
--- x-ref/mm/shmem.c	2004-01-28 19:57:49.000000000 +0100
+++ 2.4.23aa2/mm/shmem.c	2004-01-28 22:14:14.000000000 +0100
@@ -602,7 +602,17 @@ getswap:
 	spin_unlock(&info->lock);
 	SetPageUptodate(page);
 	set_page_dirty(page);
-	UnlockPage(page);
+
+	/*
+	 * The page is now ready for the swapout I/O, so we'd better start
+	 * it here rather than forcing another whole scan of the lru before
+	 * this page can finally be freed. This reduces the cpu time spent
+	 * scanning the lru, especially if there are several gigabytes of
+	 * shm queued in a row in the lru.
+	 */
+	ClearPageDirty(page);
+	page->mapping->a_ops->writepage(page);
+
 	return 0;
 fail:
 	return fail_writepage(page);
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/mm/vmscan.c 2.4.23aa2/mm/vmscan.c
--- x-ref/mm/vmscan.c	2004-01-28 19:57:56.000000000 +0100
+++ 2.4.23aa2/mm/vmscan.c	2004-01-28 19:59:16.000000000 +0100
@@ -86,6 +86,139 @@ int vm_vfs_scan_ratio = 6;
 int vm_anon_lru = 0;
 
 /*
+ * find_pte - Find a pte pointer given a vma and a struct page.
+ * @vma: the vma to search
+ * @page: the page to find
+ *
+ * Determine if this page is mapped in this vma. If it is, map and return
+ * the pte pointer associated with it. Return NULL if the page is not
+ * mapped in this vma for any reason.
+ *
+ * This is strictly an internal helper function for the object-based rmap
+ * functions.
+ *
+ * It is the caller's responsibility to unmap the pte if it is returned.
+ *
+ * This find_pte function is partly taken from the 2.6 objrmap from IBM.
+ */
+static inline pte_t * find_pte(struct mm_struct * mm, struct vm_area_struct * vma,
+			       struct page * page, unsigned long * addr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	unsigned long loffset;
+	unsigned long address;
+
+	loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+	address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
+	if (address < vma->vm_start || address >= vma->vm_end)
+		return NULL;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	pmd = pmd_offset(pgd, address);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	ptep = pte_offset_atomic(pmd, address);
+	pte = *ptep;
+
+	if (!pte_present(pte))
+		goto out_unmap;
+
+	if (page != pte_page(pte))
+		goto out_unmap;
+
+	*addr = address;
+
+	return ptep;
+
+ out_unmap:
+	pte_kunmap(ptep);
+	return NULL;
+}
+
+static int FASTCALL(try_to_unmap_shared_vma(struct vm_area_struct *, struct page *));
+static int try_to_unmap_shared_vma(struct vm_area_struct * vma, struct page * page)
+{
+	struct mm_struct *mm;
+	unsigned long address;
+	pte_t * ptep, pte;
+	int break_loop = 0;
+
+	mm = vma->vm_mm;
+	if (!spin_trylock(&mm->page_table_lock))
+		return 0;
+
+	if (vma->vm_flags & (VM_RESERVED|VM_BIGPAGE|VM_LOCKED))
+		goto out_unlock;
+
+	ptep = find_pte(mm, vma, page, &address);
+	if (!ptep)
+		goto out_unlock;
+	pte = *ptep;
+
+	break_loop = 1;
+	if (pte_young(pte))
+		goto out_unmap;
+	break_loop = 0;
+
+	flush_cache_page(vma, address);
+	pte = ptep_get_and_clear(ptep);
+	flush_tlb_page(vma, address);
+
+	if (pte_dirty(pte))
+		set_page_dirty(page);
+
+	mm->rss--;
+	page_cache_release(page);
+
+ out_unmap:
+	pte_kunmap(ptep);
+
+ out_unlock:
+	spin_unlock(&mm->page_table_lock);
+
+	return break_loop;
+}
+
+static void FASTCALL(__try_to_unmap_shared_mappings(struct vm_area_struct *,
+						    struct page *, struct vm_area_struct *));
+static void __try_to_unmap_shared_mappings(struct vm_area_struct * shared_vma,
+					   struct page * page, struct vm_area_struct * curr_vma)
+{
+	do {
+		if (shared_vma->vm_mm == curr_vma->vm_mm)
+			continue;
+		if (try_to_unmap_shared_vma(shared_vma, page))
+			break;
+	} while ((shared_vma = shared_vma->vm_next_share));
+}
+
+static inline void try_to_unmap_shared_mappings(struct page * page, struct vm_area_struct * curr_vma)
+{
+	struct address_space *mapping = page->mapping;
+
+	if (!mapping)
+		return;
+	if (page_count(page) - !!page->buffers <= 2)
+		return;
+	if (!mapping->i_mmap_shared)
+		return;
+
+	if (unlikely(!spin_trylock(&mapping->i_shared_lock)))
+		return;
+	BUG_ON(!mapping->i_mmap_shared);
+
+	__try_to_unmap_shared_mappings(mapping->i_mmap_shared, page, curr_vma);
+
+	spin_unlock(&mapping->i_shared_lock);
+}
+
+/*
  * The swap-out function returns 1 if it successfully
  * scanned all the pages it was asked to (`count').
  * It returns zero if it couldn't do anything,
@@ -137,6 +270,9 @@ set_swap_pte:
 		set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
 		mm->rss--;
+
+		try_to_unmap_shared_mappings(page, vma);
+
 		UnlockPage(page);
 		{
 			int freeable = page_count(page) - !!page->buffers <= 2;
@@ -409,8 +545,8 @@ static int memclass_related_bhs(struct p
 }
 
 static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone));
-static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout));
-static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
+static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout, int can_writepage));
+static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout, int can_writepage)
 {
 	struct list_head * entry;
 	int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
@@ -468,6 +604,9 @@ static int shrink_cache(int nr_pages, zo
 
 		max_scan--;
 
+		if (!can_writepage && PageDirty(page))
+			continue;
+
 		/* Racy check to avoid trylocking when not worthwhile */
 		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
 			goto page_mapped;
@@ -581,7 +720,7 @@ static int shrink_cache(int nr_pages, zo
 			spin_unlock(&pagecache_lock);
 			UnlockPage(page);
 page_mapped:
-			if (--max_mapped < 0) {
+			if (can_writepage && --max_mapped < 0) {
 				spin_unlock(&pagemap_lru_lock);
 
 				nr_pages -= kmem_cache_reap(gfp_mask);
@@ -702,8 +841,8 @@ static void refill_inactive(int nr_pages
 	}
 }
 
-static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout));
-static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)
+static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout, int can_writepage));
+static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout, int can_writepage)
 {
 	nr_pages -= kmem_cache_reap(gfp_mask);
 	if (nr_pages <= 0)
@@ -711,8 +850,7 @@ static int shrink_caches(zone_t * classz
 
 	spin_lock(&pagemap_lru_lock);
 	refill_inactive(nr_pages, classzone);
-
-	nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout);
+	nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout, can_writepage);
 
 out:
 	return nr_pages;
@@ -727,10 +865,16 @@ int try_to_free_pages_zone(zone_t *class
 	for (;;) {
 		int tries = vm_passes;
 		int failed_swapout = !(gfp_mask & __GFP_IO);
-		int nr_pages = SWAP_CLUSTER_MAX;
+		int nr_pages;
 
 		do {
-			nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout);
+			nr_pages = SWAP_CLUSTER_MAX;
+			nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout, 0);
+			if (nr_pages <= 0)
+				return 1;
+
+			nr_pages = SWAP_CLUSTER_MAX;
+			nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout, 1);
 			if (nr_pages <= 0)
 				return 1;