diff -urpN -X /home/fletch/.diff.exclude objrmap/include/linux/fs.h objrmap-sort/include/linux/fs.h
--- objrmap/include/linux/fs.h	Sat Jun 14 18:37:37 2003
+++ objrmap-sort/include/linux/fs.h	Sun Jun 15 20:55:23 2003
@@ -331,6 +331,25 @@ struct address_space {
 	struct address_space	*assoc_mapping;	/* ditto */
 };
 
+/*
+ * s = address_space, r = address_range, v = vma
+ *
+ * s - r - r - r - r - r
+ *     |   |   |   |   |
+ *     v   v   v   v   v
+ *     |       |       |
+ *     v       v       v
+ *             |
+ *             v
+ */
+struct address_range {
+	unsigned long start;	/* Offset into file in PAGE_SIZE units */
+	unsigned long end;	/* Offset of end in PAGE_SIZE units */
+				/* (last page, not beginning of next region) */
+	struct list_head ranges;
+	struct list_head vmas;
+};
+
 struct block_device {
 	struct list_head	bd_hash;
 	atomic_t		bd_count;
diff -urpN -X /home/fletch/.diff.exclude objrmap/mm/memory.c objrmap-sort/mm/memory.c
--- objrmap/mm/memory.c	Sun Jun 15 20:54:09 2003
+++ objrmap-sort/mm/memory.c	Sun Jun 15 20:55:23 2003
@@ -1063,36 +1063,76 @@ out:
 	return ret;
 }
 
-static void vmtruncate_list(struct list_head *head, unsigned long pgoff)
+/*
+ * Helper function for invalidate_mmap_range().
+ * Both hba and hlen are page numbers in PAGE_SIZE units.
+ * An hlen of zero blows away the entire portion of the file after hba.
+ */
+static void
+invalidate_mmap_range_list(struct list_head *head,
+			   unsigned long const hba,
+			   unsigned long const hlen)
 {
-	unsigned long start, end, len, diff;
-	struct vm_area_struct *vma;
-	struct list_head *curr;
-
-	list_for_each(curr, head) {
-		vma = list_entry(curr, struct vm_area_struct, shared);
-		start = vma->vm_start;
-		end = vma->vm_end;
-		len = end - start;
-
-		/* mapping wholly truncated? */
-		if (vma->vm_pgoff >= pgoff) {
-			zap_page_range(vma, start, len);
+	unsigned long hea;	/* last page of hole. */
+	struct address_range *range;
+	struct vm_area_struct *vp;
+	unsigned long zba;
+	unsigned long zea;
+
+	hea = hba + hlen - 1;	/* avoid overflow. */
+	if (hea < hba)
+		hea = ULONG_MAX;
+	list_for_each_entry(range, head, ranges) {
+		if ((hea < range->start) || (hba > range->end))
 			continue;
+		zba = (hba <= range->start) ? range->start : hba;
+		zea = (hea > range->end) ? range->end : hea;
+		list_for_each_entry(vp, &range->vmas, shared) {
+			zap_page_range(vp,
+				((zba - range->start) << PAGE_SHIFT) +
+					vp->vm_start,
+				(zea - zba + 1) << PAGE_SHIFT);
 		}
+	}
+}
 
-		/* mapping wholly unaffected? */
-		len = len >> PAGE_SHIFT;
-		diff = pgoff - vma->vm_pgoff;
-		if (diff >= len)
-			continue;
+/**
+ * invalidate_mmap_range - invalidate the portion of all mmaps
+ * in the specified address_space corresponding to the specified
+ * page range in the underlying file.
+ * @address_space: the address space containing mmaps to be invalidated.
+ * @holebegin: byte in first page to invalidate, relative to the start of
+ * the underlying file. This will be rounded down to a PAGE_SIZE
+ * boundary. Note that this is different from vmtruncate(), which
+ * must keep the partial page. In contrast, we must get rid of
+ * partial pages.
+ * @holelen: size of prospective hole in bytes. This will be rounded
+ * up to a PAGE_SIZE boundary. A holelen of zero truncates to the
+ * end of the file.
+ */
+void
+invalidate_mmap_range(struct address_space *mapping,
+		      loff_t const holebegin,
+		      loff_t const holelen)
+{
+	unsigned long hba = holebegin >> PAGE_SHIFT;
+	unsigned long hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-		/* Ok, partially affected.. */
-		start += diff << PAGE_SHIFT;
-		len = (len - diff) << PAGE_SHIFT;
-		zap_page_range(vma, start, len);
+	/* Check for overflow. */
+	if (sizeof(holelen) > sizeof(hlen)) {
+		long long holeend =
+			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (holeend & ~(long long)ULONG_MAX)
+			hlen = ULONG_MAX - hba + 1;
 	}
-}
+	down(&mapping->i_shared_sem);
+	if (unlikely(!list_empty(&mapping->i_mmap)))
+		invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen);
+	if (unlikely(!list_empty(&mapping->i_mmap_shared)))
+		invalidate_mmap_range_list(&mapping->i_mmap_shared, hba, hlen);
+	up(&mapping->i_shared_sem);
+}
 
 /*
  * Handle all mappings that got truncated by a "truncate()"
@@ -1112,12 +1152,7 @@ int vmtruncate(struct inode * inode, lof
 		goto do_expand;
 	inode->i_size = offset;
 	pgoff = (offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	down(&mapping->i_shared_sem);
-	if (unlikely(!list_empty(&mapping->i_mmap)))
-		vmtruncate_list(&mapping->i_mmap, pgoff);
-	if (unlikely(!list_empty(&mapping->i_mmap_shared)))
-		vmtruncate_list(&mapping->i_mmap_shared, pgoff);
-	up(&mapping->i_shared_sem);
+	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
 
diff -urpN -X /home/fletch/.diff.exclude objrmap/mm/mmap.c objrmap-sort/mm/mmap.c
--- objrmap/mm/mmap.c	Sun Jun 15 20:54:09 2003
+++ objrmap-sort/mm/mmap.c	Sun Jun 15 20:55:23 2003
@@ -306,6 +306,56 @@ static void __vma_link_rb(struct mm_stru
 	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 }
 
+static void vma_add (struct vm_area_struct *vma,
+		     struct list_head *range_list)
+{
+	struct address_range *range;
+	struct list_head *prev, *next;
+	unsigned long start = vma->vm_pgoff;
+	unsigned long end = vma->vm_pgoff +
+		(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1);
+
+	/* First, look for an existing range that matches ours */
+	prev = range_list;
+	list_for_each(next, range_list) {
+		range = list_entry(next, struct address_range, ranges);
+		if (range->start > start)
+			break;		/* this list is sorted by start */
+		if ((range->start == start) && (range->end == end)) {
+			goto found;
+		}
+		prev = next;
+	}
+
+	/*
+	 * No existing range was found that matched.
+	 * But we left range pointing at the last address range
+	 * that was <= start ... so we can just shove ourselves in there.
+	 */
+	range = kmalloc(sizeof(struct address_range), GFP_KERNEL);
+	range->start = start;
+	range->end = end;
+	INIT_LIST_HEAD(&range->ranges);
+	INIT_LIST_HEAD(&range->vmas);
+	list_add(&range->ranges, prev);
+found:
+	list_add_tail(&vma->shared, &range->vmas);
+}
+
+static void vma_del (struct vm_area_struct *vma)
+{
+	struct address_range *range;
+	struct list_head *next;
+
+	next = vma->shared.next;	/* stash the range list we're on */
+	list_del(&vma->shared);		/* remove us from the list of vmas */
+	if (list_empty(next)) {		/* we were the last vma for range */
+		range = list_entry(next, struct address_range, vmas);
+		list_del(&range->ranges);
+		kfree(range);
+	}
+}
+
 static inline void __vma_link_file(struct vm_area_struct *vma)
 {
 	struct file * file;
@@ -319,9 +369,9 @@ static inline void __vma_link_file(struc
 			atomic_dec(&inode->i_writecount);
 
 		if (vma->vm_flags & VM_SHARED)
-			list_add_tail(&vma->shared, &mapping->i_mmap_shared);
+			vma_add(vma, &mapping->i_mmap_shared);
 		else
-			list_add_tail(&vma->shared, &mapping->i_mmap);
+			vma_add(vma, &mapping->i_mmap);
 	}
 }
 
@@ -346,9 +396,7 @@ static void vma_link(struct mm_struct *m
 
 	if (mapping)
 		down(&mapping->i_shared_sem);
-	spin_lock(&mm->page_table_lock);
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
-	spin_unlock(&mm->page_table_lock);
 	if (mapping)
 		up(&mapping->i_shared_sem);
 
@@ -379,14 +427,12 @@ static inline int is_mergeable_vma(struc
 
 static void move_vma_start(struct vm_area_struct *vma, unsigned long addr)
 {
-	spinlock_t *lock = &vma->vm_mm->page_table_lock;
 	struct inode *inode = NULL;
 
 	if (vma->vm_file) {
 		inode = vma->vm_file->f_dentry->d_inode;
 		down(&inode->i_mapping->i_shared_sem);
 	}
-	spin_lock(lock);
 	if (inode)
 		__remove_shared_vm_struct(vma, inode);
 	/* If no vm_file, perhaps we should always keep vm_pgoff at 0?? */
@@ -396,7 +442,6 @@ static void move_vma_start(struct vm_are
 		__vma_link_file(vma);
 		up(&inode->i_mapping->i_shared_sem);
 	}
-	spin_unlock(lock);
 }
 
 /*
@@ -470,7 +515,6 @@ static int vma_merge(struct mm_struct *m
 			is_mergeable_vma(prev, file, vm_flags) &&
 			can_vma_merge_after(prev, vm_flags, file, pgoff)) {
 		struct vm_area_struct *next;
-		spinlock_t *lock = &mm->page_table_lock;
 		struct inode *inode = file ? file->f_dentry->d_inode : NULL;
 		int need_up = 0;
 
@@ -479,7 +523,6 @@ static int vma_merge(struct mm_struct *m
 			down(&inode->i_mapping->i_shared_sem);
 			need_up = 1;
 		}
-		spin_lock(lock);
 		prev->vm_end = end;
 
 		/*
@@ -492,7 +535,6 @@ static int vma_merge(struct mm_struct *m
 			prev->vm_end = next->vm_end;
 			__vma_unlink(mm, next, prev);
 			__remove_shared_vm_struct(next, inode);
-			spin_unlock(lock);
 			if (need_up)
 				up(&inode->i_mapping->i_shared_sem);
 			if (file)
@@ -502,7 +544,6 @@ static int vma_merge(struct mm_struct *m
 			kmem_cache_free(vm_area_cachep, next);
 			return 1;
 		}
-		spin_unlock(lock);
 		if (need_up)
 			up(&inode->i_mapping->i_shared_sem);
 		return 1;
@@ -956,19 +997,16 @@ int expand_stack(struct vm_area_struct *
 	 */
 	address += 4 + PAGE_SIZE - 1;
 	address &= PAGE_MASK;
-	spin_lock(&vma->vm_mm->page_table_lock);
 	grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
 	if (!vm_enough_memory(grow)) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}
 
 	if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 			current->rlim[RLIMIT_AS].rlim_cur) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		vm_unacct_memory(grow);
 		return -ENOMEM;
 	}
@@ -976,7 +1014,6 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
-	spin_unlock(&vma->vm_mm->page_table_lock);
 	return 0;
 }
 
@@ -1010,19 +1047,16 @@ int expand_stack(struct vm_area_struct *
 	 * the spinlock only before relocating the vma range ourself.
 	 */
 	address &= PAGE_MASK;
-	spin_lock(&vma->vm_mm->page_table_lock);
 	grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
 	if (!vm_enough_memory(grow)) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}
 
 	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
 			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 			current->rlim[RLIMIT_AS].rlim_cur) {
-		spin_unlock(&vma->vm_mm->page_table_lock);
 		vm_unacct_memory(grow);
 		return -ENOMEM;
 	}
@@ -1031,7 +1065,6 @@ int expand_stack(struct vm_area_struct *
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
-	spin_unlock(&vma->vm_mm->page_table_lock);
 	return 0;
 }
 
@@ -1194,8 +1227,6 @@ static void unmap_region(struct mm_struc
 /*
  * Create a list of vma's touched by the unmap, removing them from the mm's
  * vma list as we go..
- *
- * Called with the page_table_lock held.
  */
 static void
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1224,6 +1255,7 @@ int split_vma(struct mm_struct * mm, str
 	unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
+	struct address_space *mapping = NULL;
 
 	if (mm->map_count >= MAX_MAP_COUNT)
 		return -ENOMEM;
@@ -1237,6 +1269,9 @@ int split_vma(struct mm_struct * mm, str
 
 	INIT_LIST_HEAD(&new->shared);
 
+	if (vma->vm_file)
+		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+
 	if (new_below) {
 		new->vm_end = addr;
 		move_vma_start(vma, addr);
@@ -1246,6 +1281,16 @@ int split_vma(struct mm_struct * mm, str
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
+	if (mapping) {
+		down(&mapping->i_shared_sem);
+		vma_del(vma);
+		if (vma->vm_flags & VM_SHARED)
+			vma_add(vma, &mapping->i_mmap_shared);
+		else
+			vma_add(vma, &mapping->i_mmap);
+		up(&mapping->i_shared_sem);
+	}
+
 	if (new->vm_file)
 		get_file(new->vm_file);
 
@@ -1318,8 +1363,8 @@ int do_munmap(struct mm_struct *mm, unsi
 	/*
 	 * Remove the vma's, and unmap the actual pages
 	 */
-	spin_lock(&mm->page_table_lock);
 	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+	spin_lock(&mm->page_table_lock);
 	unmap_region(mm, mpnt, prev, start, end);
 	spin_unlock(&mm->page_table_lock);
 
diff -urpN -X /home/fletch/.diff.exclude objrmap/mm/rmap.c objrmap-sort/mm/rmap.c
--- objrmap/mm/rmap.c	Sun Jun 15 20:54:09 2003
+++ objrmap-sort/mm/rmap.c	Sun Jun 15 20:55:23 2003
@@ -37,6 +37,12 @@
 
 /* #define DEBUG_RMAP */
 
+#define foreach_vma_starting_below(vma, listp, shared, page) \
+	list_for_each_entry_while(vma, listp, shared, \
+		page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT) )
+
+
+
 /*
  * Shared pages have a chain of pte_chain structures, used to locate
  * all the mappings to this page.  We only need a pointer to the pte
@@ -205,8 +211,10 @@ static int
 page_referenced_obj(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
+	struct address_range *range;
 	struct vm_area_struct *vma;
 	int referenced = 0;
+	unsigned long index = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
 	if (!page->pte.mapcount)
 		return 0;
@@ -220,11 +228,23 @@ page_referenced_obj(struct page *page)
 	if (down_trylock(&mapping->i_shared_sem))
 		return 1;
 
-	list_for_each_entry(vma, &mapping->i_mmap, shared)
-		referenced += page_referenced_obj_one(vma, page);
-
-	list_for_each_entry(vma, &mapping->i_mmap_shared, shared)
-		referenced += page_referenced_obj_one(vma, page);
+	list_for_each_entry(range, &mapping->i_mmap, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared)
+			referenced += page_referenced_obj_one(vma, page);
+	}
+
+	list_for_each_entry(range, &mapping->i_mmap_shared, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared)
+			referenced += page_referenced_obj_one(vma, page);
+	}
 
 	up(&mapping->i_shared_sem);
 
@@ -512,7 +532,9 @@ static int
 try_to_unmap_obj(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
+	struct address_range *range;
 	struct vm_area_struct *vma;
+	unsigned long index = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	int ret = SWAP_AGAIN;
 
 	if (!mapping)
@@ -524,16 +546,28 @@ try_to_unmap_obj(struct page *page)
 	if (down_trylock(&mapping->i_shared_sem))
 		return ret;
 
-	list_for_each_entry(vma, &mapping->i_mmap, shared) {
-		ret = try_to_unmap_obj_one(vma, page);
-		if (ret == SWAP_FAIL || !page->pte.mapcount)
-			goto out;
+	list_for_each_entry(range, &mapping->i_mmap, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared) {
+			ret = try_to_unmap_obj_one(vma, page);
+			if (ret == SWAP_FAIL || !page->pte.mapcount)
+				goto out;
+		}
 	}
 
-	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
-		ret = try_to_unmap_obj_one(vma, page);
-		if (ret == SWAP_FAIL || !page->pte.mapcount)
-			goto out;
+	list_for_each_entry(range, &mapping->i_mmap_shared, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared) {
+			ret = try_to_unmap_obj_one(vma, page);
+			if (ret == SWAP_FAIL || !page->pte.mapcount)
+				goto out;
+		}
 	}
 
 out:
@@ -752,7 +786,9 @@ out:
 int page_convert_anon(struct page *page)
 {
 	struct address_space *mapping;
+	struct address_range *range;
 	struct vm_area_struct *vma;
+	unsigned long index = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct pte_chain *pte_chain = NULL;
 	pte_t *pte;
 	int err = 0;
@@ -788,41 +824,54 @@ int page_convert_anon(struct page *page)
 	 */
 	pte_chain_unlock(page);
 
-	list_for_each_entry(vma, &mapping->i_mmap, shared) {
-		if (!pte_chain) {
-			pte_chain = pte_chain_alloc(GFP_KERNEL);
+	list_for_each_entry(range, &mapping->i_mmap, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared) {
 			if (!pte_chain) {
-				err = -ENOMEM;
-				goto out_unlock;
+				pte_chain = pte_chain_alloc(GFP_KERNEL);
+				if (!pte_chain) {
+					err = -ENOMEM;
+					goto out_unlock;
+				}
 			}
+			spin_lock(&vma->vm_mm->page_table_lock);
+			pte = find_pte(vma, page, NULL);
+			if (pte) {
+				/* Make sure this isn't a duplicate */
+				page_remove_rmap(page, pte);
+				pte_chain = page_add_rmap(page, pte, pte_chain);
+				pte_unmap(pte);
+			}
+			spin_unlock(&vma->vm_mm->page_table_lock);
 		}
-		spin_lock(&vma->vm_mm->page_table_lock);
-		pte = find_pte(vma, page, NULL);
-		if (pte) {
-			/* Make sure this isn't a duplicate */
-			page_remove_rmap(page, pte);
-			pte_chain = page_add_rmap(page, pte, pte_chain);
-			pte_unmap(pte);
-		}
-		spin_unlock(&vma->vm_mm->page_table_lock);
-	}
-	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
-		if (!pte_chain) {
-			pte_chain = pte_chain_alloc(GFP_KERNEL);
+	}
+
+	list_for_each_entry(range, &mapping->i_mmap_shared, ranges) {
+		if (range->start > index)
+			break;	/* Sorted by start address => we are done */
+		if (range->end < index)
+			continue;
+		list_for_each_entry(vma, &range->vmas, shared) {
 			if (!pte_chain) {
-				err = -ENOMEM;
-				goto out_unlock;
+				pte_chain = pte_chain_alloc(GFP_KERNEL);
+				if (!pte_chain) {
+					err = -ENOMEM;
+					goto out_unlock;
+				}
 			}
+			spin_lock(&vma->vm_mm->page_table_lock);
+			pte = find_pte(vma, page, NULL);
+			if (pte) {
+				/* Make sure this isn't a duplicate */
+				page_remove_rmap(page, pte);
+				pte_chain = page_add_rmap(page, pte, pte_chain);
+				pte_unmap(pte);
+			}
+			spin_unlock(&vma->vm_mm->page_table_lock);
 		}
-		spin_lock(&vma->vm_mm->page_table_lock);
-		pte = find_pte(vma, page, NULL);
-		if (pte) {
-			/* Make sure this isn't a duplicate */
-			page_remove_rmap(page, pte);
-			pte_chain = page_add_rmap(page, pte, pte_chain);
-			pte_unmap(pte);
-		}
-		spin_unlock(&vma->vm_mm->page_table_lock);
 	}
 
 out_unlock:
diff -urpN -X /home/fletch/.diff.exclude objrmap/mm/swapfile.c objrmap-sort/mm/swapfile.c
--- objrmap/mm/swapfile.c	Sun Jun 15 20:54:09 2003
+++ objrmap-sort/mm/swapfile.c	Sun Jun 15 20:55:23 2003
@@ -494,6 +494,7 @@ static int unuse_process(struct mm_struc
 	/*
 	 * Go through process' page directory.
 	 */
+	down_read(&mm->mmap_sem);
 	spin_lock(&mm->page_table_lock);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
@@ -501,6 +502,7 @@ static int unuse_process(struct mm_struc
 			break;
 	}
 	spin_unlock(&mm->page_table_lock);
+	up_read(&mm->mmap_sem);
 	pte_chain_free(pte_chain);
 	return 0;
 }
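
A note for readers following the arithmetic in invalidate_mmap_range_list() above: the sketch below is a small userspace mock, not part of the patch.  zap() and the plain array of ranges are made-up stand-ins for zap_page_range() and the i_mmap list; the clamping of the hole [hba, hba+hlen-1] against each [start, end] range is the same.

#include <limits.h>
#include <stdio.h>

struct range {
	unsigned long start, end;	/* inclusive page numbers */
};

/* Stand-in for zap_page_range(): just report what would be unmapped. */
static void zap(const struct range *r, unsigned long zba, unsigned long zea)
{
	printf("range [%lu,%lu]: zap pages %lu..%lu\n",
	       r->start, r->end, zba, zea);
}

/* Same clamping as invalidate_mmap_range_list(), on a plain array. */
static void invalidate(const struct range *ranges, int n,
		       unsigned long hba, unsigned long hlen)
{
	unsigned long hea = hba + hlen - 1;	/* last page of the hole */
	unsigned long zba, zea;
	int i;

	if (hea < hba)			/* hlen of zero, or overflow */
		hea = ULONG_MAX;
	for (i = 0; i < n; i++) {
		const struct range *r = &ranges[i];

		if (hea < r->start || hba > r->end)
			continue;	/* no overlap with this range */
		zba = (hba <= r->start) ? r->start : hba;
		zea = (hea > r->end) ? r->end : hea;
		zap(r, zba, zea);
	}
}

int main(void)
{
	const struct range ranges[] = { {0, 9}, {4, 7}, {20, 29} };

	invalidate(ranges, 3, 5, 10);	/* punch a hole over pages 5..14 */
	return 0;
}

Run standalone, this reports that the hole clips to pages 5..9 of the first range and 5..7 of the second, while the third range is untouched; in the kernel code the clamped page offsets are then converted back to virtual addresses relative to each vma's vm_start.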
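The other half of the patch is keeping each i_mmap list sorted by range->start, which is what lets walkers such as page_referenced_obj() stop at the first range starting beyond the page index.  A minimal sketch of that insert-sorted / break-early pattern, again as a hypothetical userspace mock with a plain singly linked list standing in for the list_head machinery of vma_add():

#include <stdio.h>
#include <stdlib.h>

struct range {
	unsigned long start, end;	/* inclusive page numbers */
	struct range *next;		/* list kept sorted by start */
};

/* Insert-sorted, reusing an identical range if one exists (cf. vma_add). */
static struct range *add_range(struct range *head,
			       unsigned long start, unsigned long end)
{
	struct range **link = &head, *r;

	for (r = head; r; r = r->next) {
		if (r->start > start)
			break;			/* found the insertion point */
		if (r->start == start && r->end == end)
			return head;		/* reuse the existing range */
		link = &r->next;
	}
	r = malloc(sizeof(*r));
	if (!r)
		abort();
	r->start = start;
	r->end = end;
	r->next = *link;
	*link = r;
	return head;
}

/*
 * Count the ranges covering 'index'.  Because the list is sorted by start,
 * the walk stops at the first range starting beyond the index
 * (cf. page_referenced_obj() above).
 */
static int ranges_covering(const struct range *r, unsigned long index)
{
	int hits = 0;

	for (; r; r = r->next) {
		if (r->start > index)
			break;		/* sorted by start => we are done */
		if (r->end < index)
			continue;
		hits++;
	}
	return hits;
}

int main(void)
{
	struct range *head = NULL;

	head = add_range(head, 0, 9);
	head = add_range(head, 20, 29);
	head = add_range(head, 4, 7);	/* lands between the other two */
	printf("page 5 is covered by %d range(s)\n", ranges_covering(head, 5));
	return 0;
}

The early break is the payoff of the sort: a page near the start of a large, heavily mapped file only has to look at the ranges that could possibly contain it, rather than every vma on the mapping.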