diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/arm/mm/mm-armv.c sles-anobjrmap-2-works/arch/arm/mm/mm-armv.c
--- sles-anobjrmap-1/arch/arm/mm/mm-armv.c	2004-01-15 18:36:03.000000000 +0100
+++ sles-anobjrmap-2-works/arch/arm/mm/mm-armv.c	2004-03-11 20:10:22.666988776 +0100
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include <asm/rmap.h>
 #include
 #include
 #include
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/i386/mm/hugetlbpage.c sles-anobjrmap-2-works/arch/i386/mm/hugetlbpage.c
--- sles-anobjrmap-1/arch/i386/mm/hugetlbpage.c	2004-01-15 18:36:04.000000000 +0100
+++ sles-anobjrmap-2-works/arch/i386/mm/hugetlbpage.c	2004-03-11 13:50:10.783922128 +0100
@@ -278,7 +278,7 @@ follow_huge_pmd(struct mm_struct *mm, un
 static void free_huge_page(struct page *page)
 {
 	BUG_ON(page_count(page));
-	BUG_ON(page->mapping);
+	BUG_ON(page_mapping(page));
 
 	INIT_LIST_HEAD(&page->list);
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/ia64/ia32/binfmt_elf32.c sles-anobjrmap-2-works/arch/ia64/ia32/binfmt_elf32.c
--- sles-anobjrmap-1/arch/ia64/ia32/binfmt_elf32.c	2004-01-15 18:36:04.000000000 +0100
+++ sles-anobjrmap-2-works/arch/ia64/ia32/binfmt_elf32.c	2004-03-11 17:34:16.019935568 +0100
@@ -91,9 +91,10 @@ ia64_elf32_init (struct pt_regs *regs)
 		vma->vm_page_prot = PAGE_SHARED;
 		vma->vm_flags = VM_READ|VM_MAYREAD;
 		vma->vm_ops = &ia32_shared_page_vm_ops;
-		vma->vm_pgoff = 0;
+		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 		vma->vm_file = NULL;
 		vma->vm_private_data = NULL;
+		vma->anon_vma = NULL;
 		down_write(&current->mm->mmap_sem);
 		{
 			insert_vm_struct(current->mm, vma);
@@ -113,8 +114,9 @@ ia64_elf32_init (struct pt_regs *regs)
 		vma->vm_page_prot = PAGE_SHARED;
 		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
 		vma->vm_ops = NULL;
-		vma->vm_pgoff = 0;
+		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 		vma->vm_file = NULL;
+		vma->anon_vma = NULL;
 		vma->vm_private_data = NULL;
 		down_write(&current->mm->mmap_sem);
 		{
@@ -193,8 +195,9 @@ ia32_setup_arg_pages (struct linux_binpr
 	mpnt->vm_page_prot = PAGE_COPY;
 	mpnt->vm_flags = VM_STACK_FLAGS;
 	mpnt->vm_ops = NULL;
-	mpnt->vm_pgoff = 0;
+	mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
 	mpnt->vm_file = NULL;
+	mpnt->anon_vma = NULL;
 	mpnt->vm_private_data = 0;
 	insert_vm_struct(current->mm, mpnt);
 	current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
@@ -204,7 +207,7 @@ ia32_setup_arg_pages (struct linux_binpr
 		struct page *page = bprm->page[i];
 		if (page) {
 			bprm->page[i] = NULL;
-			put_dirty_page(current, page, stack_base, PAGE_COPY);
+			put_dirty_page(current, page, stack_base, PAGE_COPY, mpnt);
 		}
 		stack_base += PAGE_SIZE;
 	}
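The vm_pgoff change above is what makes object-based reverse mapping work for anonymous vmas: with vm_pgoff set to vm_start >> PAGE_SHIFT, a page's ->index directly encodes its virtual address, so the address can be recovered from any vma on the anon_vma list. A minimal sketch of that computation follows; the helper name vma_address_of is hypothetical and not part of this patch:

	static unsigned long vma_address_of(struct page *page,
					    struct vm_area_struct *vma)
	{
		unsigned long address;

		/* page->index counts in PAGE_SIZE units for anon memory */
		address = vma->vm_start +
			((page->index - vma->vm_pgoff) << PAGE_SHIFT);
		if (address < vma->vm_start || address >= vma->vm_end)
			/* page is not mapped by this particular vma */
			return -EFAULT;
		return address;
	}

This is why every stack-setup path below initializes vm_pgoff the same way before calling put_dirty_page().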
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/ia64/ilp32/binfmt_ilp32.c sles-anobjrmap-2-works/arch/ia64/ilp32/binfmt_ilp32.c
--- sles-anobjrmap-1/arch/ia64/ilp32/binfmt_ilp32.c	2004-02-29 17:47:26.000000000 +0100
+++ sles-anobjrmap-2-works/arch/ia64/ilp32/binfmt_ilp32.c	2004-03-11 17:35:22.315857056 +0100
@@ -13,7 +13,7 @@
 #define init_elf_binfmt init_ilp32_elf_binfmt
 #define exit_elf_binfmt exit_ilp32_elf_binfmt
 
-extern void put_dirty_page(struct task_struct *tsk, struct page *page, unsigned long address, pgprot_t prot);
+extern void put_dirty_page(struct task_struct *tsk, struct page *page, unsigned long address, pgprot_t prot, struct vm_area_struct * vma);
 
 struct linux_binprm;
 struct elf32_hdr;
 static int ilp32_elf_setup_arg_pages (struct linux_binprm *);
@@ -59,8 +59,9 @@ static int ilp32_elf_setup_arg_pages (st
 	mpnt->vm_page_prot = PAGE_COPY;
 	mpnt->vm_flags = VM_STACK_FLAGS;
 	mpnt->vm_ops = NULL;
-	mpnt->vm_pgoff = 0;
+	mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
 	mpnt->vm_file = NULL;
+	mpnt->anon_vma = NULL;
 	mpnt->vm_private_data = 0;
 	insert_vm_struct(current->mm, mpnt);
 	current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
@@ -70,7 +71,7 @@ static int ilp32_elf_setup_arg_pages (st
 		struct page *page = bprm->page[i];
 		if (page) {
 			bprm->page[i] = NULL;
-			put_dirty_page(current, page, stack_base, PAGE_COPY);
+			put_dirty_page(current, page, stack_base, PAGE_COPY, mpnt);
 		}
 		stack_base += PAGE_SIZE;
 	}
@@ -108,8 +109,9 @@ void ilp32_init_addr_space()
 	vma->vm_page_prot = PAGE_COPY;
 	vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
 	vma->vm_ops = NULL;
-	vma->vm_pgoff = 0;
+	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 	vma->vm_file = NULL;
+	vma->anon_vma = NULL;
 	vma->vm_private_data = NULL;
 	insert_vm_struct(current->mm, vma);
 }
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/ia64/kernel/perfmon.c sles-anobjrmap-2-works/arch/ia64/kernel/perfmon.c
--- sles-anobjrmap-1/arch/ia64/kernel/perfmon.c	2004-02-04 16:06:39.000000000 +0100
+++ sles-anobjrmap-2-works/arch/ia64/kernel/perfmon.c	2004-03-11 15:09:48.000000000 +0100
@@ -2286,9 +2286,11 @@ pfm_smpl_buffer_alloc(struct task_struct
 	vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED|VM_DONTCOPY;
 	vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
 	vma->vm_ops = &pfm_vm_ops;
-	vma->vm_pgoff = 0;
+	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 	vma->vm_file = NULL;
 	vma->vm_private_data = ctx;	/* information needed by the pfm_vm_close() function */
+	/* insert_vm_struct takes care of anon_vma_node */
+	vma->anon_vma = NULL;
 
 	/*
 	 * Now we have everything we need and we can initialize
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/ia64/mm/init.c sles-anobjrmap-2-works/arch/ia64/mm/init.c
--- sles-anobjrmap-1/arch/ia64/mm/init.c	2004-02-04 16:06:39.000000000 +0100
+++ sles-anobjrmap-2-works/arch/ia64/mm/init.c	2004-03-11 15:09:57.000000000 +0100
@@ -128,8 +128,10 @@ ia64_init_addr_space (void)
 		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
 		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
 		vma->vm_ops = NULL;
-		vma->vm_pgoff = 0;
+		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 		vma->vm_file = NULL;
+		/* insert_vm_struct takes care of anon_vma_node */
+		vma->anon_vma = NULL;
 		vma->vm_private_data = NULL;
 		insert_vm_struct(current->mm, vma);
 	}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/ppc64/mm/hugetlbpage.c sles-anobjrmap-2-works/arch/ppc64/mm/hugetlbpage.c
--- sles-anobjrmap-1/arch/ppc64/mm/hugetlbpage.c	2004-02-04 16:06:42.000000000 +0100
+++ sles-anobjrmap-2-works/arch/ppc64/mm/hugetlbpage.c	2004-03-11 20:10:17.071839368 +0100
@@ -25,7 +25,6 @@
 #include
 #include
 #include
-#include <asm/rmap.h>
 #include
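The callers above only clear ->anon_vma; per the added comments, insert_vm_struct is now responsible for ->anon_vma_node. A hedged sketch of what that linking plausibly looks like — the function body below is an assumption for illustration, the real code lives in the mm core and is not part of this diff:

	static void __vma_link_anon_vma(struct vm_area_struct *vma)
	{
		anon_vma_t *anon_vma = vma->anon_vma;

		if (!anon_vma)
			/* not anonymous yet: the node stays unused */
			INIT_LIST_HEAD(&vma->anon_vma_node);
		else {
			spin_lock(&anon_vma->anon_vma_lock);
			list_add_tail(&vma->anon_vma_node,
				      &anon_vma->anon_vma_head);
			spin_unlock(&anon_vma->anon_vma_lock);
		}
	}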
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/s390/kernel/compat_exec.c sles-anobjrmap-2-works/arch/s390/kernel/compat_exec.c
--- sles-anobjrmap-1/arch/s390/kernel/compat_exec.c	2003-07-17 01:53:55.000000000 +0200
+++ sles-anobjrmap-2-works/arch/s390/kernel/compat_exec.c	2004-03-11 17:35:26.085284016 +0100
@@ -69,9 +69,11 @@ int setup_arg_pages32(struct linux_binpr
 	mpnt->vm_page_prot = PAGE_COPY;
 	mpnt->vm_flags = VM_STACK_FLAGS;
 	mpnt->vm_ops = NULL;
-	mpnt->vm_pgoff = 0;
+	mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
 	mpnt->vm_file = NULL;
 	INIT_LIST_HEAD(&mpnt->shared);
+	/* insert_vm_struct takes care of anon_vma_node */
+	mpnt->anon_vma = NULL;
 	mpnt->vm_private_data = (void *) 0;
 	insert_vm_struct(mm, mpnt);
 	mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
@@ -81,7 +83,7 @@ int setup_arg_pages32(struct linux_binpr
 		struct page *page = bprm->page[i];
 		if (page) {
 			bprm->page[i] = NULL;
-			put_dirty_page(current,page,stack_base,PAGE_COPY);
+			put_dirty_page(current,page,stack_base,PAGE_COPY, mpnt);
 		}
 		stack_base += PAGE_SIZE;
 	}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/arch/x86_64/ia32/ia32_binfmt.c sles-anobjrmap-2-works/arch/x86_64/ia32/ia32_binfmt.c
--- sles-anobjrmap-1/arch/x86_64/ia32/ia32_binfmt.c	2004-02-29 17:47:27.000000000 +0100
+++ sles-anobjrmap-2-works/arch/x86_64/ia32/ia32_binfmt.c	2004-03-11 17:35:30.182661120 +0100
@@ -369,9 +369,11 @@ int setup_arg_pages(struct linux_binprm
 	mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
 		PAGE_COPY_EXEC : PAGE_COPY;
 	mpnt->vm_ops = NULL;
-	mpnt->vm_pgoff = 0;
+	mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
 	mpnt->vm_file = NULL;
 	INIT_LIST_HEAD(&mpnt->shared);
+	/* insert_vm_struct takes care of anon_vma_node */
+	mpnt->anon_vma = NULL;
 	mpnt->vm_private_data = (void *) 0;
 	insert_vm_struct(mm, mpnt);
 	mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
@@ -381,7 +383,7 @@ int setup_arg_pages(struct linux_binprm
 		struct page *page = bprm->page[i];
 		if (page) {
 			bprm->page[i] = NULL;
-			put_dirty_page(current,page,stack_base,PAGE_COPY_EXEC);
+			put_dirty_page(current,page,stack_base,PAGE_COPY_EXEC, mpnt);
 		}
 		stack_base += PAGE_SIZE;
 	}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/buffer.c sles-anobjrmap-2-works/fs/buffer.c
--- sles-anobjrmap-1/fs/buffer.c	2004-02-29 17:47:41.000000000 +0100
+++ sles-anobjrmap-2-works/fs/buffer.c	2004-03-11 17:32:22.612176168 +0100
@@ -584,6 +584,8 @@ void end_buffer_async_write(struct buffe
 	BUG_ON(!buffer_async_write(bh));
 
 	page = bh->b_page;
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
@@ -593,7 +595,7 @@ void end_buffer_async_write(struct buffe
 				"I/O error on %s\n",
 				bdevname(bh->b_bdev, b));
 		}
-		set_bit(AS_EIO, &page->mapping->flags);
+		set_bit(AS_EIO, &page->as.mapping->flags);
 		clear_buffer_uptodate(bh);
 		SetPageError(page);
 	}
@@ -804,7 +806,10 @@ void write_boundary_block(struct block_d
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct address_space *buffer_mapping = bh->b_page->mapping;
+	struct address_space *buffer_mapping = bh->b_page->as.mapping;
+
+	BUG_ON(PageAnon(bh->b_page));
+	BUG_ON(PageSwapCache(bh->b_page));
 
 	mark_buffer_dirty(bh);
 	if (!mapping->assoc_mapping) {
@@ -855,7 +860,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  */
 int __set_page_dirty_buffers(struct page *page)
 {
-	struct address_space * const mapping = page->mapping;
+	struct address_space * const mapping = page_mapping(page);
 	int ret = 0;
 
 	if (mapping == NULL) {
@@ -880,7 +885,7 @@ int __set_page_dirty_buffers(struct page
 	if (!TestSetPageDirty(page)) {
 		spin_lock(&mapping->page_lock);
-		if (page->mapping) {	/* Race with truncate? */
+		if (page_mapping(page)) {	/* Race with truncate? */
 			if (!mapping->backing_dev_info->memory_backed)
 				inc_page_state(nr_dirty);
 			list_del(&page->list);
@@ -1299,9 +1304,12 @@ void __brelse(struct buffer_head * buf)
  */
 void __bforget(struct buffer_head *bh)
 {
+	BUG_ON(PageAnon(bh->b_page));
+	BUG_ON(PageSwapCache(bh->b_page));
+
 	clear_buffer_dirty(bh);
 	if (!list_empty(&bh->b_assoc_buffers)) {
-		struct address_space *buffer_mapping = bh->b_page->mapping;
+		struct address_space *buffer_mapping = bh->b_page->as.mapping;
 
 		spin_lock(&buffer_mapping->private_lock);
 		list_del_init(&bh->b_assoc_buffers);
@@ -1588,10 +1596,11 @@ static inline void discard_buffer(struct
  */
 int try_to_release_page(struct page *page, int gfp_mask)
 {
-	struct address_space * const mapping = page->mapping;
+	struct address_space * const mapping = page->as.mapping;
 
-	if (!PageLocked(page))
-		BUG();
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	BUG_ON(!PageLocked(page));
 
 	if (PageWriteback(page))
 		return 0;
@@ -1663,6 +1672,9 @@ void create_empty_buffers(struct page *p
 {
 	struct buffer_head *bh, *head, *tail;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	head = create_buffers(page, blocksize, 1);
 	bh = head;
 	do {
@@ -1672,7 +1684,7 @@ void create_empty_buffers(struct page *p
 	} while (bh);
 	tail->b_this_page = head;
 
-	spin_lock(&page->mapping->private_lock);
+	spin_lock(&page->as.mapping->private_lock);
 	if (PageUptodate(page) || PageDirty(page)) {
 		bh = head;
 		do {
@@ -1684,7 +1696,7 @@ void create_empty_buffers(struct page *p
 		} while (bh != head);
 	}
 	__set_page_buffers(page, head);
-	spin_unlock(&page->mapping->private_lock);
+	spin_unlock(&page->as.mapping->private_lock);
 }
 EXPORT_SYMBOL(create_empty_buffers);
@@ -2077,7 +2089,7 @@ static int __block_commit_write(struct i
  */
 int block_read_full_page(struct page *page, get_block_t *get_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
 	unsigned int blocksize;
@@ -2088,6 +2100,9 @@ int block_read_full_page(struct page *pa
 		PAGE_BUG(page);
 	if (PageUptodate(page))
 		buffer_error();
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	inode = page->as.mapping->host;
 	blocksize = 1 << inode->i_blkbits;
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
@@ -2217,15 +2232,21 @@ out:
 int cont_prepare_write(struct page *page, unsigned offset,
 		unsigned to, get_block_t *get_block, loff_t *bytes)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = mapping->host;
+	struct address_space *mapping;
+	struct inode *inode;
 	struct page *new_page;
 	unsigned long pgpos;
 	long status;
 	unsigned zerofrom;
-	unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned blocksize;
 	void *kaddr;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	mapping = page->as.mapping;
+	inode = mapping->host;
+	blocksize = 1 << inode->i_blkbits;
+
 	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
 		status = -ENOMEM;
 		new_page = grab_cache_page(mapping, pgpos);
@@ -2299,8 +2320,13 @@ out:
 int block_prepare_write(struct page *page, unsigned from, unsigned to,
 			get_block_t *get_block)
 {
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
+	struct inode *inode;
+	int err;
+
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	inode = page->as.mapping->host;
+	err = __block_prepare_write(inode, page, from, to, get_block);
 	if (err)
 		ClearPageUptodate(page);
 	return err;
 }
@@ -2308,7 +2334,11 @@
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode;
+
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	inode = page->as.mapping->host;
 	__block_commit_write(inode,page,from,to);
 	return 0;
 }
@@ -2316,8 +2346,13 @@ int block_commit_write(struct page *page
 int generic_commit_write(struct file *file, struct page *page,
 		unsigned from, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	struct inode *inode;
+	loff_t pos;
+
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	inode = page->as.mapping->host;
+	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	__block_commit_write(inode,page,from,to);
 	/*
 	 * No need to use i_size_read() here, the i_size
@@ -2337,7 +2372,7 @@ int generic_commit_write(struct file *fi
 int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 			get_block_t *get_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	struct buffer_head map_bh;
@@ -2352,6 +2387,9 @@ int nobh_prepare_write(struct page *page
 	int is_mapped_to_disk = 1;
 	int dirtied_it = 0;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	if (PageMappedToDisk(page))
 		return 0;
 
@@ -2471,8 +2509,13 @@ EXPORT_SYMBOL(nobh_prepare_write);
 int nobh_commit_write(struct file *file, struct page *page,
 		unsigned from, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	struct inode *inode;
+	loff_t pos;
+
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	inode = page->as.mapping->host;
+	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
 	set_page_dirty(page);
 	if (pos > inode->i_size) {
@@ -2605,12 +2648,15 @@ out:
 int block_write_full_page(struct page *page, get_block_t *get_block,
 			struct writeback_control *wbc)
 {
-	struct inode * const inode = page->mapping->host;
+	struct inode * const inode = page->as.mapping->host;
 	loff_t i_size = i_size_read(inode);
 	const unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
 	unsigned offset;
 	void *kaddr;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	/* Is the page fully inside i_size? */
 	if (page->index < end_index)
 		return __block_write_full_page(inode, page, get_block, wbc);
@@ -2783,10 +2829,13 @@ void sync_dirty_buffer(struct buffer_hea
  */
 static void check_ttfb_buffer(struct page *page, struct buffer_head *bh)
 {
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	if (!buffer_uptodate(bh) && !buffer_req(bh)) {
-		if (PageUptodate(page) && page->mapping
+		if (PageUptodate(page) && page->as.mapping
 			&& buffer_mapped(bh)	/* discard_buffer */
-			&& S_ISBLK(page->mapping->host->i_mode))
+			&& S_ISBLK(page->as.mapping->host->i_mode))
 		{
 			buffer_error();
 		}
@@ -2826,11 +2875,14 @@ drop_buffers(struct page *page, struct b
 	struct buffer_head *bh;
 	int was_uptodate = 1;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	bh = head;
 	do {
 		check_ttfb_buffer(page, bh);
 		if (buffer_write_io_error(bh))
-			set_bit(AS_EIO, &page->mapping->flags);
+			set_bit(AS_EIO, &page->as.mapping->flags);
 		if (buffer_busy(bh))
 			goto failed;
 		if (!buffer_uptodate(bh) && !buffer_req(bh))
@@ -2857,10 +2909,12 @@ failed:
 
 int try_to_free_buffers(struct page *page)
 {
-	struct address_space * const mapping = page->mapping;
+	struct address_space * const mapping = page->as.mapping;
 	struct buffer_head *buffers_to_free = NULL;
 	int ret = 0;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	BUG_ON(!PageLocked(page));
 	if (PageWriteback(page))
 		return 0;
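The BUG_ON(PageAnon)/BUG_ON(PageSwapCache) pairs sprinkled through fs/buffer.c (and the filesystems below) all assert one invariant: buffer heads only ever hang off file-backed pagecache pages, so dereferencing page->as.mapping there is safe without checking the union tag. A hypothetical wrapper would make the invariant explicit; the patch instead open-codes the checks at each site, which keeps the diff mechanical:

	static inline struct address_space *page_buffer_mapping(struct page *page)
	{
		/* buffer-backed pages are file-backed by definition */
		BUG_ON(PageAnon(page));
		BUG_ON(PageSwapCache(page));
		return page->as.mapping;
	}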
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/exec.c sles-anobjrmap-2-works/fs/exec.c
--- sles-anobjrmap-1/fs/exec.c	2004-03-05 05:12:30.000000000 +0100
+++ sles-anobjrmap-2-works/fs/exec.c	2004-03-11 17:38:11.000213136 +0100
@@ -44,7 +44,7 @@
 #include
 #include
 #include
-#include <linux/rmap-locking.h>
+#include <linux/objrmap.h>
 #include
 #include
 
@@ -295,21 +295,18 @@ EXPORT_SYMBOL(copy_strings_kernel);
  * tsk->mmap_sem is held for writing.
 */
 void put_dirty_page(struct task_struct *tsk, struct page *page,
-		    unsigned long address, pgprot_t prot)
+		    unsigned long address, pgprot_t prot,
+		    struct vm_area_struct *vma)
 {
 	pgd_t * pgd;
 	pmd_t * pmd;
 	pte_t * pte;
-	struct pte_chain *pte_chain;
 
 	if (page_count(page) != 1)
 		printk(KERN_ERR "mem_map disagrees with %p at %08lx\n",
 				page, address);
 
 	pgd = pgd_offset(tsk->mm, address);
-	pte_chain = pte_chain_alloc(GFP_KERNEL);
-	if (!pte_chain)
-		goto out_sig;
 	spin_lock(&tsk->mm->page_table_lock);
 	pmd = pmd_alloc(tsk->mm, pgd, address);
 	if (!pmd)
@@ -325,20 +322,17 @@ void put_dirty_page(struct task_struct *
 	flush_dcache_page(page);
 	SetPageAnon(page);
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
-	pte_chain = page_add_rmap(page, pte, pte_chain);
+	page_add_rmap(page, vma, address);
 	pte_unmap(pte);
 	tsk->mm->rss++;
 	spin_unlock(&tsk->mm->page_table_lock);
 
 	/* no need for flush_tlb */
-	pte_chain_free(pte_chain);
 	return;
 out:
 	spin_unlock(&tsk->mm->page_table_lock);
-out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, tsk);
-	pte_chain_free(pte_chain);
 	return;
 }
 
@@ -428,9 +422,11 @@ int setup_arg_pages(struct linux_binprm
 	mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
 	mpnt->vm_flags = VM_STACK_FLAGS;
 	mpnt->vm_ops = NULL;
-	mpnt->vm_pgoff = 0;
+	mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
 	mpnt->vm_file = NULL;
 	INIT_LIST_HEAD(&mpnt->shared);
+	/* insert_vm_struct takes care of anon_vma_node */
+	mpnt->anon_vma = NULL;
 	mpnt->vm_private_data = (void *) 0;
 	insert_vm_struct(mm, mpnt);
 	mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
@@ -441,7 +437,7 @@ int setup_arg_pages(struct linux_binprm
 		if (page) {
 			bprm->page[i] = NULL;
 			put_dirty_page(current, page, stack_base,
-					mpnt->vm_page_prot);
+					mpnt->vm_page_prot, mpnt);
 		}
 		stack_base += PAGE_SIZE;
 	}
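Note what disappears here: put_dirty_page no longer needs the pte_chain preallocation or its out_sig failure path, because page_add_rmap(page, vma, address) has nothing to allocate and cannot fail. A hedged sketch of what the new page_add_rmap presumably does for an anonymous page — the real body lives in mm/objrmap.c, which is not part of this excerpt, so treat every line below as an assumption:

	void page_add_rmap(struct page *page, struct vm_area_struct *vma,
			   unsigned long address)
	{
		page_map_lock(page);
		if (!page->mapcount++) {
			inc_page_state(nr_mapped);
			if (PageAnon(page)) {
				/* first mapper: track the vma directly */
				SetPageDirect(page);
				page->as.vma = vma;
				page->index = address >> PAGE_SHIFT;
			}
		}
		page_map_unlock(page);
	}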
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/ext2/dir.c sles-anobjrmap-2-works/fs/ext2/dir.c
--- sles-anobjrmap-1/fs/ext2/dir.c	2003-05-14 01:56:42.000000000 +0200
+++ sles-anobjrmap-2-works/fs/ext2/dir.c	2004-03-11 17:41:59.553467760 +0100
@@ -64,10 +64,13 @@ ext2_last_byte(struct inode *inode, unsi
 static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
 {
-	struct inode *dir = page->mapping->host;
+	struct inode *dir = page->as.mapping->host;
 	int err = 0;
+
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	dir->i_version++;
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	page->as.mapping->a_ops->commit_write(NULL, page, from, to);
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
@@ -77,7 +80,7 @@ static int ext2_commit_chunk(struct page
 
 static void ext2_check_page(struct page *page)
 {
-	struct inode *dir = page->mapping->host;
+	struct inode *dir = page->as.mapping->host;
 	struct super_block *sb = dir->i_sb;
 	unsigned chunk_size = ext2_chunk_size(dir);
 	char *kaddr = page_address(page);
@@ -87,6 +90,8 @@ static void ext2_check_page(struct page
 	ext2_dirent *p;
 	char *error;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
 		limit = dir->i_size & ~PAGE_CACHE_MASK;
 		if (limit & (chunk_size - 1))
@@ -411,8 +416,10 @@ void ext2_set_link(struct inode *dir, st
 	unsigned to = from + le16_to_cpu(de->rec_len);
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = page->as.mapping->a_ops->prepare_write(NULL, page, from, to);
 	if (err)
 		BUG();
 	de->inode = cpu_to_le32(inode->i_ino);
@@ -493,9 +500,11 @@ int ext2_add_link (struct dentry *dentry
 	return -EINVAL;
 
 got_it:
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	from = (char*)de - (char*)page_address(page);
 	to = from + rec_len;
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = page->as.mapping->a_ops->prepare_write(NULL, page, from, to);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -528,7 +537,7 @@ out_unlock:
 */
 int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page->as.mapping;
 	struct inode *inode = mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
@@ -537,6 +546,8 @@ int ext2_delete_entry (struct ext2_dir_e
 	ext2_dirent * de = (ext2_dirent *) (kaddr + from);
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	while ((char*)de < (char*)dir) {
 		if (de->rec_len == 0) {
 			ext2_error(inode->i_sb, __FUNCTION__,
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/ext3/inode.c sles-anobjrmap-2-works/fs/ext3/inode.c
--- sles-anobjrmap-1/fs/ext3/inode.c	2004-02-04 16:06:59.000000000 +0100
+++ sles-anobjrmap-2-works/fs/ext3/inode.c	2004-03-11 17:43:11.981457032 +0100
@@ -1078,10 +1078,12 @@ static int do_journal_get_write_access(h
 static int ext3_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	handle = ext3_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -1133,9 +1135,11 @@ static int ext3_ordered_commit_write(str
 			     unsigned from, unsigned to)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	int ret = 0, ret2;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	ret = walk_page_buffers(handle, page_buffers(page),
 		from, to, NULL, ext3_journal_dirty_data);
 
@@ -1162,10 +1166,12 @@ static int ext3_writeback_commit_write(s
 			     unsigned from, unsigned to)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
@@ -1180,11 +1186,13 @@ static int ext3_journalled_commit_write(
 			struct page *page, unsigned from, unsigned to)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	int ret = 0, ret2;
 	int partial = 0;
 	loff_t pos;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	/*
 	 * Here we duplicate the generic_commit_write() functionality
 	 */
@@ -1335,7 +1343,7 @@ static int journal_dirty_data_fn(handle_
 static int ext3_ordered_writepage(struct page *page,
 			struct writeback_control *wbc)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	struct buffer_head *page_bufs;
 	handle_t *handle = NULL;
 	int ret = 0;
@@ -1343,6 +1351,8 @@ static int ext3_ordered_writepage(struct
 
 	J_ASSERT(PageLocked(page));
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	/*
 	 * We give up here if we're reentered, because it might be for a
 	 * different filesystem.
@@ -1403,11 +1413,13 @@ out_fail:
 static int ext3_writeback_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	handle_t *handle = NULL;
 	int ret = 0;
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	if (ext3_journal_current_handle())
 		goto out_fail;
 
@@ -1432,11 +1444,13 @@ out_fail:
 static int ext3_journalled_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	handle_t *handle = NULL;
 	int ret = 0;
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	if (ext3_journal_current_handle())
 		goto no_write;
 
@@ -1500,8 +1514,10 @@ ext3_readpages(struct file *file, struct
 
 static int ext3_invalidatepage(struct page *page, unsigned long offset)
 {
-	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+	journal_t *journal = EXT3_JOURNAL(page->as.mapping->host);
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	/*
 	 * If it's a full truncate we just forget about the pending dirtying
 	 */
@@ -1513,8 +1529,10 @@ static int ext3_invalidatepage(struct pa
 
 static int ext3_releasepage(struct page *page, int wait)
 {
-	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+	journal_t *journal = EXT3_JOURNAL(page->as.mapping->host);
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	WARN_ON(PageChecked(page));
 	return journal_try_to_free_buffers(journal, page, wait);
 }
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/isofs/rock.c sles-anobjrmap-2-works/fs/isofs/rock.c
--- sles-anobjrmap-1/fs/isofs/rock.c	2003-09-17 04:04:29.000000000 +0200
+++ sles-anobjrmap-2-works/fs/isofs/rock.c	2004-03-11 17:43:52.471301640 +0100
@@ -430,7 +430,7 @@ int parse_rock_ridge_inode(struct iso_di
 
 static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	char *link = kmap(page);
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
@@ -445,6 +445,8 @@ static int rock_ridge_symlink_readpage(s
 	unsigned char *chr;
 	struct rock_ridge *rr;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		panic ("Cannot have symlink with high sierra variant of iso filesystem\n");
 
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/jbd/commit.c sles-anobjrmap-2-works/fs/jbd/commit.c
--- sles-anobjrmap-1/fs/jbd/commit.c	2004-01-15 18:36:22.000000000 +0100
+++ sles-anobjrmap-2-works/fs/jbd/commit.c	2004-03-11 17:44:17.937430200 +0100
@@ -60,7 +60,9 @@ static void release_buffer_page(struct b
 	page = bh->b_page;
 	if (!page)
 		goto nope;
-	if (page->mapping)
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	if (page->as.mapping)
 		goto nope;
 
 	/* OK, it's a truncated page */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/jbd/journal.c sles-anobjrmap-2-works/fs/jbd/journal.c
--- sles-anobjrmap-1/fs/jbd/journal.c	2003-12-01 05:31:07.000000000 +0100
+++ sles-anobjrmap-2-works/fs/jbd/journal.c	2004-03-11 17:45:01.563797984 +0100
@@ -1676,7 +1676,7 @@ repeat:
 		} else {
 			J_ASSERT_BH(bh,
 				(atomic_read(&bh->b_count) > 0) ||
-				(bh->b_page && bh->b_page->mapping));
+				(bh->b_page && bh->b_page->as.mapping));
 
 			if (!new_jh) {
 				jbd_unlock_bh_journal_head(bh);
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/libfs.c sles-anobjrmap-2-works/fs/libfs.c
--- sles-anobjrmap-1/fs/libfs.c	2004-02-20 17:26:48.000000000 +0100
+++ sles-anobjrmap-2-works/fs/libfs.c	2004-03-11 17:38:52.003979616 +0100
@@ -336,9 +336,12 @@ int simple_prepare_write(struct file *fi
 int simple_commit_write(struct file *file, struct page *page,
 			unsigned offset, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	/*
 	 * No need to use i_size_read() here, the i_size
 	 * cannot change under us because we hold the i_sem.
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/minix/dir.c sles-anobjrmap-2-works/fs/minix/dir.c
--- sles-anobjrmap-1/fs/minix/dir.c	2003-05-14 01:56:42.000000000 +0200
+++ sles-anobjrmap-2-works/fs/minix/dir.c	2004-03-11 17:46:00.339862664 +0100
@@ -47,9 +47,11 @@ static inline unsigned long dir_pages(st
 static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
 {
-	struct inode *dir = (struct inode *)page->mapping->host;
+	struct inode *dir = (struct inode *)page->as.mapping->host;
 	int err = 0;
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	page->as.mapping->a_ops->commit_write(NULL, page, from, to);
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
@@ -238,9 +240,11 @@ int minix_add_link(struct dentry *dentry
 	return -EINVAL;
 
 got_it:
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	from = (char*)de - (char*)page_address(page);
 	to = from + sbi->s_dirsize;
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = page->as.mapping->a_ops->prepare_write(NULL, page, from, to);
 	if (err)
 		goto out_unlock;
 	memcpy (de->name, name, namelen);
@@ -260,13 +264,15 @@ out_unlock:
 
 int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page->as.mapping;
 	struct inode *inode = (struct inode*)mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = (char*)de - kaddr;
 	unsigned to = from + minix_sb(inode->i_sb)->s_dirsize;
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	lock_page(page);
 	err = mapping->a_ops->prepare_write(NULL, page, from, to);
 	if (err == 0) {
@@ -364,14 +370,16 @@ not_empty:
 void minix_set_link(struct minix_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct inode *dir = (struct inode*)page->mapping->host;
+	struct inode *dir = (struct inode*)page->as.mapping->host;
 	struct minix_sb_info *sbi = minix_sb(dir->i_sb);
 	unsigned from = (char *)de-(char*)page_address(page);
 	unsigned to = from + sbi->s_dirsize;
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = page->as.mapping->a_ops->prepare_write(NULL, page, from, to);
 	if (err == 0) {
 		de->inode = inode->i_ino;
 		err = dir_commit_chunk(page, from, to);
	}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/mpage.c sles-anobjrmap-2-works/fs/mpage.c
--- sles-anobjrmap-1/fs/mpage.c	2003-08-31 02:37:22.000000000 +0200
+++ sles-anobjrmap-2-works/fs/mpage.c	2004-03-11 17:40:26.359635376 +0100
@@ -129,10 +129,13 @@ mpage_alloc(struct block_device *bdev,
 static void
 map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	struct buffer_head *page_bh, *head;
 	int block = 0;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	if (!page_has_buffers(page)) {
 		/*
 		 * don't make any buffers if there is only one buffer on
@@ -209,7 +212,7 @@ static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 			sector_t *last_block_in_bio, get_block_t get_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
@@ -223,6 +226,9 @@ do_mpage_readpage(struct bio *bio, struc
 	int length;
 	int fully_mapped = 1;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	if (page_has_buffers(page))
 		goto confused;
 
@@ -388,8 +394,8 @@ static struct bio *
 mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = page->mapping->host;
+	struct address_space *mapping = page->as.mapping;
+	struct inode *inode = page->as.mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	unsigned long end_index;
 	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
@@ -405,6 +411,9 @@ mpage_writepage(struct bio *bio, struct
 	int length;
 	struct buffer_head map_bh;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
 		struct buffer_head *bh = head;
@@ -562,7 +571,7 @@ alloc_new:
 confused:
 	if (bio)
 		bio = mpage_bio_submit(WRITE, bio);
-	*ret = page->mapping->a_ops->writepage(page, wbc);
+	*ret = page->as.mapping->a_ops->writepage(page, wbc);
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
 	 */
@@ -657,6 +666,9 @@ mpage_writepages(struct address_space *m
 			page_cache_get(page);
 			spin_unlock(&mapping->page_lock);
 
+			BUG_ON(PageAnon(page));
+			BUG_ON(PageSwapCache(page));
+
 			/*
 			 * At this point we hold neither mapping->page_lock nor
 			 * lock on the page itself: the page may be truncated or
@@ -669,7 +681,7 @@ mpage_writepages(struct address_space *m
 			if (wbc->sync_mode != WB_SYNC_NONE)
 				wait_on_page_writeback(page);
 
-			if (page->mapping == mapping && !PageWriteback(page) &&
+			if (page->as.mapping == mapping && !PageWriteback(page) &&
 					test_clear_page_dirty(page)) {
 				if (writepage) {
 					ret = (*writepage)(page, wbc);
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/nfs/read.c sles-anobjrmap-2-works/fs/nfs/read.c
--- sles-anobjrmap-1/fs/nfs/read.c	2003-10-31 05:54:22.000000000 +0100
+++ sles-anobjrmap-2-works/fs/nfs/read.c	2004-03-11 17:46:39.597894536 +0100
@@ -308,9 +308,11 @@ nfs_readpage_result(struct rpc_task *tas
 int
 nfs_readpage(struct file *file, struct page *page)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	int error;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page->index);
 	/*
@@ -349,16 +351,20 @@ static int
 readpage_sync_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	return nfs_readpage_sync(desc->filp, page->mapping->host, page);
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
+	return nfs_readpage_sync(desc->filp, page->as.mapping->host, page);
 }
 
 static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	struct nfs_page *new;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	nfs_wb_page(inode, page);
 	new = nfs_create_request(desc->filp, inode, page, 0, PAGE_CACHE_SIZE);
 	if (IS_ERR(new)) {
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/nfs/write.c sles-anobjrmap-2-works/fs/nfs/write.c
--- sles-anobjrmap-1/fs/nfs/write.c	2004-02-29 17:47:31.000000000 +0100
+++ sles-anobjrmap-2-works/fs/nfs/write.c	2004-03-11 17:47:07.287685048 +0100
@@ -224,13 +224,15 @@ nfs_writepage_async(struct file *file, s
 int
 nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	unsigned long end_index;
 	unsigned offset = PAGE_CACHE_SIZE;
 	loff_t i_size = i_size_read(inode);
 	int inode_referenced = 0;
 	int err;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	/*
 	 * Note: We need to ensure that we have a reference to the inode
 	 * if we are to do asynchronous writes. If not, waiting
@@ -641,9 +643,11 @@ nfs_strategy(struct inode *inode)
 int
 nfs_flush_incompatible(struct file *file, struct page *page)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page->as.mapping->host;
 	struct nfs_page	*req;
 	int		status = 0;
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	/*
 	 * Look for a request corresponding to this page. If there
 	 * is one, and it belongs to another file, we flush it out
@@ -671,11 +675,13 @@ int
 nfs_updatepage(struct file *file, struct page *page,
 		unsigned int offset, unsigned int count)
 {
 	struct dentry	*dentry = file->f_dentry;
-	struct inode	*inode = page->mapping->host;
+	struct inode	*inode = page->as.mapping->host;
 	struct nfs_page	*req;
 	loff_t		end;
 	int		status = 0;
 
+	BUG_ON(PageAnon(page));
+	BUG_ON(PageSwapCache(page));
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		count, (long long)(page_offset(page) +offset));
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/reiserfs/inode.c sles-anobjrmap-2-works/fs/reiserfs/inode.c
--- sles-anobjrmap-1/fs/reiserfs/inode.c	2004-02-29 17:47:29.000000000 +0100
+++ sles-anobjrmap-2-works/fs/reiserfs/inode.c	2004-03-11 17:48:57.079994080 +0100
@@ -2093,7 +2093,7 @@ static void lock_buffer_for_writepage(st
  * code to handle reiserfs tails.
 */
 static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) {
-    struct inode *inode = page->mapping->host ;
+    struct inode *inode = page->as.mapping->host ;
     unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
     int error = 0;
     unsigned long block ;
@@ -2101,6 +2101,8 @@ static int reiserfs_write_full_page(stru
     int partial = 0 ;
     int nr = 0;
 
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     /* The page dirty bit is cleared before writepage is called, which
      * means we have to tell create_empty_buffers to make dirty buffers
      * The page really should be up to date at this point, so tossing
@@ -2246,14 +2248,18 @@ static int reiserfs_readpage (struct fil
 static int reiserfs_writepage (struct page * page, struct writeback_control *wbc) {
-    struct inode *inode = page->mapping->host ;
+    struct inode *inode = page->as.mapping->host ;
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     reiserfs_wait_on_write_block(inode->i_sb) ;
     return reiserfs_write_full_page(page, wbc) ;
 }
 
 int reiserfs_prepare_write(struct file *f, struct page *page,
 			   unsigned from, unsigned to) {
-    struct inode *inode = page->mapping->host ;
+    struct inode *inode = page->as.mapping->host ;
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     reiserfs_wait_on_write_block(inode->i_sb) ;
     fix_tail_page_for_writing(page) ;
     if (reiserfs_transaction_running(inode->i_sb)) {
@@ -2272,11 +2278,13 @@ static sector_t reiserfs_aop_bmap(struct
 static int reiserfs_commit_write(struct file *f, struct page *page,
 				 unsigned from, unsigned to) {
-    struct inode *inode = page->mapping->host ;
+    struct inode *inode = page->as.mapping->host ;
     loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
     int ret = 0;
     struct reiserfs_transaction_handle *th = NULL;
 
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     reiserfs_wait_on_write_block(inode->i_sb) ;
     if (reiserfs_transaction_running(inode->i_sb)) {
 	th = current->journal_info;
@@ -2427,10 +2435,12 @@ free_jh:
 static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
 {
     struct buffer_head *head, *bh, *next;
-    struct inode *inode = page->mapping->host;
+    struct inode *inode = page->as.mapping->host;
     unsigned int curr_off = 0;
     int ret = 1;
 
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     BUG_ON(!PageLocked(page));
     if (!page_has_buffers(page))
 	goto out;
@@ -2476,12 +2486,14 @@ out:
 */
 static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
 {
-    struct inode *inode = page->mapping->host ;
+    struct inode *inode = page->as.mapping->host ;
     struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
     struct buffer_head *head ;
     struct buffer_head *bh ;
     int ret = 1 ;
 
+    BUG_ON(PageAnon(page));
+    BUG_ON(PageSwapCache(page));
     spin_lock(&j->j_dirty_buffers_lock) ;
     head = page_buffers(page) ;
    bh = head ;
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/fs/reiserfs/tail_conversion.c sles-anobjrmap-2-works/fs/reiserfs/tail_conversion.c
--- sles-anobjrmap-1/fs/reiserfs/tail_conversion.c	2003-09-17 04:04:54.000000000 +0200
+++ sles-anobjrmap-2-works/fs/reiserfs/tail_conversion.c	2004-03-11 17:49:49.758985656 +0100
@@ -149,8 +149,10 @@ void reiserfs_unmap_buffer(struct buffer
       interested in removing it from per-sb j_dirty_buffers list, to avoid
       BUG() on attempt to write not mapped buffer */
     if ( !list_empty(&bh->b_assoc_buffers) && bh->b_page) {
-	struct inode *inode = bh->b_page->mapping->host;
+	struct inode *inode = bh->b_page->as.mapping->host;
 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
+	BUG_ON(PageAnon(bh->b_page));
+	BUG_ON(PageSwapCache(bh->b_page));
 	spin_lock(&j->j_dirty_buffers_lock);
 	list_del_init(&bh->b_assoc_buffers);
 	spin_unlock(&j->j_dirty_buffers_lock);
Binary files sles-anobjrmap-1/ID and sles-anobjrmap-2-works/ID differ
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-alpha/rmap.h sles-anobjrmap-2-works/include/asm-alpha/rmap.h
--- sles-anobjrmap-1/include/asm-alpha/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-alpha/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _ALPHA_RMAP_H
-#define _ALPHA_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-arm/rmap.h sles-anobjrmap-2-works/include/asm-arm/rmap.h
--- sles-anobjrmap-1/include/asm-arm/rmap.h	2002-07-27 22:32:02.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-arm/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,6 +0,0 @@
-#ifndef _ARM_RMAP_H
-#define _ARM_RMAP_H
-
-#include <asm-generic/rmap.h>
-
-#endif /* _ARM_RMAP_H */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-arm26/rmap.h sles-anobjrmap-2-works/include/asm-arm26/rmap.h
--- sles-anobjrmap-1/include/asm-arm26/rmap.h	2003-06-08 18:21:42.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-arm26/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,66 +0,0 @@
-#ifndef _ARM_RMAP_H
-#define _ARM_RMAP_H
-
-/*
- * linux/include/asm-arm26/proc-armv/rmap.h
- *
- * Architecture dependant parts of the reverse mapping code,
- *
- * ARM is different since hardware page tables are smaller than
- * the page size and Linux uses a "duplicate" one with extra info.
- * For rmap this means that the first 2 kB of a page are the hardware
- * page tables and the last 2 kB are the software page tables.
- */
-
-static inline void pgtable_add_rmap(struct page *page, struct mm_struct * mm, unsigned long address)
-{
-	page->mapping = (void *)mm;
-	page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1);
-	inc_page_state(nr_page_table_pages);
-}
-
-static inline void pgtable_remove_rmap(struct page *page)
-{
-	page->mapping = NULL;
-	page->index = 0;
-	dec_page_state(nr_page_table_pages);
-}
-
-static inline struct mm_struct * ptep_to_mm(pte_t * ptep)
-{
-	struct page * page = virt_to_page(ptep);
-	return (struct mm_struct *)page->mapping;
-}
-
-/* The page table takes half of the page */
-#define PTE_MASK ((PAGE_SIZE / 2) - 1)
-
-static inline unsigned long ptep_to_address(pte_t * ptep)
-{
-	struct page * page = virt_to_page(ptep);
-	unsigned long low_bits;
-
-	low_bits = ((unsigned long)ptep & PTE_MASK) * PTRS_PER_PTE;
-	return page->index + low_bits;
-}
-
-//FIXME!!! IS these correct?
-static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
-{
-	return (pte_addr_t)ptep;
-}
-
-static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
-{
-	return (pte_t *)pte_paddr;
-}
-
-static inline void rmap_ptep_unmap(pte_t *pte)
-{
-	return;
-}
-
-
-//#include <asm-generic/rmap.h>
-
-#endif /* _ARM_RMAP_H */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-cris/rmap.h sles-anobjrmap-2-works/include/asm-cris/rmap.h
--- sles-anobjrmap-1/include/asm-cris/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-cris/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _CRIS_RMAP_H
-#define _CRIS_RMAP_H
-
-/* nothing to see, move along :) */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-generic/rmap.h sles-anobjrmap-2-works/include/asm-generic/rmap.h
--- sles-anobjrmap-1/include/asm-generic/rmap.h	2003-05-14 01:56:46.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-generic/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,90 +0,0 @@
-#ifndef _GENERIC_RMAP_H
-#define _GENERIC_RMAP_H
-/*
- * linux/include/asm-generic/rmap.h
- *
- * Architecture dependent parts of the reverse mapping code,
- * this version should work for most architectures with a
- * 'normal' page table layout.
- *
- * We use the struct page of the page table page to find out
- * the process and full address of a page table entry:
- * - page->mapping points to the process' mm_struct
- * - page->index has the high bits of the address
- * - the lower bits of the address are calculated from the
- *   offset of the page table entry within the page table page
- *
- * For CONFIG_HIGHPTE, we need to represent the address of a pte in a
- * scalar pte_addr_t.  The pfn of the pte's page is shifted left by PAGE_SIZE
- * bits and is then ORed with the byte offset of the pte within its page.
- *
- * For CONFIG_HIGHMEM4G, the pte_addr_t is 32 bits.  20 for the pfn, 12 for
- * the offset.
- *
- * For CONFIG_HIGHMEM64G, the pte_addr_t is 64 bits.  52 for the pfn, 12 for
- * the offset.
- */
-#include <linux/mm.h>
-
-static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address)
-{
-#ifdef BROKEN_PPC_PTE_ALLOC_ONE
-	/* OK, so PPC calls pte_alloc() before mem_map[] is setup ... ;( */
-	extern int mem_init_done;
-
-	if (!mem_init_done)
-		return;
-#endif
-	page->mapping = (void *)mm;
-	page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1);
-	inc_page_state(nr_page_table_pages);
-}
-
-static inline void pgtable_remove_rmap(struct page * page)
-{
-	page->mapping = NULL;
-	page->index = 0;
-	dec_page_state(nr_page_table_pages);
-}
-
-static inline struct mm_struct * ptep_to_mm(pte_t * ptep)
-{
-	struct page * page = kmap_atomic_to_page(ptep);
-	return (struct mm_struct *) page->mapping;
-}
-
-static inline unsigned long ptep_to_address(pte_t * ptep)
-{
-	struct page * page = kmap_atomic_to_page(ptep);
-	unsigned long low_bits;
-	low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE;
-	return page->index + low_bits;
-}
-
-#ifdef CONFIG_HIGHPTE
-static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
-{
-	pte_addr_t paddr;
-	paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT;
-	return paddr + (pte_addr_t)((unsigned long)ptep & ~PAGE_MASK);
-}
-#else
-static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
-{
-	return (pte_addr_t)ptep;
-}
-#endif
-
-#ifndef CONFIG_HIGHPTE
-static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
-{
-	return (pte_t *)pte_paddr;
-}
-
-static inline void rmap_ptep_unmap(pte_t *pte)
-{
-	return;
-}
-#endif
-
-#endif /* _GENERIC_RMAP_H */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-i386/rmap.h sles-anobjrmap-2-works/include/asm-i386/rmap.h
--- sles-anobjrmap-1/include/asm-i386/rmap.h	2002-09-10 20:09:43.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-i386/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,21 +0,0 @@
-#ifndef _I386_RMAP_H
-#define _I386_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#ifdef CONFIG_HIGHPTE
-static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
-{
-	unsigned long pfn = (unsigned long)(pte_paddr >> PAGE_SHIFT);
-	unsigned long off = ((unsigned long)pte_paddr) & ~PAGE_MASK;
-	return (pte_t *)((char *)kmap_atomic(pfn_to_page(pfn), KM_PTE2) + off);
-}
-
-static inline void rmap_ptep_unmap(pte_t *pte)
-{
-	kunmap_atomic(pte, KM_PTE2);
-}
-#endif
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-ia64/rmap.h sles-anobjrmap-2-works/include/asm-ia64/rmap.h
--- sles-anobjrmap-1/include/asm-ia64/rmap.h	2002-08-13 06:26:30.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-ia64/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_RMAP_H
-#define _ASM_IA64_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif /* _ASM_IA64_RMAP_H */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-m68k/rmap.h sles-anobjrmap-2-works/include/asm-m68k/rmap.h
--- sles-anobjrmap-1/include/asm-m68k/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-m68k/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _M68K_RMAP_H
-#define _M68K_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-m68knommu/rmap.h sles-anobjrmap-2-works/include/asm-m68knommu/rmap.h
--- sles-anobjrmap-1/include/asm-m68knommu/rmap.h	2002-11-01 20:44:49.000000000 +0100
+++ sles-anobjrmap-2-works/include/asm-m68knommu/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,2 +0,0 @@
-/* Do not need anything here */
-
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-mips/rmap.h sles-anobjrmap-2-works/include/asm-mips/rmap.h
--- sles-anobjrmap-1/include/asm-mips/rmap.h	2003-07-17 01:54:49.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-mips/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef __ASM_RMAP_H
-#define __ASM_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif /* __ASM_RMAP_H */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-parisc/rmap.h sles-anobjrmap-2-works/include/asm-parisc/rmap.h
--- sles-anobjrmap-1/include/asm-parisc/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-parisc/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _PARISC_RMAP_H
-#define _PARISC_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-ppc/rmap.h sles-anobjrmap-2-works/include/asm-ppc/rmap.h
--- sles-anobjrmap-1/include/asm-ppc/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-ppc/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,9 +0,0 @@
-#ifndef _PPC_RMAP_H
-#define _PPC_RMAP_H
-
-/* PPC calls pte_alloc() before mem_map[] is setup ... */
-#define BROKEN_PPC_PTE_ALLOC_ONE
-
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-ppc64/rmap.h sles-anobjrmap-2-works/include/asm-ppc64/rmap.h
--- sles-anobjrmap-1/include/asm-ppc64/rmap.h	2002-07-24 05:28:36.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-ppc64/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,9 +0,0 @@
-#ifndef _PPC64_RMAP_H
-#define _PPC64_RMAP_H
-
-/* PPC64 calls pte_alloc() before mem_map[] is setup ... */
-#define BROKEN_PPC_PTE_ALLOC_ONE
-
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-s390/rmap.h sles-anobjrmap-2-works/include/asm-s390/rmap.h
--- sles-anobjrmap-1/include/asm-s390/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-s390/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _S390_RMAP_H
-#define _S390_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-sh/rmap.h sles-anobjrmap-2-works/include/asm-sh/rmap.h
--- sles-anobjrmap-1/include/asm-sh/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-sh/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _SH_RMAP_H
-#define _SH_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-sparc/rmap.h sles-anobjrmap-2-works/include/asm-sparc/rmap.h
--- sles-anobjrmap-1/include/asm-sparc/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-sparc/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _SPARC_RMAP_H
-#define _SPARC_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-sparc64/rmap.h sles-anobjrmap-2-works/include/asm-sparc64/rmap.h
--- sles-anobjrmap-1/include/asm-sparc64/rmap.h	2002-07-19 20:08:35.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-sparc64/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _SPARC64_RMAP_H
-#define _SPARC64_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-um/rmap.h sles-anobjrmap-2-works/include/asm-um/rmap.h
--- sles-anobjrmap-1/include/asm-um/rmap.h	2002-09-12 19:42:39.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-um/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,6 +0,0 @@
-#ifndef __UM_RMAP_H
-#define __UM_RMAP_H
-
-#include "asm/arch/rmap.h"
-
-#endif
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-v850/rmap.h sles-anobjrmap-2-works/include/asm-v850/rmap.h
--- sles-anobjrmap-1/include/asm-v850/rmap.h	2002-11-01 20:43:49.000000000 +0100
+++ sles-anobjrmap-2-works/include/asm-v850/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1 +0,0 @@
-/* Do not need anything here */
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/asm-x86_64/rmap.h sles-anobjrmap-2-works/include/asm-x86_64/rmap.h
--- sles-anobjrmap-1/include/asm-x86_64/rmap.h	2002-10-13 02:07:58.000000000 +0200
+++ sles-anobjrmap-2-works/include/asm-x86_64/rmap.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,7 +0,0 @@
-#ifndef _X8664_RMAP_H
-#define _X8664_RMAP_H
-
-/* nothing to see, move along */
-#include <asm-generic/rmap.h>
-
-#endif
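Every asm-*/rmap.h above existed so the pte-chain code could walk backwards from a raw pte to its mm and virtual address (ptep_to_mm(), ptep_to_address(), plus the HIGHPTE kmap helpers). With object-based rmap that direction is never needed: the unmap path starts from a vma and walks forward through the page tables, so the whole layer can be deleted. A schematic contrast, not code from the patch:

	/* pte-chain rmap: the pte is the starting point */
	mm      = ptep_to_mm(ptep);
	address = ptep_to_address(ptep);

	/* object rmap: the (vma, address) pair is the starting point */
	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_offset(pgd, address);
	pte = pte_offset_map(pmd, address);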
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/linux/mm.h sles-anobjrmap-2-works/include/linux/mm.h
--- sles-anobjrmap-1/include/linux/mm.h	2004-03-03 06:45:38.000000000 +0100
+++ sles-anobjrmap-2-works/include/linux/mm.h	2004-03-11 17:34:49.560836576 +0100
@@ -39,6 +39,22 @@ extern int page_cluster;
  * mmap() functions).
 */
 
+typedef struct anon_vma_s {
+	/* This serializes the accesses to the vma list. */
+	spinlock_t anon_vma_lock;
+
+	/*
+	 * This is a list of anonymous "related" vmas,
+	 * to scan if one of the pages pointing to this
+	 * anon_vma needs to be unmapped.
+	 * After we unlink the last vma we must garbage collect
+	 * the object if the list is empty because we're
+	 * guaranteed no page can be pointing to this anon_vma
+	 * if there's no vma anymore.
+	 */
+	struct list_head anon_vma_head;
+} anon_vma_t;
+
 /*
  * This struct defines a memory VMM memory area. There is one of these
  * per VM-area/task.  A VM area is any part of the process virtual memory
@@ -69,6 +85,19 @@ struct vm_area_struct {
 	 */
 	struct list_head shared;
 
+	/*
+	 * The same vma can be queued both in the i_mmap and in an
+	 * anon_vma, for example after a cow in
+	 * a MAP_PRIVATE file mapping. However only the MAP_PRIVATE
+	 * will go both in the i_mmap and anon_vma. A MAP_SHARED
+	 * will only be in the i_mmap_shared and a MAP_ANONYMOUS (file = 0)
+	 * will only be queued in the anon_vma.
+	 * The list is serialized by the anon_vma->lock.
+	 */
+	struct list_head anon_vma_node;
+	/* Serialized by the vma->vm_mm->page_table_lock */
+	anon_vma_t * anon_vma;
+
 	/* Function pointers to deal with this struct. */
 	struct vm_operations_struct * vm_ops;
 
@@ -172,16 +201,51 @@ struct page {
 					 updated asynchronously */
 	atomic_t count;			/* Usage count, see below. */
 	struct list_head list;		/* ->mapping has some page lists. */
-	struct address_space *mapping;	/* The inode (or ...) we belong to. */
 	unsigned long index;		/* Our offset within mapping. */
 	struct list_head lru;		/* Pageout list, eg. active_list;
 					   protected by zone->lru_lock !! */
+
+	/*
+	 * Address space of this page.
+	 * A page can be either mapped to a file or be anonymous
+	 * memory, so using the union is optimal here. The PG_anon
+	 * bitflag tells if this is anonymous or a file-mapping.
+	 * If PG_anon is clear we use the as.mapping, if PG_anon is
+	 * set and PG_direct is not set we use the as.anon_vma,
+	 * if PG_anon is set and PG_direct is set we use the as.vma.
+	 */
 	union {
-		struct pte_chain *chain;/* Reverse pte mapping pointer.
-					 * protected by PG_chainlock */
-		pte_addr_t direct;
-		int mapcount;
-	} pte;
+		/* The inode address space if it's a file mapping. */
+		struct address_space * mapping;
+
+		/*
+		 * This points to an anon_vma object.
+		 * The anon_vma can't go away under us if
+		 * we hold the PG_maplock.
+		 */
+		anon_vma_t * anon_vma;
+
+		/*
+		 * Before the first fork we avoid anon_vma object allocation
+		 * and we set PG_direct. anon_vma objects are only created
+		 * via fork(), and the vm then stops using the page->as.vma
+		 * and starts using the as.anon_vma object instead.
+		 * After the first fork(), even if the child exits, the pages
+		 * cannot be downgraded to PG_direct anymore (even if we
+		 * wanted to) because there's no way to reach pages starting
+		 * from an anon_vma object.
+		 */
+		struct vm_area_struct * vma;
+	} as;
+
+	/*
+	 * Number of ptes mapping this page.
+	 * It's serialized by PG_maplock.
+	 * This is needed only to maintain the nr_mapped global info
+	 * so it would be nice to drop it.
+	 */
+	unsigned long mapcount;
+
 	unsigned long private;		/* mapping-private opaque data */
 
 	/*
@@ -396,13 +460,11 @@ void page_address_init(void);
 #endif
 
 /*
- * Return true if this page is mapped into pagetables.  Subtle: test pte.direct
- * rather than pte.chain.  Because sometimes pte.direct is 64-bit, and .chain
- * is only 32-bit.
+ * Return true if this page is mapped into pagetables.
 */
 static inline int page_mapped(struct page *page)
 {
-	return page->pte.direct != 0;
+	return page->mapcount;
 }
 
 /*
@@ -440,7 +502,8 @@ void unmap_page_range(struct mmu_gather
 			unsigned long address, unsigned long size);
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma);
+			struct vm_area_struct *vma, struct vm_area_struct *orig_vma,
+			anon_vma_t ** anon_vma);
 int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
 			unsigned long size, pgprot_t prot);
 
@@ -459,7 +522,8 @@ extern int access_process_vm(struct task
 extern asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock);
 extern asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);
 void put_dirty_page(struct task_struct *tsk, struct page *page,
-			unsigned long address, pgprot_t prot);
+			unsigned long address, pgprot_t prot,
+			struct vm_area_struct *vma);
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
 
@@ -494,6 +558,18 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+static inline struct address_space * page_mapping(struct page * page)
+{
+	extern struct address_space swapper_space;
+	struct address_space * mapping = NULL;
+
+	if (unlikely(PageSwapCache(page)))
+		mapping = &swapper_space;
+	else if (!PageAnon(page))
+		mapping = page->as.mapping;
+	return mapping;
+}
+
 /*
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
@@ -501,10 +577,10 @@ extern void remove_shrinker(struct shrin
 */
 static inline int set_page_dirty(struct page *page)
 {
-	if (page->mapping) {
+	if (page_mapping(page)) {
 		int (*spd)(struct page *);
 
-		spd = page->mapping->a_ops->set_page_dirty;
+		spd = page_mapping(page)->a_ops->set_page_dirty;
 		if (spd)
 			return (*spd)(page);
 	}
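The three-way union in struct page is decoded from two page flags exactly as the comment spells out. The patch itself only provides page_mapping() for the file/swapcache side; a hypothetical helper for the anonymous side would look like this (an illustration of the intended decoding under PG_maplock, not code from the patch):

	static inline anon_vma_t *page_anon_vma(struct page *page)
	{
		if (!PageAnon(page))
			return NULL;			/* as.mapping is valid */
		if (PageDirect(page))
			return page->as.vma->anon_vma;	/* may still be NULL */
		return page->as.anon_vma;
	}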
+ * Return true if this page is mapped into pagetables. */ static inline int page_mapped(struct page *page) { - return page->pte.direct != 0; + return page->mapcount; } /* @@ -440,7 +502,8 @@ void unmap_page_range(struct mmu_gather unsigned long address, unsigned long size); void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma); + struct vm_area_struct *vma, struct vm_area_struct *orig_vma, + anon_vma_t ** anon_vma); int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long size, pgprot_t prot); @@ -459,7 +522,8 @@ extern int access_process_vm(struct task extern asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock); extern asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); void put_dirty_page(struct task_struct *tsk, struct page *page, - unsigned long address, pgprot_t prot); + unsigned long address, pgprot_t prot, + struct vm_area_struct *vma); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); @@ -494,6 +558,18 @@ struct shrinker; extern struct shrinker *set_shrinker(int, shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); +static inline struct address_space * page_mapping(struct page * page) +{ + extern struct address_space swapper_space; + struct address_space * mapping = NULL; + + if (unlikely(PageSwapCache(page))) + mapping = &swapper_space; + else if (!PageAnon(page)) + mapping = page->as.mapping; + return mapping; +} + /* * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. @@ -501,10 +577,10 @@ extern void remove_shrinker(struct shrin */ static inline int set_page_dirty(struct page *page) { - if (page->mapping) { + if (page_mapping(page)) { int (*spd)(struct page *); - spd = page->mapping->a_ops->set_page_dirty; + spd = page_mapping(page)->a_ops->set_page_dirty; if (spd) return (*spd)(page); } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/linux/objrmap.h sles-anobjrmap-2-works/include/linux/objrmap.h --- sles-anobjrmap-1/include/linux/objrmap.h 1970-01-01 01:00:00.000000000 +0100 +++ sles-anobjrmap-2-works/include/linux/objrmap.h 2004-03-12 00:50:03.676887504 +0100 @@ -0,0 +1,78 @@ +#ifndef _LINUX_RMAP_H +#define _LINUX_RMAP_H +/* + * Declarations for Object Reverse Mapping functions in mm/objrmap.c + */ +#include + +#ifdef CONFIG_MMU + +#include +#include +#include + +extern kmem_cache_t * anon_vma_cachep; + +#define page_map_lock(page) bit_spin_lock(PG_maplock, &page->flags) +#define page_map_unlock(page) bit_spin_unlock(PG_maplock, &page->flags) + +static inline void anon_vma_free(anon_vma_t * anon_vma) +{ + kmem_cache_free(anon_vma_cachep, anon_vma); +} + +static inline anon_vma_t * anon_vma_alloc(void) +{ + return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL); +} + +static inline void anon_vma_lock(struct vm_area_struct * vma) +{ + anon_vma_t * anon_vma = vma->anon_vma; + if (anon_vma) + spin_lock(&anon_vma->anon_vma_lock); +} + +static inline void anon_vma_unlock(struct vm_area_struct * vma) +{ + anon_vma_t * anon_vma = vma->anon_vma; + if (anon_vma) + spin_unlock(&anon_vma->anon_vma_lock); +} + +/* + * anon_vma helper functions. 
The one starting with __ requires + * the caller to hold the anon_vma_lock, the others take it + * internally. + */ +extern void FASTCALL(anon_vma_merge(struct vm_area_struct * vma, + struct vm_area_struct * vma_dying)); +extern void FASTCALL(anon_vma_unlink(struct vm_area_struct * vma)); +extern void FASTCALL(__anon_vma_link(struct vm_area_struct * vma)); + +/* objrmap tracking functions */ +void FASTCALL(page_add_rmap(struct page *, struct vm_area_struct *, unsigned long)); +void FASTCALL(page_add_rmap_fork(struct page *, struct vm_area_struct *, + struct vm_area_struct *, anon_vma_t **, int *)); +void FASTCALL(page_remove_rmap(struct page *)); + +/* + * Called from mm/vmscan.c to handle paging out + */ +int FASTCALL(try_to_unmap(struct page *)); +int FASTCALL(page_referenced(struct page *)); + +/* + * Return values of try_to_unmap + */ +#define SWAP_SUCCESS 0 +#define SWAP_AGAIN 1 +#define SWAP_FAIL 2 + +#else /* !CONFIG_MMU */ + +#define page_referenced(page) TestClearPageReferenced(page) + +#endif /* CONFIG_MMU */ + +#endif /* _LINUX_RMAP_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/linux/page-flags.h sles-anobjrmap-2-works/include/linux/page-flags.h --- sles-anobjrmap-1/include/linux/page-flags.h 2004-03-03 06:45:38.000000000 +0100 +++ sles-anobjrmap-2-works/include/linux/page-flags.h 2004-03-11 13:48:37.000000000 +0100 @@ -69,13 +69,14 @@ #define PG_private 12 /* Has something at ->private */ #define PG_writeback 13 /* Page is under writeback */ #define PG_nosave 14 /* Used for system suspend/resume */ -#define PG_chainlock 15 /* lock bit for ->pte_chain */ +#define PG_maplock 15 /* lock bit for ->as.anon_vma and ->mapcount */ -#define PG_direct 16 /* ->pte_chain points directly at pte */ +#define PG_direct 16 /* if set it must use page->as.vma */ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ #define PG_compound 19 /* Part of a compound page */ #define PG_anon 20 /* Anonymous page */ +#define PG_swapcache 21 /* SwapCache page */ /* @@ -275,13 +276,10 @@ extern void get_full_page_state(struct p #define SetPageAnon(page) set_bit(PG_anon, &(page)->flags) #define ClearPageAnon(page) clear_bit(PG_anon, &(page)->flags) -/* - * The PageSwapCache predicate doesn't use a PG_flag at this time, - * but it may again do so one day.
- */ #ifdef CONFIG_SWAP -extern struct address_space swapper_space; -#define PageSwapCache(page) ((page)->mapping == &swapper_space) +#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) +#define SetPageSwapCache(page) set_bit(PG_swapcache, &(page)->flags) +#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags) #else #define PageSwapCache(page) 0 #endif diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/linux/pagemap.h sles-anobjrmap-2-works/include/linux/pagemap.h --- sles-anobjrmap-1/include/linux/pagemap.h 2004-01-15 18:36:24.000000000 +0100 +++ sles-anobjrmap-2-works/include/linux/pagemap.h 2004-03-11 15:23:31.410497472 +0100 @@ -141,9 +141,17 @@ static inline unsigned long get_page_cac static inline void ___add_to_page_cache(struct page *page, struct address_space *mapping, unsigned long index) { + extern struct address_space swapper_space; + list_add(&page->list, &mapping->clean_pages); - page->mapping = mapping; - page->index = index; + if (likely(mapping != &swapper_space)) { + BUG_ON(PageAnon(page)); + page->as.mapping = mapping; + page->index = index; + } else { + SetPageSwapCache(page); + page->private = index; + } mapping->nrpages++; pagecache_acct(1); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/include/linux/rmap.h sles-anobjrmap-2-works/include/linux/rmap.h --- sles-anobjrmap-1/include/linux/rmap.h 2004-03-05 05:27:41.000000000 +0100 +++ sles-anobjrmap-2-works/include/linux/rmap.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,53 +0,0 @@ -#ifndef _LINUX_RMAP_H -#define _LINUX_RMAP_H -/* - * Declarations for Reverse Mapping functions in mm/rmap.c - * Its structures are declared within that file. - */ -#include - -#ifdef CONFIG_MMU - -#include -#include - -struct pte_chain; -extern kmem_cache_t *pte_chain_cache; - -#define pte_chain_lock(page) bit_spin_lock(PG_chainlock, &page->flags) -#define pte_chain_unlock(page) bit_spin_unlock(PG_chainlock, &page->flags) - -struct pte_chain *pte_chain_alloc(int gfp_flags); -void __pte_chain_free(struct pte_chain *pte_chain); - -static inline void pte_chain_free(struct pte_chain *pte_chain) -{ - if (pte_chain) - __pte_chain_free(pte_chain); -} - -int FASTCALL(page_referenced(struct page *)); -struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *, - struct pte_chain *)); -void FASTCALL(page_remove_rmap(struct page *, pte_t *)); -int page_convert_anon(struct page *); - -/* - * Called from mm/vmscan.c to handle paging out - */ -int FASTCALL(try_to_unmap(struct page *)); - -/* - * Return values of try_to_unmap - */ -#define SWAP_SUCCESS 0 -#define SWAP_AGAIN 1 -#define SWAP_FAIL 2 - -#else /* !CONFIG_MMU */ - -#define page_referenced(page) TestClearPageReferenced(page) - -#endif /* CONFIG_MMU */ - -#endif /* _LINUX_RMAP_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/init/main.c sles-anobjrmap-2-works/init/main.c --- sles-anobjrmap-1/init/main.c 2004-02-29 17:47:36.000000000 +0100 +++ sles-anobjrmap-2-works/init/main.c 2004-03-09 05:32:34.000000000 +0100 @@ -85,7 +85,7 @@ extern void signals_init(void); extern void buffer_init(void); extern void pidhash_init(void); extern void pidmap_init(void); -extern void pte_chain_init(void); +extern void anon_vma_init(void); extern void radix_tree_init(void); extern void free_initmem(void); extern void populate_rootfs(void); @@ -495,7 +495,7 @@ asmlinkage void __init start_kernel(void calibrate_delay(); 
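/*
 * Aside: a sketch of the lifecycle behind the anon_vma_init() call wired in
 * here. One anon_vma object is shared by every vma that can map the same
 * anonymous pages; it is created at the first fork() and garbage collected
 * when the last vma unlinks, exactly as the mm.h comments above describe.
 * This is an illustrative userspace mock under simplified assumptions (no
 * locking, a bare singly linked list instead of list_head), not the patch's
 * kernel code; all names below are stand-ins.
 */
#include <assert.h>
#include <stdlib.h>

struct vma;
typedef struct anon_vma { struct vma *head; } anon_vma_t;
struct vma { anon_vma_t *anon_vma; struct vma *anon_vma_next; };

/* fork path: the child vma joins (or creates) the parent's anon_vma */
static void mock_anon_vma_link_fork(struct vma *child, struct vma *parent,
				    anon_vma_t **prealloc)
{
	if (!parent->anon_vma) {
		/* first fork: consume the preallocated object */
		parent->anon_vma = *prealloc;
		*prealloc = NULL;
		parent->anon_vma->head = parent;
	}
	child->anon_vma = parent->anon_vma;
	child->anon_vma_next = child->anon_vma->head;
	child->anon_vma->head = child;
}

/* unmap path: unlink, and free the object once its list drains */
static void mock_anon_vma_unlink(struct vma *vma)
{
	anon_vma_t *av = vma->anon_vma;
	struct vma **pp;

	if (!av)
		return;
	for (pp = &av->head; *pp; pp = &(*pp)->anon_vma_next)
		if (*pp == vma) {
			*pp = vma->anon_vma_next;
			break;
		}
	if (!av->head)	/* no page can point to this anon_vma anymore */
		free(av);
	vma->anon_vma = NULL;
}

int main(void)
{
	struct vma parent = { 0, 0 }, child = { 0, 0 };
	anon_vma_t *prealloc = calloc(1, sizeof(*prealloc));

	mock_anon_vma_link_fork(&child, &parent, &prealloc);
	assert(parent.anon_vma == child.anon_vma);
	mock_anon_vma_unlink(&child);
	mock_anon_vma_unlink(&parent);	/* last unlink frees the object */
	return 0;
}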
pidmap_init(); pgtable_cache_init(); - pte_chain_init(); + anon_vma_init(); #ifdef CONFIG_KDB kdb_init(); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/kernel/fork.c sles-anobjrmap-2-works/kernel/fork.c --- sles-anobjrmap-1/kernel/fork.c 2004-02-29 17:47:33.000000000 +0100 +++ sles-anobjrmap-2-works/kernel/fork.c 2004-03-12 00:45:43.146494144 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ static inline int dup_mmap(struct mm_str struct vm_area_struct * mpnt, *tmp, **pprev; int retval; unsigned long charge = 0; + anon_vma_t * anon_vma = NULL; down_write(&oldmm->mmap_sem); flush_cache_mm(current->mm); @@ -310,6 +312,11 @@ static inline int dup_mmap(struct mm_str goto fail_nomem; charge += len; } + if (!anon_vma) { + anon_vma = anon_vma_alloc(); + if (!anon_vma) + goto fail_nomem; + } tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!tmp) goto fail_nomem; @@ -319,6 +326,12 @@ static inline int dup_mmap(struct mm_str tmp->vm_next = NULL; file = tmp->vm_file; INIT_LIST_HEAD(&tmp->shared); + /* + * No need to setup tmp->anon_vma here, if it's + * empty (and in turn we'll not call + * page_add_rmap_fork) it was already null + * in the mpnt. + */ if (file) { struct inode *inode = file->f_dentry->d_inode; get_file(file); @@ -339,7 +352,7 @@ static inline int dup_mmap(struct mm_str *pprev = tmp; pprev = &tmp->vm_next; mm->map_count++; - retval = copy_page_range(mm, current->mm, tmp); + retval = copy_page_range(mm, current->mm, tmp, mpnt, &anon_vma); spin_unlock(&mm->page_table_lock); if (tmp->vm_ops && tmp->vm_ops->open) @@ -354,6 +367,8 @@ static inline int dup_mmap(struct mm_str out: flush_tlb_mm(current->mm); up_write(&oldmm->mmap_sem); + if (anon_vma) + anon_vma_free(anon_vma); return retval; fail_nomem: retval = -ENOMEM; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/filemap.c sles-anobjrmap-2-works/mm/filemap.c --- sles-anobjrmap-1/mm/filemap.c 2004-03-03 06:45:38.000000000 +0100 +++ sles-anobjrmap-2-works/mm/filemap.c 2004-03-12 03:08:30.012132608 +0100 @@ -97,11 +97,17 @@ */ void __remove_from_page_cache(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); - radix_tree_delete(&mapping->page_tree, page->index); list_del(&page->list); - page->mapping = NULL; + if (!likely(PageSwapCache(page))) { + BUG_ON(PageAnon(page)); + radix_tree_delete(&mapping->page_tree, page->index); + page->as.mapping = NULL; + } else { + radix_tree_delete(&mapping->page_tree, page->private); + ClearPageSwapCache(page); + } mapping->nrpages--; pagecache_acct(-1); @@ -109,7 +115,7 @@ void __remove_from_page_cache(struct pag void remove_from_page_cache(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); if (unlikely(!PageLocked(page))) PAGE_BUG(page); @@ -121,7 +127,7 @@ void remove_from_page_cache(struct page static inline int sync_page(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); if (mapping && mapping->a_ops && mapping->a_ops->sync_page) return mapping->a_ops->sync_page(page); @@ -451,7 +457,9 @@ repeat: spin_lock(&mapping->page_lock); /* Has the page been truncated while we slept? 
*/ - if (page->mapping != mapping || page->index != offset) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (page->as.mapping != mapping || page->index != offset) { unlock_page(page); page_cache_release(page); goto repeat; @@ -668,7 +676,7 @@ page_not_up_to_date: lock_page(page); /* Did it get unhashed before we got the lock? */ - if (!page->mapping) { + if (!page_mapping(page)) { unlock_page(page); page_cache_release(page); continue; @@ -1124,7 +1132,9 @@ page_not_uptodate: lock_page(page); /* Did it get unhashed while we waited for it? */ - if (!page->mapping) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (!page->as.mapping) { unlock_page(page); page_cache_release(page); goto retry_all; @@ -1151,7 +1161,9 @@ page_not_uptodate: lock_page(page); /* Somebody truncated the page on us? */ - if (!page->mapping) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (!page->as.mapping) { unlock_page(page); page_cache_release(page); goto retry_all; @@ -1233,7 +1245,9 @@ page_not_uptodate: lock_page(page); /* Did it get unhashed while we waited for it? */ - if (!page->mapping) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (!page->as.mapping) { unlock_page(page); goto err; } @@ -1259,7 +1273,9 @@ page_not_uptodate: lock_page(page); /* Somebody truncated the page on us? */ - if (!page->mapping) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (!page->as.mapping) { unlock_page(page); goto err; } @@ -1444,7 +1460,9 @@ retry: goto out; lock_page(page); - if (!page->mapping) { + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (!page->as.mapping) { unlock_page(page); page_cache_release(page); goto retry; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/fremap.c sles-anobjrmap-2-works/mm/fremap.c --- sles-anobjrmap-1/mm/fremap.c 2004-03-05 05:24:20.000000000 +0100 +++ sles-anobjrmap-2-works/mm/fremap.c 2004-03-11 16:30:52.570147824 +0100 @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -36,7 +36,7 @@ static inline void zap_pte(struct mm_str if (!PageReserved(page)) { if (pte_dirty(pte)) set_page_dirty(page); - page_remove_rmap(page, ptep); + page_remove_rmap(page); page_cache_release(page); mm->rss--; } @@ -60,26 +60,6 @@ int install_page(struct mm_struct *mm, s pgd_t *pgd; pmd_t *pmd; pte_t pte_val; - struct pte_chain *pte_chain; - unsigned long pgidx; - - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto err; - - /* - * Convert this page to anon for objrmap if it's nonlinear - */ - pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (!PageAnon(page) && (page->index != pgidx)) { - lock_page(page); - err = page_convert_anon(page); - unlock_page(page); - if (err < 0) - goto err_free; - } pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); @@ -97,7 +77,7 @@ int install_page(struct mm_struct *mm, s mm->rss++; flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); - pte_chain = page_add_rmap(page, pte, pte_chain); + page_add_rmap(page, vma, addr); pte_val = *pte; pte_unmap(pte); update_mmu_cache(vma, addr, pte_val); @@ -105,9 +85,6 @@ int install_page(struct mm_struct *mm, s err = 0; err_unlock: spin_unlock(&mm->page_table_lock); -err_free: - pte_chain_free(pte_chain); -err: return err; } EXPORT_SYMBOL(install_page); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/Makefile 
sles-anobjrmap-2-works/mm/Makefile --- sles-anobjrmap-1/mm/Makefile 2004-02-29 17:47:30.000000000 +0100 +++ sles-anobjrmap-2-works/mm/Makefile 2004-03-10 20:26:16.000000000 +0100 @@ -4,7 +4,7 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ - mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ + mlock.o mmap.o mprotect.o mremap.o msync.o objrmap.o \ shmem.o vmalloc.o obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/memory.c sles-anobjrmap-2-works/mm/memory.c --- sles-anobjrmap-1/mm/memory.c 2004-03-05 05:24:35.000000000 +0100 +++ sles-anobjrmap-2-works/mm/memory.c 2004-03-12 01:19:58.384050544 +0100 @@ -43,12 +43,11 @@ #include #include #include -#include +#include #include #include #include -#include #include #include #include @@ -105,7 +104,6 @@ static inline void free_one_pmd(struct m } page = pmd_page(*dir); pmd_clear(dir); - pgtable_remove_rmap(page); pte_free_tlb(tlb, page); } @@ -164,7 +162,6 @@ pte_t fastcall * pte_alloc_map(struct mm pte_free(new); goto out; } - pgtable_add_rmap(new, mm, address); pmd_populate(mm, pmd, new); } out: @@ -190,7 +187,6 @@ pte_t fastcall * pte_alloc_kernel(struct pte_free_kernel(new); goto out; } - pgtable_add_rmap(virt_to_page(new), mm, address); pmd_populate_kernel(mm, pmd, new); } out: @@ -211,26 +207,18 @@ out: * but may be dropped within pmd_alloc() and pte_alloc_map(). */ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) + struct vm_area_struct *vma, struct vm_area_struct *orig_vma, + anon_vma_t ** anon_vma) { pgd_t * src_pgd, * dst_pgd; unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; unsigned long cow; - struct pte_chain *pte_chain = NULL; + int anon_vma_created = 0; if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst, src, vma); - pte_chain = pte_chain_alloc(GFP_ATOMIC); - if (!pte_chain) { - spin_unlock(&dst->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - spin_lock(&dst->page_table_lock); - if (!pte_chain) - goto nomem; - } - cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -299,7 +287,7 @@ skip_copy_pte_range: pfn = pte_pfn(pte); /* the pte points outside of valid memory, the * mapping is assumed to be good, meaningful - * and not mapped via rmap - duplicate the + * and not mapped via objrmap - duplicate the * mapping as is. */ page = NULL; @@ -331,30 +319,22 @@ skip_copy_pte_range: dst->rss++; set_pte(dst_pte, pte); - pte_chain = page_add_rmap(page, dst_pte, - pte_chain); - if (pte_chain) - goto cont_copy_pte_range_noset; - pte_chain = pte_chain_alloc(GFP_ATOMIC); - if (pte_chain) - goto cont_copy_pte_range_noset; + page_add_rmap_fork(page, vma, orig_vma, + anon_vma, &anon_vma_created); + + if (need_resched()) { + pte_unmap_nested(src_pte); + pte_unmap(dst_pte); + spin_unlock(&src->page_table_lock); + spin_unlock(&dst->page_table_lock); + __cond_resched(); + spin_lock(&dst->page_table_lock); + spin_lock(&src->page_table_lock); + dst_pte = pte_offset_map(dst_pmd, address); + src_pte = pte_offset_map_nested(src_pmd, + address); + } - /* - * pte_chain allocation failed, and we need to - * run page reclaim. 
- */ - pte_unmap_nested(src_pte); - pte_unmap(dst_pte); - spin_unlock(&src->page_table_lock); - spin_unlock(&dst->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - spin_lock(&dst->page_table_lock); - if (!pte_chain) - goto nomem; - spin_lock(&src->page_table_lock); - dst_pte = pte_offset_map(dst_pmd, address); - src_pte = pte_offset_map_nested(src_pmd, - address); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { @@ -377,10 +357,9 @@ cont_copy_pmd_range: out_unlock: spin_unlock(&src->page_table_lock); out: - pte_chain_free(pte_chain); return 0; + nomem: - pte_chain_free(pte_chain); return -ENOMEM; } @@ -417,11 +396,11 @@ zap_pte_range(struct mmu_gather *tlb, pm if (!PageReserved(page)) { if (pte_dirty(pte)) set_page_dirty(page); - if (page->mapping && pte_young(pte) && + if (page_mapping(page) && pte_young(pte) && !PageSwapCache(page)) mark_page_accessed(page); tlb->freed++; - page_remove_rmap(page, ptep); + page_remove_rmap(page); tlb_remove_page(tlb, page); } } @@ -1014,7 +993,6 @@ static int do_wp_page(struct mm_struct * { struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); - struct pte_chain *pte_chain; pte_t entry; if (unlikely(!pfn_valid(pfn))) { @@ -1053,9 +1031,6 @@ static int do_wp_page(struct mm_struct * page_cache_get(old_page); spin_unlock(&mm->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto no_pte_chain; new_page = alloc_page(GFP_HIGHUSER); if (!new_page) goto no_new_page; @@ -1069,10 +1044,10 @@ static int do_wp_page(struct mm_struct * if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) ++mm->rss; - page_remove_rmap(old_page, page_table); + page_remove_rmap(old_page); break_cow(vma, new_page, address, page_table); SetPageAnon(new_page); - pte_chain = page_add_rmap(new_page, page_table, pte_chain); + page_add_rmap(new_page, vma, address); lru_cache_add_active(new_page); /* Free the old page.. 
*/ @@ -1082,12 +1057,9 @@ static int do_wp_page(struct mm_struct * page_cache_release(new_page); page_cache_release(old_page); spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); return VM_FAULT_MINOR; no_new_page: - pte_chain_free(pte_chain); -no_pte_chain: page_cache_release(old_page); return VM_FAULT_OOM; } @@ -1245,7 +1217,6 @@ static int do_swap_page(struct mm_struct swp_entry_t entry = pte_to_swp_entry(orig_pte); pte_t pte; int ret = VM_FAULT_MINOR; - struct pte_chain *pte_chain = NULL; pte_unmap(page_table); spin_unlock(&mm->page_table_lock); @@ -1275,11 +1246,6 @@ static int do_swap_page(struct mm_struct } mark_page_accessed(page); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - ret = VM_FAULT_OOM; - goto out; - } lock_page(page); /* @@ -1312,14 +1278,13 @@ static int do_swap_page(struct mm_struct flush_icache_page(vma, page); set_pte(page_table, pte); SetPageAnon(page); - pte_chain = page_add_rmap(page, page_table, pte_chain); + page_add_rmap(page, vma, address); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); out: - pte_chain_free(pte_chain); return ret; } @@ -1335,20 +1300,8 @@ do_anonymous_page(struct mm_struct *mm, { pte_t entry; struct page * page = ZERO_PAGE(addr); - struct pte_chain *pte_chain; int ret; - pte_chain = pte_chain_alloc(GFP_ATOMIC); - if (!pte_chain) { - pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto no_mem; - spin_lock(&mm->page_table_lock); - page_table = pte_offset_map(pmd, addr); - } - /* Read-only mapping of ZERO_PAGE. */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); @@ -1359,8 +1312,8 @@ do_anonymous_page(struct mm_struct *mm, spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); - if (!page) - goto no_mem; + if (unlikely(!page)) + return VM_FAULT_OOM; clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); @@ -1370,8 +1323,7 @@ do_anonymous_page(struct mm_struct *mm, pte_unmap(page_table); page_cache_release(page); spin_unlock(&mm->page_table_lock); - ret = VM_FAULT_MINOR; - goto out; + return VM_FAULT_MINOR; } mm->rss++; entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, @@ -1383,20 +1335,16 @@ do_anonymous_page(struct mm_struct *mm, } set_pte(page_table, entry); - /* ignores ZERO_PAGE */ - pte_chain = page_add_rmap(page, page_table, pte_chain); pte_unmap(page_table); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); spin_unlock(&mm->page_table_lock); ret = VM_FAULT_MINOR; - goto out; -no_mem: - ret = VM_FAULT_OOM; -out: - pte_chain_free(pte_chain); + /* ignores ZERO_PAGE */ + page_add_rmap(page, vma, addr); + return ret; } @@ -1419,7 +1367,6 @@ do_no_page(struct mm_struct *mm, struct struct page * new_page; struct address_space *mapping = NULL; pte_t entry; - struct pte_chain *pte_chain; int sequence = 0; int ret = VM_FAULT_MINOR; @@ -1443,12 +1390,8 @@ retry: if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto oom; - /* See if nopage returned an anon page */ - if (!new_page->mapping || PageSwapCache(new_page)) + if (!new_page->as.mapping || PageSwapCache(new_page)) SetPageAnon(new_page); /* @@ -1476,7 +1419,6 @@ retry: sequence = atomic_read(&mapping->truncate_count); spin_unlock(&mm->page_table_lock); page_cache_release(new_page); - pte_chain_free(pte_chain); goto retry; } page_table = 
pte_offset_map(pmd, address); @@ -1500,7 +1442,7 @@ retry: if (write_access) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte(page_table, entry); - pte_chain = page_add_rmap(new_page, page_table, pte_chain); + page_add_rmap(new_page, vma, address); pte_unmap(page_table); } else { /* One of our sibling threads was faster, back out. */ @@ -1513,13 +1455,13 @@ retry: /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); spin_unlock(&mm->page_table_lock); - goto out; -oom: + out: + return ret; + + oom: page_cache_release(new_page); ret = VM_FAULT_OOM; -out: - pte_chain_free(pte_chain); - return ret; + goto out; } /* diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/mmap.c sles-anobjrmap-2-works/mm/mmap.c --- sles-anobjrmap-1/mm/mmap.c 2004-03-03 06:53:46.000000000 +0100 +++ sles-anobjrmap-2-works/mm/mmap.c 2004-03-12 01:31:46.232441232 +0100 @@ -6,6 +6,19 @@ * Address space accounting code */ +/* + * All modifications to vm_start/vm_pgoff must happen + * under the semaphore (for file mappings) and under the + * anon_vma->anon_vma_lock (for anon mappings), to serialize + * against truncate and other objrmap users. See move_vma_start. + * + * Do I need to document that we must always take the i_shared_sem + * _semaphore_ before the anon_vma_lock _spinlock_? ;) + * + * We take the page_table_lock then the PG_maplock and finally + * the anon_vma_lock (fork requires that ordering). + */ + #include #include #include @@ -20,6 +33,7 @@ #include #include #include +#include #include #include @@ -62,7 +76,7 @@ EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem */ -static inline void +static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode) { if (inode) { @@ -254,6 +268,7 @@ __vma_link(struct mm_struct *mm, struct __vma_link_list(mm, vma, prev, rb_parent); __vma_link_rb(mm, vma, rb_link, rb_parent); __vma_link_file(vma); + __anon_vma_link(vma); } static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, @@ -267,7 +282,9 @@ static void vma_link(struct mm_struct *m if (mapping) down(&mapping->i_shared_sem); + anon_vma_lock(vma); __vma_link(mm, vma, prev, rb_link, rb_parent); + anon_vma_unlock(vma); if (mapping) up(&mapping->i_shared_sem); @@ -316,20 +333,15 @@ static inline int is_mergeable_vma(struc return 1; } -/* requires that the relevant i_shared_sem be held by the caller */ +/* + * Requires that the relevant i_shared_sem and anon_vma_lock + * be held by the caller. + */ static void move_vma_start(struct vm_area_struct *vma, unsigned long addr) { - struct inode *inode = NULL; - - if (vma->vm_file) - inode = vma->vm_file->f_dentry->d_inode; - if (inode) - __remove_shared_vm_struct(vma, inode); - /* If no vm_file, perhaps we should always keep vm_pgoff at 0?? */ - vma->vm_pgoff += (long)(addr - vma->vm_start) >> PAGE_SHIFT; + /* we must update pgoff even if no vm_file for the anon_vma */ + vma->vm_pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; vma->vm_start = addr; - if (inode) - __vma_link_file(vma); } /* @@ -341,15 +353,28 @@ static void move_vma_start(struct vm_are * wrap, nor mmaps which cover the final page at index -1UL.
*/ static int -can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, - struct file *file, unsigned long vm_pgoff, unsigned long size) -{ - if (is_mergeable_vma(vma, file, vm_flags)) { - if (!file) - return 1; /* anon mapping */ - if (vma->vm_pgoff == vm_pgoff + size) +can_vma_merge_before(struct vm_area_struct *prev, + struct vm_area_struct *vma, unsigned long vm_flags, + struct file *file, unsigned long vm_pgoff, unsigned long size) +{ + if (is_mergeable_vma(vma, file, vm_flags)) + if (vma->vm_pgoff == vm_pgoff + size) { + if (prev) { + /* + * We can fill a hole only if the two + * anonymous mappings are queued in the same + * anon_vma, or if one of them is "direct" + * and it can be queued in the existing + * anon_vma. + * + * Must check this even if file != NULL + * for MAP_PRIVATE mappings. + */ + return ((!vma->anon_vma || !prev->anon_vma) || + (vma->anon_vma == prev->anon_vma)); + } return 1; - } + } return 0; } @@ -364,9 +389,6 @@ can_vma_merge_after(struct vm_area_struc if (is_mergeable_vma(vma, file, vm_flags)) { unsigned long vma_size; - if (!file) - return 1; /* anon mapping */ - vma_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; if (vma->vm_pgoff + vma_size == vm_pgoff) return 1; @@ -405,30 +427,43 @@ static int vma_merge(struct mm_struct *m * Can it merge with the predecessor? */ if (prev->vm_end == addr && - is_mergeable_vma(prev, file, vm_flags) && can_vma_merge_after(prev, vm_flags, file, pgoff)) { struct vm_area_struct *next; - int need_up = 0; - if (unlikely(file && prev->vm_next && - prev->vm_next->vm_file == file)) { - down(i_shared_sem); - need_up = 1; - } + /* + * this can happen outside the i_shared_sem and outside + * the anon_vma_lock since it only enlarges the size of + * the vma, there are no ptes mapped in this new extended + * region anyway. + */ prev->vm_end = end; /* * OK, it did. Can we now merge in the successor as well? */ next = prev->vm_next; + /* next cannot change under us, it's serialized by the mmap_sem */ if (next && prev->vm_end == next->vm_start && - can_vma_merge_before(next, vm_flags, file, + can_vma_merge_before(prev, next, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) { + /* + * the vm_end extension on the right can happen as usual + * outside the i_shared_sem/anon_vma_lock. + */ prev->vm_end = next->vm_end; + + /* serialized by the mmap_sem */ __vma_unlink(mm, next, prev); + + if (file) + down(i_shared_sem); __remove_shared_vm_struct(next, inode); - if (need_up) + if (file) up(i_shared_sem); + + /* the anon_vma_lock is taken inside */ + anon_vma_merge(prev, next); + if (file) fput(file); @@ -436,8 +471,6 @@ static int vma_merge(struct mm_struct *m kmem_cache_free(vm_area_cachep, next); return 1; } - if (need_up) - up(i_shared_sem); return 1; } @@ -447,13 +480,15 @@ static int vma_merge(struct mm_struct *m prev = prev->vm_next; if (prev) { merge_next: - if (!can_vma_merge_before(prev, vm_flags, file, + if (!can_vma_merge_before(NULL, prev, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) return 0; if (end == prev->vm_start) { if (file) down(i_shared_sem); + anon_vma_lock(prev); move_vma_start(prev, addr); + anon_vma_unlock(prev); if (file) up(i_shared_sem); return 1; @@ -576,6 +611,7 @@ unsigned long __do_mmap_pgoff(struct mm_ case MAP_SHARED: break; } + pgoff = addr >> PAGE_SHIFT; } error = security_file_mmap(file, prot, flags); @@ -615,7 +651,7 @@ munmap_back: /* Can we just expand an old anonymous mapping?
*/ if (!file && !(vm_flags & VM_SHARED) && rb_parent) if (vma_merge(mm, prev, rb_parent, addr, addr + len, - vm_flags, NULL, 0)) + vm_flags, NULL, pgoff)) goto out; /* @@ -639,6 +675,7 @@ munmap_back: vma->vm_private_data = NULL; vma->vm_next = NULL; INIT_LIST_HEAD(&vma->shared); + vma->anon_vma = NULL; if (file) { error = -EINVAL; @@ -1097,6 +1134,9 @@ static void unmap_vma(struct mm_struct * area->vm_ops->close(area); if (area->vm_file) fput(area->vm_file); + + anon_vma_unlink(area); + kmem_cache_free(vm_area_cachep, area); } @@ -1207,6 +1247,7 @@ int split_vma(struct mm_struct * mm, str if (mapping) down(&mapping->i_shared_sem); spin_lock(&mm->page_table_lock); + anon_vma_lock(vma); if (new_below) move_vma_start(vma, addr); @@ -1215,6 +1256,7 @@ int split_vma(struct mm_struct * mm, str __insert_vm_struct(mm, new); + anon_vma_unlock(vma); spin_unlock(&mm->page_table_lock); if (mapping) up(&mapping->i_shared_sem); @@ -1319,6 +1361,7 @@ unsigned long do_brk(unsigned long addr, struct vm_area_struct * vma, * prev; unsigned long flags; struct rb_node ** rb_link, * rb_parent; + unsigned long pgoff; len = PAGE_ALIGN(len); if (!len) @@ -1361,9 +1404,11 @@ unsigned long do_brk(unsigned long addr, flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + pgoff = addr >> PAGE_SHIFT; + /* Can we just expand an old anonymous mapping? */ if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, - flags, NULL, 0)) + flags, NULL, pgoff)) goto out; /* @@ -1381,10 +1426,11 @@ unsigned long do_brk(unsigned long addr, vma->vm_flags = flags; vma->vm_page_prot = protection_map[flags & 0x0f]; vma->vm_ops = NULL; - vma->vm_pgoff = 0; + vma->vm_pgoff = pgoff; vma->vm_file = NULL; vma->vm_private_data = NULL; INIT_LIST_HEAD(&vma->shared); + vma->anon_vma = NULL; vma_link(mm, vma, prev, rb_link, rb_parent); @@ -1460,6 +1506,7 @@ void exit_mmap(struct mm_struct *mm) } if (vma->vm_file) fput(vma->vm_file); + anon_vma_unlink(vma); kmem_cache_free(vm_area_cachep, vma); vma = next; } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/mprotect.c sles-anobjrmap-2-works/mm/mprotect.c --- sles-anobjrmap-1/mm/mprotect.c 2004-02-29 17:47:30.000000000 +0100 +++ sles-anobjrmap-2-works/mm/mprotect.c 2004-03-11 16:47:19.308140848 +0100 @@ -106,6 +106,8 @@ change_protection(struct vm_area_struct spin_unlock(¤t->mm->page_table_lock); return; } + +#if VMA_MERGING_FIXUP /* * Try to merge a vma with the previous flag, return 1 if successful or 0 if it * was impossible. @@ -149,6 +151,7 @@ mprotect_attempt_merge(struct vm_area_st spin_unlock(&mm->page_table_lock); return 1; } +#endif static int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, @@ -184,6 +187,7 @@ mprotect_fixup(struct vm_area_struct *vm newprot = protection_map[newflags & 0xf]; if (start == vma->vm_start) { +#if VMA_MERGING_FIXUP /* * Try to merge with the previous vma. 
*/ @@ -191,6 +195,7 @@ mprotect_fixup(struct vm_area_struct *vm vma = *pprev; goto success; } +#endif } else { error = split_vma(mm, vma, start, 1); if (error) @@ -212,7 +217,9 @@ mprotect_fixup(struct vm_area_struct *vm vma->vm_flags = newflags; vma->vm_page_prot = newprot; spin_unlock(&mm->page_table_lock); +#if VMA_MERGING_FIXUP success: +#endif change_protection(vma, start, end, newprot); return 0; @@ -315,6 +322,7 @@ do_mprotect(struct mm_struct *mm, unsign } } +#if VMA_MERGING_FIXUP if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) && !prev->vm_file && !(prev->vm_flags & VM_SHARED)) { @@ -326,6 +334,7 @@ do_mprotect(struct mm_struct *mm, unsign kmem_cache_free(vm_area_cachep, next); prev->vm_mm->map_count--; } +#endif out: up_write(&mm->mmap_sem); return error; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/mremap.c sles-anobjrmap-2-works/mm/mremap.c --- sles-anobjrmap-1/mm/mremap.c 2004-03-05 05:24:38.000000000 +0100 +++ sles-anobjrmap-2-works/mm/mremap.c 2004-03-11 16:51:15.500234192 +0100 @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -81,7 +80,7 @@ static inline pte_t *alloc_one_pte_map(s static int copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr, - pte_t *src, pte_t *dst, struct pte_chain **pte_chainp) + pte_t *src, pte_t *dst) { int error = 0; pte_t pte; @@ -91,8 +90,6 @@ copy_one_pte(struct vm_area_struct *vma, page = pte_page(*src); if (!pte_none(*src)) { - if (page) - page_remove_rmap(page, src); pte = ptep_clear_flush(vma, old_addr, src); if (!dst) { /* No dest? We must put it back. */ @@ -100,8 +97,6 @@ copy_one_pte(struct vm_area_struct *vma, error++; } set_pte(dst, pte); - if (page) - *pte_chainp = page_add_rmap(page, dst, *pte_chainp); } return error; } @@ -113,13 +108,7 @@ move_one_page(struct vm_area_struct *vma struct mm_struct *mm = vma->vm_mm; int error = 0; pte_t *src, *dst; - struct pte_chain *pte_chain; - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - error = -ENOMEM; - goto out; - } spin_lock(&mm->page_table_lock); src = get_one_pte_map_nested(mm, old_addr); if (src) { @@ -140,15 +129,12 @@ move_one_page(struct vm_area_struct *vma * page_table_lock, we should re-check the src entry... 
*/ if (src) { - error = copy_one_pte(vma, old_addr, src, - dst, &pte_chain); + error = copy_one_pte(vma, old_addr, src, dst); pte_unmap_nested(src); } pte_unmap(dst); } spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); -out: return error; } @@ -190,12 +176,17 @@ static unsigned long move_vma(struct vm_ unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr) { +#if VMA_MERGING_FIXUP struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *next, *prev; +#else + struct vm_area_struct *new_vma; +#endif int allocated_vma; int split = 0; new_vma = NULL; +#if VMA_MERGING_FIXUP next = find_vma_prev(mm, new_addr, &prev); if (next) { if (prev && prev->vm_end == new_addr && @@ -237,6 +228,7 @@ static unsigned long move_vma(struct vm_ new_vma = prev; } } +#endif allocated_vma = 0; if (!new_vma) { diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/nommu.c sles-anobjrmap-2-works/mm/nommu.c --- sles-anobjrmap-1/mm/nommu.c 2004-02-04 16:07:06.000000000 +0100 +++ sles-anobjrmap-2-works/mm/nommu.c 2004-03-09 05:32:41.000000000 +0100 @@ -568,6 +568,6 @@ unsigned long get_unmapped_area(struct f return -ENOMEM; } -void pte_chain_init(void) +void anon_vma_init(void) { } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/objrmap.c sles-anobjrmap-2-works/mm/objrmap.c --- sles-anobjrmap-1/mm/objrmap.c 1970-01-01 01:00:00.000000000 +0100 +++ sles-anobjrmap-2-works/mm/objrmap.c 2004-03-12 02:57:41.422733184 +0100 @@ -0,0 +1,695 @@ +/* + * mm/objrmap.c + * + * Provides methods for unmapping all sorts of mapped pages + * using the vma objects, the trickier part of objrmap is the + * tracking of the vmas to analyze for every given mapped page. + * The anon_vma methods track anonymous pages, + * and the inode methods track pages belonging + * to an inode. + * + * anonymous methods by Andrea Arcangeli 2004 + * inode methods by Dave McCracken 2003, 2004 + */ + +/* + * try_to_unmap/page_referenced/page_add_rmap/page_remove_rmap + * inherit from the rmap design mm/rmap.c under + * Copyright 2001, Rik van Riel + * Released under the General Public License (GPL). + */ + +#include +#include +#include +#include + +kmem_cache_t * anon_vma_cachep; + +/* can be enabled only for debugging */ +#define OBJRMAP_DEBUG + +static inline void validate_anon_vma_find_vma(anon_vma_t * anon_vma, + struct vm_area_struct * find_vma) +{ +#ifdef OBJRMAP_DEBUG + struct vm_area_struct * vma; + unsigned long mapcount = 0; + int found = 0; + + list_for_each_entry(vma, &anon_vma->anon_vma_head, anon_vma_node) { + mapcount += 1; + BUG_ON(mapcount > 1000); + if (vma == find_vma) + found = 1; + } + BUG_ON(!found); +#endif +} + +/** + * find_pte - Find a pte pointer given a vma and a struct page. + * @vma: the vma to search + * @page: the page to find + * + * Determine if this page is mapped in this vma. If it is, map and return + * the pte pointer associated with it. Return NULL if the page is not + * mapped in this vma for any reason. + * + * This is strictly an internal helper function for the object-based rmap + * functions. + * + * It is the caller's responsibility to unmap the pte if it is returned.
+ */ +static inline pte_t * +find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr) +{ + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long loffset; + unsigned long address; + + loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)); + address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT); + if (address < vma->vm_start || address >= vma->vm_end) + goto out; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) + goto out; + + pte = pte_offset_map(pmd, address); + if (!pte_present(*pte)) + goto out_unmap; + + if (page_to_pfn(page) != pte_pfn(*pte)) + goto out_unmap; + + if (addr) + *addr = address; + + return pte; + +out_unmap: + pte_unmap(pte); +out: + return NULL; +} + +/** + * page_referenced_one - referenced check for object-based rmap + * @vma: the vma to look in. + * @page: the page we're working on. + * + * Find a pte entry for a page/vma pair, then check and clear the referenced + * bit. + * + * This is strictly a helper function for page_referenced_inode. + */ +static int +page_referenced_one(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + pte_t *pte; + int referenced = 0; + + if (!spin_trylock(&mm->page_table_lock)) + return 1; + + pte = find_pte(vma, page, NULL); + if (pte) { + if (ptep_test_and_clear_young(pte)) + referenced++; + pte_unmap(pte); + } + + spin_unlock(&mm->page_table_lock); + return referenced; +} + +/** + * page_referenced_inode - referenced check for object-based rmap + * @page: the page we're checking references on. + * + * For an object-based mapped page, find all the places it is mapped and + * check/clear the referenced flag. This is done by following the page->as.mapping + * pointer, then walking the chain of vmas it holds. It returns the number + * of references it found. + * + * This function is only called from page_referenced for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * assume a reference count of 1. + */ +static int +page_referenced_inode(struct page *page) +{ + struct address_space *mapping = page->as.mapping; + struct vm_area_struct *vma; + int referenced; + + BUG_ON(PageSwapCache(page)); + BUG_ON(PageDirect(page)); + + if (down_trylock(&mapping->i_shared_sem)) + return 1; + + referenced = 0; + + list_for_each_entry(vma, &mapping->i_mmap, shared) + referenced += page_referenced_one(vma, page); + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) + referenced += page_referenced_one(vma, page); + + up(&mapping->i_shared_sem); + + return referenced; +} + +static int page_referenced_anon(struct page *page) +{ + int referenced; + + if (PageDirect(page)) + referenced = page_referenced_one(page->as.vma, page); + else { + struct vm_area_struct * vma; + anon_vma_t * anon_vma = page->as.anon_vma; + + referenced = 0; + spin_lock(&anon_vma->anon_vma_lock); + BUG_ON(list_empty(&anon_vma->anon_vma_head)); + list_for_each_entry(vma, &anon_vma->anon_vma_head, anon_vma_node) + referenced += page_referenced_one(vma, page); + spin_unlock(&anon_vma->anon_vma_lock); + } + + return referenced; +} + +/** + * page_referenced - test if the page was referenced + * @page: the page to test + * + * Quick test_and_clear_referenced for all mappings to a page, + * returns the number of processes which referenced the page. + * + * Caller needs to hold the page_map_lock. 
+ */ +int fastcall page_referenced(struct page * page) +{ + int referenced = 0; + + if (!page_mapped(page)) + goto out; + + /* + * We need an object to reach the ptes, all mapped + * pages must provide some method in their mapping. + * Subtle: this checks for page->as.anon_vma/vma too ;). + */ + BUG_ON(!page->as.mapping); + + if (page_test_and_clear_young(page)) + mark_page_accessed(page); + + if (TestClearPageReferenced(page)) + referenced++; + + if (!PageAnon(page)) + referenced += page_referenced_inode(page); + else + referenced += page_referenced_anon(page); + + out: + return referenced; +} + +/* this needs the page->flags PG_map_lock held */ +static void inline anon_vma_page_link(struct page * page, struct vm_area_struct * vma, + unsigned long address) +{ + unsigned long index = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + + if (page->mapcount == 1) { + BUG_ON(PageDirect(page)); + BUG_ON(page->as.vma); + + SetPageDirect(page); + page->as.vma = vma; + page->index = index; + } else { + anon_vma_t * anon_vma = page->as.anon_vma; + + BUG_ON(!anon_vma); + validate_anon_vma_find_vma(anon_vma, vma); + BUG_ON(page->index != index); + + ClearPageDirect(page); + page->as.anon_vma = anon_vma; + } +} + +/** + * page_add_rmap - add reverse mapping entry to a page + * @page: the page to add the mapping to + * @vma: the vma that is covering the page + * + * Add a new pte reverse mapping to a page. + */ +void fastcall page_add_rmap(struct page *page, struct vm_area_struct * vma, + unsigned long address) +{ + if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) + return; + + page_map_lock(page); + + if (!page->mapcount++) + inc_page_state(nr_mapped); + + if (PageAnon(page)) + anon_vma_page_link(page, vma, address); + else { + /* + * If this is an object-based page, just count it. + * We can find the mappings by walking the object + * vma chain for that object. 
+ */ + BUG_ON(!page->as.mapping); + BUG_ON(PageSwapCache(page)); + } + + page_map_unlock(page); +} + +/* this needs the page->flags PG_map_lock held */ +static void inline anon_vma_page_link_fork(struct page * page, struct vm_area_struct * vma, + struct vm_area_struct * orig_vma, + anon_vma_t ** anon_vma_p, int * anon_vma_created) +{ + anon_vma_t * anon_vma = orig_vma->anon_vma; + + if (!*anon_vma_created) { + *anon_vma_created = 1; + if (!anon_vma) { + BUG_ON(!PageDirect(page)); + BUG_ON(page->mapcount != 2); + + anon_vma = *anon_vma_p; + *anon_vma_p = NULL; + + /* it's single threaded here, avoid the anon_vma->anon_vma_lock */ + list_add(&vma->anon_vma_node, &anon_vma->anon_vma_head); + list_add(&orig_vma->anon_vma_node, &anon_vma->anon_vma_head); + + validate_anon_vma_find_vma(anon_vma, orig_vma); + validate_anon_vma_find_vma(anon_vma, vma); + + orig_vma->anon_vma = vma->anon_vma = anon_vma; + } else { + BUG_ON(page->mapcount <= 1); + + validate_anon_vma_find_vma(anon_vma, orig_vma); + + /* multithreaded here, anon_vma existed already */ + spin_lock(&anon_vma->anon_vma_lock); + list_add(&vma->anon_vma_node, &orig_vma->anon_vma_node); + spin_unlock(&anon_vma->anon_vma_lock); + + validate_anon_vma_find_vma(anon_vma, orig_vma); + validate_anon_vma_find_vma(anon_vma, vma); + } + } + + ClearPageDirect(page); + page->as.anon_vma = anon_vma; +} + +/* called from fork() */ +void fastcall page_add_rmap_fork(struct page *page, struct vm_area_struct * vma, + struct vm_area_struct * orig_vma, + anon_vma_t ** anon_vma, int * anon_vma_created) +{ + if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) + return; + + page_map_lock(page); + + if (!page->mapcount++) + inc_page_state(nr_mapped); + + if (PageAnon(page)) + anon_vma_page_link_fork(page, vma, orig_vma, + anon_vma, anon_vma_created); + else { + /* + * If this is an object-based page, just count it. + * We can find the mappings by walking the object + * vma chain for that object. + */ + BUG_ON(!page->as.mapping); + BUG_ON(PageSwapCache(page)); + } + + page_map_unlock(page); +} + +/* this needs the page->flags PG_map_lock held */ +static void inline anon_vma_page_unlink(struct page * page) +{ + /* + * Cleanup if this anon page is gone + * as far as the vm is concerned. + */ + if (!page->mapcount) { + page->as.vma = 0; +#if 0 + /* + * The above clears page->as.anon_vma too + * if the page wasn't direct. + */ + page->as.anon_vma = 0; +#endif + ClearPageDirect(page); + ClearPageAnon(page); + } +} + +/** + * page_remove_rmap - take down reverse mapping to a page + * @page: page to remove mapping from + * + * Removes a pte reverse mapping from the page, + * after that the caller can clear the page table entry and free + * the page. + */ +void fastcall page_remove_rmap(struct page *page) +{ + if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) + return; + + page_map_lock(page); + + if (!page_mapped(page)) + goto out_unlock; + + if (!--page->mapcount) + dec_page_state(nr_mapped); + + if (PageAnon(page)) + anon_vma_page_unlink(page); + else { + /* + * If this is an object-based page, just uncount it. + * We can find the mappings by walking the object vma + * chain for that object. + */ + BUG_ON(!page->as.mapping); + BUG_ON(PageSwapCache(page)); + } + + out_unlock: + page_map_unlock(page); + return; +} + +/** + * try_to_unmap_one - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Determine whether a page is mapped in a given vma and unmap it if it's found.
+ * + * This function is strictly a helper function for try_to_unmap_inode + * and try_to_unmap_anon. + */ +static int +try_to_unmap_one(struct vm_area_struct *vma, struct page *page) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + pte_t pteval; + int ret = SWAP_AGAIN; + + if (!spin_trylock(&mm->page_table_lock)) + return ret; + + pte = find_pte(vma, page, &address); + if (!pte) + goto out; + + BUG_ON(vma->vm_flags & VM_RESERVED); + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + goto out_unmap; + } + + flush_cache_page(vma, address); + pteval = ptep_clear_flush(vma, address, pte); + + if (PageSwapCache(page)) { + /* + * Store the swap location in the pte. + * See handle_pte_fault() ... + */ + swp_entry_t entry = { .val = page->private }; + swap_duplicate(entry); + set_pte(pte, swp_entry_to_pte(entry)); + BUG_ON(pte_file(*pte)); + } else { + unsigned long pgidx; + /* + * If a nonlinear mapping then store the file page offset + * in the pte. + */ + pgidx = (address - vma->vm_start) >> PAGE_SHIFT; + pgidx += vma->vm_pgoff; + pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; + if (page->index != pgidx) { + set_pte(pte, pgoff_to_pte(page->index)); + BUG_ON(!pte_file(*pte)); + } + } + + if (pte_dirty(pteval)) + set_page_dirty(page); + + BUG_ON(!page->mapcount); + + mm->rss--; + page->mapcount--; + if (PageAnon(page)) + anon_vma_page_unlink(page); + page_cache_release(page); + +out_unmap: + pte_unmap(pte); + +out: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/** + * try_to_unmap_inode - unmap a page using the object-based rmap method + * @page: the page to unmap + * + * Find all the mappings of a page using the mapping pointer and the vma chains + * contained in the address_space struct it points to. + * + * This function is only called from try_to_unmap for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * return a temporary error. + */ +static int +try_to_unmap_inode(struct page *page) +{ + struct address_space *mapping = page->as.mapping; + struct vm_area_struct *vma; + int ret = SWAP_AGAIN; + + BUG_ON(PageSwapCache(page)); + BUG_ON(PageDirect(page)); + + if (down_trylock(&mapping->i_shared_sem)) + return ret; + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + ret = try_to_unmap_one(vma, page); + if (ret == SWAP_FAIL || !page->mapcount) + goto out; + } + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + ret = try_to_unmap_one(vma, page); + if (ret == SWAP_FAIL || !page->mapcount) + goto out; + } + +out: + up(&mapping->i_shared_sem); + return ret; +} + +static int +try_to_unmap_anon(struct page * page) +{ + int ret = SWAP_AGAIN; + + if (!PageSwapCache(page)) + return SWAP_AGAIN; + + if (PageDirect(page)) + ret = try_to_unmap_one(page->as.vma, page); + else { + struct vm_area_struct * vma; + anon_vma_t * anon_vma = page->as.anon_vma; + + spin_lock(&anon_vma->anon_vma_lock); + BUG_ON(list_empty(&anon_vma->anon_vma_head)); + list_for_each_entry(vma, &anon_vma->anon_vma_head, anon_vma_node) { + ret = try_to_unmap_one(vma, page); + if (ret == SWAP_FAIL || !page->mapcount) { + spin_unlock(&anon_vma->anon_vma_lock); + goto out; + } + } + spin_unlock(&anon_vma->anon_vma_lock); + } + +out: + return ret; +} + +/** + * try_to_unmap - try to remove all page table mappings to a page + * @page: the page to get unmapped + * + * Tries to remove all the page table entries which are mapping this + * page, used in the pageout path. + * + * Caller must hold the page_map_lock.
+ * + * Return values are: + * + * SWAP_SUCCESS - we succeeded in removing all mappings + * SWAP_AGAIN - we missed a trylock, try again later + * SWAP_FAIL - the page is unswappable + */ +int fastcall try_to_unmap(struct page * page) +{ + int ret = SWAP_SUCCESS; + + /* This page should not be on the pageout lists. */ + BUG_ON(PageReserved(page)); + BUG_ON(!PageLocked(page)); + + /* + * We need an object to reach the ptes. + * Subtle: this checks for page->as.anon_vma too ;). + */ + BUG_ON(!page->as.mapping); + + if (!PageAnon(page)) + ret = try_to_unmap_inode(page); + else + ret = try_to_unmap_anon(page); + + if (!page_mapped(page)) { + dec_page_state(nr_mapped); + ret = SWAP_SUCCESS; + } + return ret; +} + +/* + * No more VM stuff below this comment, only anon_vma helper + * functions. + */ + +void fastcall anon_vma_merge(struct vm_area_struct * vma, + struct vm_area_struct * vma_dying) +{ + anon_vma_t * anon_vma; + + anon_vma = vma_dying->anon_vma; + if (!anon_vma) + return; + + if (!vma->anon_vma) { + /* this is serialized by the mmap_sem */ + vma->anon_vma = anon_vma; + + spin_lock(&anon_vma->anon_vma_lock); + list_add(&vma->anon_vma_node, &vma_dying->anon_vma_node); + list_del(&vma_dying->anon_vma_node); + spin_unlock(&anon_vma->anon_vma_lock); + } else { + /* if they're both non-null they must be the same */ + BUG_ON(vma->anon_vma != anon_vma); + + spin_lock(&anon_vma->anon_vma_lock); + list_del(&vma_dying->anon_vma_node); + spin_unlock(&anon_vma->anon_vma_lock); + } +} + +void fastcall __anon_vma_link(struct vm_area_struct * vma) +{ + anon_vma_t * anon_vma; + + anon_vma = vma->anon_vma; + if (anon_vma) + list_add(&vma->anon_vma_node, &anon_vma->anon_vma_head); +} + +void fastcall anon_vma_unlink(struct vm_area_struct * vma) +{ + anon_vma_t * anon_vma; + int empty = 0; + + anon_vma = vma->anon_vma; + if (!anon_vma) + return; + + spin_lock(&anon_vma->anon_vma_lock); + list_del(&vma->anon_vma_node); + /* We must garbage collect the anon_vma if it's empty */ + if (list_empty(&anon_vma->anon_vma_head)) + empty = 1; + spin_unlock(&anon_vma->anon_vma_lock); + + if (empty) + anon_vma_free(anon_vma); +} + +static void +anon_vma_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +{ + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + anon_vma_t * anon_vma = (anon_vma_t *) data; + + spin_lock_init(&anon_vma->anon_vma_lock); + INIT_LIST_HEAD(&anon_vma->anon_vma_head); + } +} + +void __init anon_vma_init(void) +{ + /* this is intentionally not hw aligned to avoid wasting ram */ + anon_vma_cachep = kmem_cache_create("anon_vma", + sizeof(anon_vma_t), 0, 0, + anon_vma_ctor, NULL); + + if (!anon_vma_cachep) + panic("Cannot create anon_vma SLAB cache"); +} diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/page_alloc.c sles-anobjrmap-2-works/mm/page_alloc.c --- sles-anobjrmap-1/mm/page_alloc.c 2004-03-03 06:45:38.000000000 +0100 +++ sles-anobjrmap-2-works/mm/page_alloc.c 2004-03-11 15:33:19.671068272 +0100 @@ -78,7 +78,7 @@ static void bad_page(const char *functio { printk("Bad page state at %s\n", function); printk("flags:0x%08lx mapping:%p mapped:%d count:%d\n", - page->flags, page->mapping, + page->flags, page->as.mapping, page_mapped(page), page_count(page)); printk("Backtrace:\n"); dump_stack(); @@ -88,9 +88,14 @@ static void bad_page(const char *functio 1 << PG_lru | 1 << PG_active | 1 << PG_dirty | + 1 << PG_swapcache | + 1 << PG_anon | + 1 << PG_direct | + 1 << PG_maplock | 1 << PG_writeback);
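/*
 * Aside: the PG_swapcache/PG_anon/PG_direct bits that bad_page() now checks
 * are what select the meaning of the page->as union. A minimal userspace
 * sketch of that decoding, mirroring page_mapping() and the mm.h comments
 * earlier in this patch; the flag values and the mock struct are made up
 * for illustration, not the kernel's layout.
 */
#include <stdio.h>

enum { MOCK_PG_anon = 1 << 0, MOCK_PG_direct = 1 << 1, MOCK_PG_swapcache = 1 << 2 };
struct mock_page { unsigned long flags; };

static const char *as_union_meaning(const struct mock_page *p)
{
	if (p->flags & MOCK_PG_swapcache)	/* checked first, like page_mapping() */
		return "swapper_space; page->private holds the swp_entry_t";
	if (!(p->flags & MOCK_PG_anon))
		return "page->as.mapping: the file's address_space";
	if (p->flags & MOCK_PG_direct)
		return "page->as.vma: single vma, no anon_vma allocated yet";
	return "page->as.anon_vma: list of vmas, set up at fork time";
}

int main(void)
{
	struct mock_page file_page = { 0 };
	struct mock_page anon_before_fork = { MOCK_PG_anon | MOCK_PG_direct };
	struct mock_page anon_after_fork = { MOCK_PG_anon };
	struct mock_page swapcache_page = { MOCK_PG_anon | MOCK_PG_swapcache };

	printf("%s\n%s\n%s\n%s\n",
	       as_union_meaning(&file_page),
	       as_union_meaning(&anon_before_fork),
	       as_union_meaning(&anon_after_fork),
	       as_union_meaning(&swapcache_page));
	return 0;
}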
set_page_count(page, 0); - page->mapping = NULL; + page->as.mapping = NULL; + page->mapcount = 0; } #if !defined(CONFIG_HUGETLB_PAGE) && !defined(CONFIG_CRASH_DUMP) \ @@ -216,8 +221,7 @@ static inline void __free_pages_bulk (st static inline void free_pages_check(const char *function, struct page *page) { - if ( page_mapped(page) || - page->mapping != NULL || + if ( page->as.mapping != NULL || page_count(page) != 0 || (page->flags & ( 1 << PG_lru | @@ -226,12 +230,14 @@ static inline void free_pages_check(cons 1 << PG_active | 1 << PG_reclaim | 1 << PG_slab | + 1 << PG_swapcache | + 1 << PG_anon | + 1 << PG_direct | + 1 << PG_maplock | 1 << PG_writeback ))) bad_page(function, page); if (PageDirty(page)) ClearPageDirty(page); - if (PageAnon(page)) - ClearPageAnon(page); } /* @@ -329,7 +335,7 @@ static inline void set_page_refs(struct */ static void prep_new_page(struct page *page, int order) { - if (page->mapping || page_mapped(page) || + if (page->as.mapping || (page->flags & ( 1 << PG_private | 1 << PG_locked | @@ -337,6 +343,10 @@ static void prep_new_page(struct page *p 1 << PG_active | 1 << PG_dirty | 1 << PG_reclaim | + 1 << PG_anon | + 1 << PG_direct | + 1 << PG_maplock | + 1 << PG_swapcache | 1 << PG_writeback ))) bad_page(__FUNCTION__, page); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/page_io.c sles-anobjrmap-2-works/mm/page_io.c --- sles-anobjrmap-1/mm/page_io.c 2002-12-15 04:18:17.000000000 +0100 +++ sles-anobjrmap-2-works/mm/page_io.c 2004-03-12 02:09:38.239043728 +0100 @@ -32,7 +32,7 @@ get_swap_bio(int gfp_flags, struct page swp_entry_t entry; BUG_ON(!PageSwapCache(page)); - entry.val = page->index; + entry.val = page->private; sis = get_swap_info_struct(swp_type(entry)); bio->bi_sector = map_swap_page(sis, swp_offset(entry)) * @@ -150,9 +150,9 @@ int rw_swap_page_sync(int rw, swp_entry_ lock_page(page); - BUG_ON(page->mapping); - page->mapping = &swapper_space; - page->index = entry.val; + BUG_ON(page_mapping(page)); + SetPageSwapCache(page); + page->private = entry.val; if (rw == READ) { ret = swap_readpage(NULL, page); @@ -161,7 +161,7 @@ int rw_swap_page_sync(int rw, swp_entry_ ret = swap_writepage(page, &swap_wbc); wait_on_page_writeback(page); } - page->mapping = NULL; + ClearPageSwapCache(page); if (ret == 0 && (!PageUptodate(page) || PageError(page))) ret = -EIO; return ret; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/page-writeback.c sles-anobjrmap-2-works/mm/page-writeback.c --- sles-anobjrmap-1/mm/page-writeback.c 2004-02-04 16:07:06.000000000 +0100 +++ sles-anobjrmap-2-works/mm/page-writeback.c 2004-03-11 16:23:19.323051856 +0100 @@ -458,7 +458,7 @@ int do_writepages(struct address_space * */ int write_one_page(struct page *page, int wait) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -510,12 +510,12 @@ int __set_page_dirty_nobuffers(struct pa int ret = 0; if (!TestSetPageDirty(page)) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); if (mapping) { spin_lock(&mapping->page_lock); - if (page->mapping) { /* Race with truncate? */ - BUG_ON(page->mapping != mapping); + if (page_mapping(page)) { /* Race with truncate? 
*/ + BUG_ON(page_mapping(page) != mapping); if (!mapping->backing_dev_info->memory_backed) inc_page_state(nr_dirty); list_del(&page->list); @@ -559,7 +559,7 @@ EXPORT_SYMBOL(set_page_dirty_lock); int test_clear_page_dirty(struct page *page) { if (TestClearPageDirty(page)) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); if (mapping && !mapping->backing_dev_info->memory_backed) dec_page_state(nr_dirty); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/rmap.c sles-anobjrmap-2-works/mm/rmap.c --- sles-anobjrmap-1/mm/rmap.c 2004-03-05 05:40:21.000000000 +0100 +++ sles-anobjrmap-2-works/mm/rmap.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,908 +0,0 @@ -/* - * mm/rmap.c - physical to virtual reverse mappings - * - * Copyright 2001, Rik van Riel - * Released under the General Public License (GPL). - * - * - * Simple, low overhead pte-based reverse mapping scheme. - * This is kept modular because we may want to experiment - * with object-based reverse mapping schemes. Please try - * to keep this thing as modular as possible. - */ - -/* - * Locking: - * - the page->pte.chain is protected by the PG_chainlock bit, - * which nests within the the mm->page_table_lock, - * which nests within the page lock. - * - because swapout locking is opposite to the locking order - * in the page fault path, the swapout path uses trylocks - * on the mm->page_table_lock - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* #define DEBUG_RMAP */ - -/* - * Shared pages have a chain of pte_chain structures, used to locate - * all the mappings to this page. We only need a pointer to the pte - * here, the page struct for the page table page contains the process - * it belongs to and the offset within that process. - * - * We use an array of pte pointers in this structure to minimise cache misses - * while traversing reverse maps. - */ -#define NRPTE ((L1_CACHE_BYTES - sizeof(unsigned long))/sizeof(pte_addr_t)) - -/* - * next_and_idx encodes both the address of the next pte_chain and the - * offset of the highest-index used pte in ptes[]. - */ -struct pte_chain { - unsigned long next_and_idx; - pte_addr_t ptes[NRPTE]; -} ____cacheline_aligned; - -kmem_cache_t *pte_chain_cache; - -static inline struct pte_chain *pte_chain_next(struct pte_chain *pte_chain) -{ - return (struct pte_chain *)(pte_chain->next_and_idx & ~NRPTE); -} - -static inline struct pte_chain *pte_chain_ptr(unsigned long pte_chain_addr) -{ - return (struct pte_chain *)(pte_chain_addr & ~NRPTE); -} - -static inline int pte_chain_idx(struct pte_chain *pte_chain) -{ - return pte_chain->next_and_idx & NRPTE; -} - -static inline unsigned long -pte_chain_encode(struct pte_chain *pte_chain, int idx) -{ - return (unsigned long)pte_chain | idx; -} - -/* - * pte_chain list management policy: - * - * - If a page has a pte_chain list then it is shared by at least two processes, - * because a single sharing uses PageDirect. (Well, this isn't true yet, - * coz this code doesn't collapse singletons back to PageDirect on the remove - * path). - * - A pte_chain list has free space only in the head member - all succeeding - * members are 100% full. - * - If the head element has free space, it occurs in its leading slots. - * - All free space in the pte_chain is at the start of the head member. 
- * - Insertion into the pte_chain puts a pte pointer in the last free slot of - * the head member. - * - Removal from a pte chain moves the head pte of the head member onto the - * victim pte and frees the head member if it became empty. - */ - -/** - ** VM stuff below this comment - **/ - -/** - * find_pte - Find a pte pointer given a vma and a struct page. - * @vma: the vma to search - * @page: the page to find - * - * Determine if this page is mapped in this vma. If it is, map and rethrn - * the pte pointer associated with it. Return null if the page is not - * mapped in this vma for any reason. - * - * This is strictly an internal helper function for the object-based rmap - * functions. - * - * It is the caller's responsibility to unmap the pte if it is returned. - */ -static inline pte_t * -find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr) -{ - struct mm_struct *mm = vma->vm_mm; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long loffset; - unsigned long address; - - loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)); - address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT); - if (address < vma->vm_start || address >= vma->vm_end) - goto out; - - pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) - goto out; - - pmd = pmd_offset(pgd, address); - if (!pmd_present(*pmd)) - goto out; - - pte = pte_offset_map(pmd, address); - if (!pte_present(*pte)) - goto out_unmap; - - if (page_to_pfn(page) != pte_pfn(*pte)) - goto out_unmap; - - if (addr) - *addr = address; - - return pte; - -out_unmap: - pte_unmap(pte); -out: - return NULL; -} - -/** - * page_referenced_obj_one - referenced check for object-based rmap - * @vma: the vma to look in. - * @page: the page we're working on. - * - * Find a pte entry for a page/vma pair, then check and clear the referenced - * bit. - * - * This is strictly a helper function for page_referenced_obj. - */ -static int -page_referenced_obj_one(struct vm_area_struct *vma, struct page *page) -{ - struct mm_struct *mm = vma->vm_mm; - pte_t *pte; - int referenced = 0; - - if (!spin_trylock(&mm->page_table_lock)) - return 1; - - pte = find_pte(vma, page, NULL); - if (pte) { - if (ptep_test_and_clear_young(pte)) - referenced++; - pte_unmap(pte); - } - - spin_unlock(&mm->page_table_lock); - return referenced; -} - -/** - * page_referenced_obj_one - referenced check for object-based rmap - * @page: the page we're checking references on. - * - * For an object-based mapped page, find all the places it is mapped and - * check/clear the referenced flag. This is done by following the page->mapping - * pointer, then walking the chain of vmas it holds. It returns the number - * of references it found. - * - * This function is only called from page_referenced for object-based pages. - * - * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, - * assume a reference count of 1. 
- */ -static int -page_referenced_obj(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct vm_area_struct *vma; - int referenced = 0; - - if (!page->pte.mapcount) - return 0; - - if (!mapping) - BUG(); - - if (PageSwapCache(page)) - BUG(); - - if (down_trylock(&mapping->i_shared_sem)) - return 1; - - list_for_each_entry(vma, &mapping->i_mmap, shared) - referenced += page_referenced_obj_one(vma, page); - - list_for_each_entry(vma, &mapping->i_mmap_shared, shared) - referenced += page_referenced_obj_one(vma, page); - - up(&mapping->i_shared_sem); - - return referenced; -} - -/** - * page_referenced - test if the page was referenced - * @page: the page to test - * - * Quick test_and_clear_referenced for all mappings to a page, - * returns the number of processes which referenced the page. - * Caller needs to hold the pte_chain_lock. - * - * If the page has a single-entry pte_chain, collapse that back to a PageDirect - * representation. This way, it's only done under memory pressure. - */ -int fastcall page_referenced(struct page * page) -{ - struct pte_chain *pc; - int referenced = 0; - - if (page_test_and_clear_young(page)) - mark_page_accessed(page); - - if (TestClearPageReferenced(page)) - referenced++; - - if (!PageAnon(page)) { - referenced += page_referenced_obj(page); - goto out; - } - if (PageDirect(page)) { - pte_t *pte = rmap_ptep_map(page->pte.direct); - if (ptep_test_and_clear_young(pte)) - referenced++; - rmap_ptep_unmap(pte); - } else { - int nr_chains = 0; - - /* Check all the page tables mapping this page. */ - for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) { - int i; - - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pte_paddr = pc->ptes[i]; - pte_t *p; - - p = rmap_ptep_map(pte_paddr); - if (ptep_test_and_clear_young(p)) - referenced++; - rmap_ptep_unmap(p); - nr_chains++; - } - } - if (nr_chains == 1) { - pc = page->pte.chain; - page->pte.direct = pc->ptes[NRPTE-1]; - SetPageDirect(page); - pc->ptes[NRPTE-1] = 0; - __pte_chain_free(pc); - } - } -out: - return referenced; -} - -/** - * page_add_rmap - add reverse mapping entry to a page - * @page: the page to add the mapping to - * @ptep: the page table entry mapping this page - * - * Add a new pte reverse mapping to a page. - * The caller needs to hold the mm->page_table_lock. - */ -struct pte_chain * fastcall -page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain) -{ - pte_addr_t pte_paddr = ptep_to_paddr(ptep); - struct pte_chain *cur_pte_chain; - - if (PageReserved(page)) - return pte_chain; - - pte_chain_lock(page); - - /* - * If this is an object-based page, just count it. We can - * find the mappings by walking the object vma chain for that object. 
- */ - if (!PageAnon(page)) { - if (!page->mapping) - BUG(); - if (PageSwapCache(page)) - BUG(); - if (!page->pte.mapcount) - inc_page_state(nr_mapped); - page->pte.mapcount++; - goto out; - } - - if (page->pte.direct == 0) { - page->pte.direct = pte_paddr; - SetPageDirect(page); - inc_page_state(nr_mapped); - goto out; - } - - if (PageDirect(page)) { - /* Convert a direct pointer into a pte_chain */ - ClearPageDirect(page); - pte_chain->ptes[NRPTE-1] = page->pte.direct; - pte_chain->ptes[NRPTE-2] = pte_paddr; - pte_chain->next_and_idx = pte_chain_encode(NULL, NRPTE-2); - page->pte.direct = 0; - page->pte.chain = pte_chain; - pte_chain = NULL; /* We consumed it */ - goto out; - } - - cur_pte_chain = page->pte.chain; - if (cur_pte_chain->ptes[0]) { /* It's full */ - pte_chain->next_and_idx = pte_chain_encode(cur_pte_chain, - NRPTE - 1); - page->pte.chain = pte_chain; - pte_chain->ptes[NRPTE-1] = pte_paddr; - pte_chain = NULL; /* We consumed it */ - goto out; - } - cur_pte_chain->ptes[pte_chain_idx(cur_pte_chain) - 1] = pte_paddr; - cur_pte_chain->next_and_idx--; -out: - pte_chain_unlock(page); - return pte_chain; -} - -/** - * page_remove_rmap - take down reverse mapping to a page - * @page: page to remove mapping from - * @ptep: page table entry to remove - * - * Removes the reverse mapping from the pte_chain of the page, - * after that the caller can clear the page table entry and free - * the page. - * Caller needs to hold the mm->page_table_lock. - */ -void fastcall page_remove_rmap(struct page *page, pte_t *ptep) -{ - pte_addr_t pte_paddr = ptep_to_paddr(ptep); - struct pte_chain *pc; - - if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) - return; - - pte_chain_lock(page); - - if (!page_mapped(page)) - goto out_unlock; - - /* - * If this is an object-based page, just uncount it. We can - * find the mappings by walking the object vma chain for that object. - */ - if (!PageAnon(page)) { - if (!page->mapping) - BUG(); - if (PageSwapCache(page)) - BUG(); - if (!page->pte.mapcount) - BUG(); - page->pte.mapcount--; - if (!page->pte.mapcount) - dec_page_state(nr_mapped); - goto out_unlock; - } - - if (PageDirect(page)) { - if (page->pte.direct == pte_paddr) { - page->pte.direct = 0; - ClearPageDirect(page); - goto out; - } - } else { - struct pte_chain *start = page->pte.chain; - struct pte_chain *next; - int victim_i = pte_chain_idx(start); - - for (pc = start; pc; pc = next) { - int i; - - next = pte_chain_next(pc); - if (next) - prefetch(next); - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pa = pc->ptes[i]; - - if (pa != pte_paddr) - continue; - pc->ptes[i] = start->ptes[victim_i]; - start->ptes[victim_i] = 0; - if (victim_i == NRPTE-1) { - /* Emptied a pte_chain */ - page->pte.chain = pte_chain_next(start); - __pte_chain_free(start); - } else { - start->next_and_idx++; - } - goto out; - } - } - } -out: - if (page->pte.direct == 0 && page_test_and_clear_dirty(page)) - set_page_dirty(page); - if (!page_mapped(page)) - dec_page_state(nr_mapped); -out_unlock: - pte_chain_unlock(page); - return; -} - -/** - * try_to_unmap_obj - unmap a page using the object-based rmap method - * @page: the page to unmap - * - * Determine whether a page is mapped in a given vma and unmap it if it's found. - * - * This function is strictly a helper function for try_to_unmap_obj. 
- */ -static inline int -try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long address; - pte_t *pte; - pte_t pteval; - int ret = SWAP_AGAIN; - - if (!spin_trylock(&mm->page_table_lock)) - return ret; - - pte = find_pte(vma, page, &address); - if (!pte) - goto out; - - BUG_ON(vma->vm_flags & VM_RESERVED); - if (vma->vm_flags & VM_LOCKED) { - ret = SWAP_FAIL; - goto out_unmap; - } - - flush_cache_page(vma, address); - pteval = ptep_get_and_clear(pte); - flush_tlb_page(vma, address); - - if (pte_dirty(pteval)) - set_page_dirty(page); - - if (!page->pte.mapcount) - BUG(); - - mm->rss--; - page->pte.mapcount--; - page_cache_release(page); - -out_unmap: - pte_unmap(pte); - -out: - spin_unlock(&mm->page_table_lock); - return ret; -} - -/** - * try_to_unmap_obj - unmap a page using the object-based rmap method - * @page: the page to unmap - * - * Find all the mappings of a page using the mapping pointer and the vma chains - * contained in the address_space struct it points to. - * - * This function is only called from try_to_unmap for object-based pages. - * - * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, - * return a temporary error. - */ -static int -try_to_unmap_obj(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct vm_area_struct *vma; - int ret = SWAP_AGAIN; - - if (!mapping) - BUG(); - - if (PageSwapCache(page)) - BUG(); - - if (down_trylock(&mapping->i_shared_sem)) - return ret; - - list_for_each_entry(vma, &mapping->i_mmap, shared) { - ret = try_to_unmap_obj_one(vma, page); - if (ret == SWAP_FAIL || !page->pte.mapcount) - goto out; - } - - list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { - ret = try_to_unmap_obj_one(vma, page); - if (ret == SWAP_FAIL || !page->pte.mapcount) - goto out; - } - -out: - up(&mapping->i_shared_sem); - return ret; -} - -/** - * try_to_unmap_one - worker function for try_to_unmap - * @page: page to unmap - * @ptep: page table entry to unmap from page - * - * Internal helper function for try_to_unmap, called for each page - * table entry mapping a page. Because locking order here is opposite - * to the locking order used by the page fault path, we use trylocks. - * Locking: - * page lock shrink_list(), trylock - * pte_chain_lock shrink_list() - * mm->page_table_lock try_to_unmap_one(), trylock - */ -static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t)); -static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr) -{ - pte_t *ptep = rmap_ptep_map(paddr); - unsigned long address = ptep_to_address(ptep); - struct mm_struct * mm = ptep_to_mm(ptep); - struct vm_area_struct * vma; - pte_t pte; - int ret; - - if (!mm) - BUG(); - - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - if (!spin_trylock(&mm->page_table_lock)) { - rmap_ptep_unmap(ptep); - return SWAP_AGAIN; - } - - - /* During mremap, it's possible pages are not in a VMA. */ - vma = find_vma(mm, address); - if (!vma) { - ret = SWAP_FAIL; - goto out_unlock; - } - - /* The page is mlock()d, we cannot swap it out. */ - if (vma->vm_flags & VM_LOCKED) { - ret = SWAP_FAIL; - goto out_unlock; - } - - /* Nuke the page table entry. */ - flush_cache_page(vma, address); - pte = ptep_clear_flush(vma, address, ptep); - - if (PageSwapCache(page)) { - /* - * Store the swap location in the pte. - * See handle_pte_fault() ... 
- */ - swp_entry_t entry = { .val = page->index }; - swap_duplicate(entry); - set_pte(ptep, swp_entry_to_pte(entry)); - BUG_ON(pte_file(*ptep)); - } else { - unsigned long pgidx; - /* - * If a nonlinear mapping then store the file page offset - * in the pte. - */ - pgidx = (address - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (page->index != pgidx) { - set_pte(ptep, pgoff_to_pte(page->index)); - BUG_ON(!pte_file(*ptep)); - } - } - - /* Move the dirty bit to the physical page now the pte is gone. */ - if (pte_dirty(pte)) - set_page_dirty(page); - - mm->rss--; - page_cache_release(page); - ret = SWAP_SUCCESS; - -out_unlock: - rmap_ptep_unmap(ptep); - spin_unlock(&mm->page_table_lock); - return ret; -} - -/** - * try_to_unmap - try to remove all page table mappings to a page - * @page: the page to get unmapped - * - * Tries to remove all the page table entries which are mapping this - * page, used in the pageout path. Caller must hold the page lock - * and its pte chain lock. Return values are: - * - * SWAP_SUCCESS - we succeeded in removing all mappings - * SWAP_AGAIN - we missed a trylock, try again later - * SWAP_FAIL - the page is unswappable - */ -int fastcall try_to_unmap(struct page * page) -{ - struct pte_chain *pc, *next_pc, *start; - int ret = SWAP_SUCCESS; - int victim_i; - - /* This page should not be on the pageout lists. */ - if (PageReserved(page)) - BUG(); - if (!PageLocked(page)) - BUG(); - /* We need backing store to swap out a page. */ - if (!page->mapping) - BUG(); - - /* - * If it's an object-based page, use the object vma chain to find all - * the mappings. - */ - if (!PageAnon(page)) { - ret = try_to_unmap_obj(page); - goto out; - } - - if (PageDirect(page)) { - ret = try_to_unmap_one(page, page->pte.direct); - if (ret == SWAP_SUCCESS) { - if (page_test_and_clear_dirty(page)) - set_page_dirty(page); - page->pte.direct = 0; - ClearPageDirect(page); - } - goto out; - } - - start = page->pte.chain; - victim_i = pte_chain_idx(start); - for (pc = start; pc; pc = next_pc) { - int i; - - next_pc = pte_chain_next(pc); - if (next_pc) - prefetch(next_pc); - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pte_paddr = pc->ptes[i]; - - switch (try_to_unmap_one(page, pte_paddr)) { - case SWAP_SUCCESS: - /* - * Release a slot. If we're releasing the - * first pte in the first pte_chain then - * pc->ptes[i] and start->ptes[victim_i] both - * refer to the same thing. It works out. - */ - pc->ptes[i] = start->ptes[victim_i]; - start->ptes[victim_i] = 0; - victim_i++; - if (victim_i == NRPTE) { - page->pte.chain = pte_chain_next(start); - __pte_chain_free(start); - start = page->pte.chain; - victim_i = 0; - } else { - start->next_and_idx++; - } - if (page->pte.direct == 0 && - page_test_and_clear_dirty(page)) - set_page_dirty(page); - break; - case SWAP_AGAIN: - /* Skip this pte, remembering status. */ - ret = SWAP_AGAIN; - continue; - case SWAP_FAIL: - ret = SWAP_FAIL; - goto out; - } - } - } -out: - if (!page_mapped(page)) { - dec_page_state(nr_mapped); - ret = SWAP_SUCCESS; - } - return ret; -} - -/** - * page_convert_anon - Convert an object-based mapped page to pte_chain-based. - * @page: the page to convert - * - * Find all the mappings for an object-based page and convert them - * to 'anonymous', ie create a pte_chain and store all the pte pointers there. 
- * - * This function takes the address_space->i_shared_sem, sets the PageAnon flag, - * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This - * means there is a period when PageAnon is set, but still has some mappings - * with no pte_chain entry. This is in fact safe, since page_remove_rmap will - * simply not find it. try_to_unmap might erroneously return success, but it - * will never be called because the page_convert_anon() caller has locked the - * page. - * - * page_referenced() may fail to scan all the appropriate pte's and may return - * an inaccurate result. This is so rare that it does not matter. - */ -int page_convert_anon(struct page *page) -{ - struct address_space *mapping; - struct vm_area_struct *vma; - struct pte_chain *pte_chain = NULL; - pte_t *pte; - int err = 0; - - mapping = page->mapping; - if (mapping == NULL) - goto out; /* truncate won the lock_page() race */ - - down(&mapping->i_shared_sem); - pte_chain_lock(page); - - /* - * Has someone else done it for us before we got the lock? - * If so, pte.direct or pte.chain has replaced pte.mapcount. - */ - if (PageAnon(page)) { - pte_chain_unlock(page); - goto out_unlock; - } - - SetPageAnon(page); - if (page->pte.mapcount == 0) { - pte_chain_unlock(page); - goto out_unlock; - } - /* This is gonna get incremented by page_add_rmap */ - dec_page_state(nr_mapped); - page->pte.mapcount = 0; - - /* - * Now that the page is marked as anon, unlock it. page_add_rmap will - * lock it as necessary. - */ - pte_chain_unlock(page); - - list_for_each_entry(vma, &mapping->i_mmap, shared) { - if (!pte_chain) { - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - err = -ENOMEM; - goto out_unlock; - } - } - spin_lock(&vma->vm_mm->page_table_lock); - pte = find_pte(vma, page, NULL); - if (pte) { - /* Make sure this isn't a duplicate */ - page_remove_rmap(page, pte); - pte_chain = page_add_rmap(page, pte, pte_chain); - pte_unmap(pte); - } - spin_unlock(&vma->vm_mm->page_table_lock); - } - list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { - if (!pte_chain) { - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - err = -ENOMEM; - goto out_unlock; - } - } - spin_lock(&vma->vm_mm->page_table_lock); - pte = find_pte(vma, page, NULL); - if (pte) { - /* Make sure this isn't a duplicate */ - page_remove_rmap(page, pte); - pte_chain = page_add_rmap(page, pte, pte_chain); - pte_unmap(pte); - } - spin_unlock(&vma->vm_mm->page_table_lock); - } - -out_unlock: - pte_chain_free(pte_chain); - up(&mapping->i_shared_sem); -out: - return err; -} - -/** - ** No more VM stuff below this comment, only pte_chain helper - ** functions. - **/ - -static void pte_chain_ctor(void *p, kmem_cache_t *cachep, unsigned long flags) -{ - struct pte_chain *pc = p; - - memset(pc, 0, sizeof(*pc)); -} - -DEFINE_PER_CPU(struct pte_chain *, local_pte_chain) = 0; - -/** - * __pte_chain_free - free pte_chain structure - * @pte_chain: pte_chain struct to free - */ -void __pte_chain_free(struct pte_chain *pte_chain) -{ - struct pte_chain **pte_chainp; - - pte_chainp = &get_cpu_var(local_pte_chain); - if (pte_chain->next_and_idx) - pte_chain->next_and_idx = 0; - if (*pte_chainp) - kmem_cache_free(pte_chain_cache, *pte_chainp); - *pte_chainp = pte_chain; - put_cpu_var(local_pte_chain); -} - -/* - * pte_chain_alloc(): allocate a pte_chain structure for use by page_add_rmap(). - * - * The caller of page_add_rmap() must perform the allocation because - * page_add_rmap() is invariably called under spinlock. 
Often, page_add_rmap() - * will not actually use the pte_chain, because there is space available in one - * of the existing pte_chains which are attached to the page. So the case of - * allocating and then freeing a single pte_chain is specially optimised here, - * with a one-deep per-cpu cache. - */ -struct pte_chain *pte_chain_alloc(int gfp_flags) -{ - struct pte_chain *ret; - struct pte_chain **pte_chainp; - - might_sleep_if(gfp_flags & __GFP_WAIT); - - pte_chainp = &get_cpu_var(local_pte_chain); - if (*pte_chainp) { - ret = *pte_chainp; - *pte_chainp = NULL; - put_cpu_var(local_pte_chain); - } else { - put_cpu_var(local_pte_chain); - ret = kmem_cache_alloc(pte_chain_cache, gfp_flags); - } - return ret; -} - -void __init pte_chain_init(void) -{ - pte_chain_cache = kmem_cache_create( "pte_chain", - sizeof(struct pte_chain), - 0, - SLAB_MUST_HWCACHE_ALIGN, - pte_chain_ctor, - NULL); - - if (!pte_chain_cache) - panic("failed to create pte_chain cache!\n"); -} diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/shmem.c sles-anobjrmap-2-works/mm/shmem.c --- sles-anobjrmap-1/mm/shmem.c 2004-02-04 16:07:06.000000000 +0100 +++ sles-anobjrmap-2-works/mm/shmem.c 2004-03-11 17:01:05.973468616 +0100 @@ -721,7 +721,9 @@ static int shmem_writepage(struct page * BUG_ON(!PageLocked(page)); BUG_ON(page_mapped(page)); - mapping = page->mapping; + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + mapping = page->as.mapping; index = page->index; inode = mapping->host; info = SHMEM_I(inode); @@ -1162,7 +1164,10 @@ static struct inode_operations shmem_sym static int shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { - struct inode *inode = page->mapping->host; + struct inode *inode; + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + inode = page->as.mapping->host; return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/swapfile.c sles-anobjrmap-2-works/mm/swapfile.c --- sles-anobjrmap-1/mm/swapfile.c 2004-03-05 05:24:44.000000000 +0100 +++ sles-anobjrmap-2-works/mm/swapfile.c 2004-03-12 02:09:20.030811800 +0100 @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include #include #include @@ -247,7 +247,7 @@ static int exclusive_swap_page(struct pa struct swap_info_struct * p; swp_entry_t entry; - entry.val = page->index; + entry.val = page->private; p = swap_info_get(entry); if (p) { /* Is the only swap cache user the cache itself? 
*/ @@ -315,7 +315,7 @@ int remove_exclusive_swap_page(struct pa if (page_count(page) != 2) /* 2: us + cache */ return 0; - entry.val = page->index; + entry.val = page->private; p = swap_info_get(entry); if (!p) return 0; @@ -385,20 +385,20 @@ void free_swap_and_cache(swp_entry_t ent /* vma->vm_mm->page_table_lock is held */ static void unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { vma->vm_mm->rss++; get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); SetPageAnon(page); - *pte_chainp = page_add_rmap(page, dir, *pte_chainp); + page_add_rmap(page, vma, address); swap_free(entry); } /* vma->vm_mm->page_table_lock is held */ static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { pte_t * pte; unsigned long end; @@ -424,7 +424,7 @@ static int unuse_pmd(struct vm_area_stru */ if (unlikely(pte_same(*pte, swp_pte))) { unuse_pte(vma, offset + address, pte, - entry, page, pte_chainp); + entry, page); pte_unmap(pte); return 1; } @@ -438,7 +438,7 @@ static int unuse_pmd(struct vm_area_stru /* vma->vm_mm->page_table_lock is held */ static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { pmd_t * pmd; unsigned long offset, end; @@ -460,7 +460,7 @@ static int unuse_pgd(struct vm_area_stru BUG(); do { if (unuse_pmd(vma, pmd, address, end - address, - offset, entry, page, pte_chainp)) + offset, entry, page)) return 1; address = (address + PMD_SIZE) & PMD_MASK; pmd++; @@ -470,7 +470,7 @@ static int unuse_pgd(struct vm_area_stru /* vma->vm_mm->page_table_lock is held */ static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { unsigned long start = vma->vm_start, end = vma->vm_end; @@ -478,7 +478,7 @@ static int unuse_vma(struct vm_area_stru BUG(); do { if (unuse_pgd(vma, pgdir, start, end - start, - entry, page, pte_chainp)) + entry, page)) return 1; start = (start + PGDIR_SIZE) & PGDIR_MASK; pgdir++; @@ -490,11 +490,6 @@ static int unuse_process(struct mm_struc swp_entry_t entry, struct page* page) { struct vm_area_struct* vma; - struct pte_chain *pte_chain; - - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - return -ENOMEM; /* * Go through process' page directory. 
@@ -502,11 +497,10 @@ static int unuse_process(struct mm_struc spin_lock(&mm->page_table_lock); for (vma = mm->mmap; vma; vma = vma->vm_next) { pgd_t * pgd = pgd_offset(mm, vma->vm_start); - if (unuse_vma(vma, pgd, entry, page, &pte_chain)) + if (unuse_vma(vma, pgd, entry, page)) break; } spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); return 0; } @@ -999,7 +993,7 @@ int page_queue_congested(struct page *pa bdi = page->mapping->backing_dev_info; if (PageSwapCache(page)) { - swp_entry_t entry = { .val = page->index }; + swp_entry_t entry = { .val = page->private }; struct swap_info_struct *sis; sis = get_swap_info_struct(swp_type(entry)); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/swap_state.c sles-anobjrmap-2-works/mm/swap_state.c --- sles-anobjrmap-1/mm/swap_state.c 2003-08-16 15:00:13.000000000 +0200 +++ sles-anobjrmap-2-works/mm/swap_state.c 2004-03-12 03:02:57.595667592 +0100 @@ -63,8 +63,8 @@ static int add_to_swap_cache(struct page { int error; - if (page->mapping) - BUG(); + BUG_ON(page_mapping(page)); + BUG_ON(PageSwapCache(page)); if (!swap_duplicate(entry)) { INC_CACHE_INFO(noent_race); return -ENOENT; @@ -74,15 +74,14 @@ static int add_to_swap_cache(struct page * Anon pages are already on the LRU, we don't run lru_cache_add here. */ if (error != 0) { + BUG_ON(PageSwapCache(page)); swap_free(entry); if (error == -EEXIST) INC_CACHE_INFO(exist_race); return error; } - if (!PageLocked(page)) - BUG(); - if (!PageSwapCache(page)) - BUG(); + BUG_ON(!PageLocked(page)); + BUG_ON(!PageSwapCache(page)); INC_CACHE_INFO(add_total); return 0; } @@ -180,7 +179,7 @@ void delete_from_swap_cache(struct page BUG_ON(PageWriteback(page)); BUG_ON(PagePrivate(page)); - entry.val = page->index; + entry.val = page->private; spin_lock(&swapper_space.page_lock); __delete_from_swap_cache(page); @@ -192,9 +191,13 @@ void delete_from_swap_cache(struct page int move_to_swap_cache(struct page *page, swp_entry_t entry) { - struct address_space *mapping = page->mapping; + struct address_space *mapping; int err; + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + mapping = page->as.mapping; + spin_lock(&swapper_space.page_lock); spin_lock(&mapping->page_lock); @@ -229,7 +232,7 @@ int move_from_swap_cache(struct page *pa BUG_ON(PageWriteback(page)); BUG_ON(PagePrivate(page)); - entry.val = page->index; + entry.val = page->private; spin_lock(&swapper_space.page_lock); spin_lock(&mapping->page_lock); @@ -320,8 +323,10 @@ struct page * lookup_swap_cache(swp_entr * that, but no need to change: we _have_ got the right page. */ INC_CACHE_INFO(find_total); - if (found) + if (found) { + BUG_ON(!PageSwapCache(found)); INC_CACHE_INFO(find_success); + } return found; } @@ -344,8 +349,10 @@ struct page * read_swap_cache_async(swp_ * directly. */ found_page = find_get_page(&swapper_space, entry.val); - if (found_page) + if (found_page) { + BUG_ON(!PageSwapCache(found_page)); break; + } /* * Get a new page to read into from swap. 
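A note on the pattern running through the swap_state.c hunks above and the truncate.c hunks below: every site that still dereferences page->as.mapping directly first asserts !PageAnon(page) and !PageSwapCache(page), and every other reader goes through page_mapping(). The helper itself is defined in the include/ part of the patch, outside this excerpt, so the sketch below is inferred from those call sites rather than quoted from the patch:

/*
 * Inferred sketch -- not quoted from the patch.  Under anobjrmap,
 * page->as is a union: an address_space pointer for pagecache pages,
 * an anon_vma pointer for anonymous pages.  Swapcache pages keep
 * their swap entry in page->private and logically belong to
 * swapper_space, which is why the hunks above drop page->index.
 */
static inline struct address_space *page_mapping(struct page *page)
{
	if (PageSwapCache(page))
		return &swapper_space;
	if (PageAnon(page))
		return NULL;	/* page->as.anon_vma is live, not a mapping */
	return page->as.mapping;
}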
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/truncate.c sles-anobjrmap-2-works/mm/truncate.c --- sles-anobjrmap-1/mm/truncate.c 2003-10-31 05:56:46.000000000 +0100 +++ sles-anobjrmap-2-works/mm/truncate.c 2004-03-11 16:24:44.180151616 +0100 @@ -19,7 +19,9 @@ static int do_invalidatepage(struct page *page, unsigned long offset) { int (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage; + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + invalidatepage = page->as.mapping->a_ops->invalidatepage; if (invalidatepage == NULL) invalidatepage = block_invalidatepage; return (*invalidatepage)(page, offset); @@ -45,7 +47,9 @@ static inline void truncate_partial_page static void truncate_complete_page(struct address_space *mapping, struct page *page) { - if (page->mapping != mapping) + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (page->as.mapping != mapping) return; if (PagePrivate(page)) @@ -68,7 +72,9 @@ truncate_complete_page(struct address_sp static int invalidate_complete_page(struct address_space *mapping, struct page *page) { - if (page->mapping != mapping) + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (page->as.mapping != mapping) return 0; if (PagePrivate(page) && !try_to_release_page(page, 0)) @@ -255,7 +261,9 @@ void invalidate_inode_pages2(struct addr struct page *page = pvec.pages[i]; lock_page(page); - if (page->mapping == mapping) { /* truncate race? */ + BUG_ON(PageAnon(page)); + BUG_ON(PageSwapCache(page)); + if (page->as.mapping == mapping) { /* truncate race? */ wait_on_page_writeback(page); next = page->index + 1; if (page_mapped(page)) diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids sles-anobjrmap-1/mm/vmscan.c sles-anobjrmap-2-works/mm/vmscan.c --- sles-anobjrmap-1/mm/vmscan.c 2004-03-05 05:24:48.000000000 +0100 +++ sles-anobjrmap-2-works/mm/vmscan.c 2004-03-12 02:10:35.689309960 +0100 @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include @@ -170,10 +170,10 @@ static int shrink_slab(long scanned, uns return 0; } -/* Must be called with page's pte_chain_lock held. */ +/* Must be called with page's page_map_lock held. */ static inline int page_mapping_inuse(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); /* Page is in somebody's page tables. */ if (page_mapped(page)) @@ -230,7 +230,7 @@ static void handle_write_error(struct ad struct page *page, int error) { lock_page(page); - if (page->mapping == mapping) { + if (page_mapping(page) == mapping) { if (error == -ENOSPC) set_bit(AS_ENOSPC, &mapping->flags); else @@ -275,15 +275,15 @@ shrink_list(struct list_head *page_list, if (PageWriteback(page)) goto keep_locked; - pte_chain_lock(page); + page_map_lock(page); referenced = page_referenced(page); if (referenced && page_mapping_inuse(page)) { /* In active use or really unfreeable. Activate it. */ - pte_chain_unlock(page); + page_map_unlock(page); goto activate_locked; } - mapping = page->mapping; + mapping = page_mapping(page); #ifdef CONFIG_SWAP /* @@ -293,11 +293,11 @@ shrink_list(struct list_head *page_list, * XXX: implement swap clustering ? 
*/ if (page_mapped(page) && !mapping && !PagePrivate(page)) { - pte_chain_unlock(page); + page_map_unlock(page); if (!add_to_swap(page)) goto activate_locked; - pte_chain_lock(page); - mapping = page->mapping; + page_map_lock(page); + mapping = page_mapping(page); } #endif /* CONFIG_SWAP */ @@ -311,16 +311,16 @@ shrink_list(struct list_head *page_list, if (page_mapped(page) && mapping) { switch (try_to_unmap(page)) { case SWAP_FAIL: - pte_chain_unlock(page); + page_map_unlock(page); goto activate_locked; case SWAP_AGAIN: - pte_chain_unlock(page); + page_map_unlock(page); goto keep_locked; case SWAP_SUCCESS: ; /* try to free the page below */ } } - pte_chain_unlock(page); + page_map_unlock(page); /* * If the page is dirty, only perform writeback if that write @@ -427,7 +427,7 @@ shrink_list(struct list_head *page_list, #ifdef CONFIG_SWAP if (PageSwapCache(page)) { - swp_entry_t swap = { .val = page->index }; + swp_entry_t swap = { .val = page->private }; __delete_from_swap_cache(page); spin_unlock(&mapping->page_lock); swap_free(swap); @@ -657,13 +657,13 @@ refill_inactive_zone(struct zone *zone, page = list_entry(l_hold.prev, struct page, lru); list_del(&page->lru); if (page_mapped(page)) { - pte_chain_lock(page); - if (page_mapped(page) && page_referenced(page)) { - pte_chain_unlock(page); + page_map_lock(page); + if (page_referenced(page)) { + page_map_unlock(page); list_add(&page->lru, &l_active); continue; } - pte_chain_unlock(page); + page_map_unlock(page); if (!reclaim_mapped) { list_add(&page->lru, &l_active); continue; @@ -673,7 +673,7 @@ refill_inactive_zone(struct zone *zone, * FIXME: need to consider page_count(page) here if/when we * reap orphaned pages via the LRU (Daniel's locking stuff) */ - if (total_swap_pages == 0 && !page->mapping && + if (total_swap_pages == 0 && !page_mapping(page) && !PagePrivate(page)) { list_add(&page->lru, &l_active); continue;
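The vmscan.c hunks above replace pte_chain_lock()/pte_chain_unlock() with page_map_lock()/page_map_unlock() and leave the rest of the reclaim protocol intact: page lock first, then the map lock, then the tristate try_to_unmap(). A condensed sketch of that calling convention, assuming only the primitives and SWAP_* codes used in the hunks (illustrative, not code from the patch):

/*
 * Condensed from the shrink_list() hunks above; illustrative only.
 */
static int try_to_reclaim_mappings(struct page *page)
{
	int freeable = 0;

	if (TestSetPageLocked(page))	/* trylock, as shrink_list() does */
		return 0;

	page_map_lock(page);
	if (page_referenced(page)) {	/* needs page_map_lock held */
		page_map_unlock(page);
		goto out;		/* recently used: keep the page */
	}
	if (page_mapped(page) && page_mapping(page)) {
		switch (try_to_unmap(page)) {
		case SWAP_SUCCESS:	/* all ptes gone, page can be freed */
			freeable = 1;
			break;
		case SWAP_AGAIN:	/* lost a trylock race, retry later */
		case SWAP_FAIL:		/* unswappable, e.g. a VM_LOCKED vma */
			break;
		}
	}
	page_map_unlock(page);
out:
	unlock_page(page);
	return freeable;
}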