diff -urpN mm3-2.5.42/arch/i386/kernel/sys_i386.c hugetlbfs-2.5.42/arch/i386/kernel/sys_i386.c --- mm3-2.5.42/arch/i386/kernel/sys_i386.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/i386/kernel/sys_i386.c 2002-10-17 20:12:42.000000000 -0700 @@ -313,7 +313,7 @@ sys_free_hugepages(unsigned long addr) extern int free_hugepages(struct vm_area_struct *); vma = find_vma(current->mm, addr); - if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr)) + if ((!vma) || (!(vma->vm_flags & VM_HUGETLB)) || (vma->vm_start!=addr)) return -EINVAL; down_write(&mm->mmap_sem); spin_lock(&mm->page_table_lock); diff -urpN mm3-2.5.42/arch/i386/mm/hugetlbpage.c hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c --- mm3-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/i386/mm/hugetlbpage.c 2002-10-17 20:26:48.000000000 -0700 @@ -64,34 +64,6 @@ alloc_hugetlb_page(void) return page; } -static void -free_hugetlb_page(struct page *page) -{ - spin_lock(&htlbpage_lock); - if ((page->mapping != NULL) && (page_count(page) == 2)) { - struct inode *inode = page->mapping->host; - int i; - - ClearPageDirty(page); - remove_from_page_cache(page); - set_page_count(page, 1); - if ((inode->i_size -= HPAGE_SIZE) == 0) { - for (i = 0; i < MAX_ID; i++) - if (htlbpagek[i].key == inode->i_ino) { - htlbpagek[i].key = 0; - htlbpagek[i].in = NULL; - break; - } - kfree(inode); - } - } - if (put_page_testzero(page)) { - list_add(&page->list, &htlbpage_freelist); - htlbpagemem++; - } - spin_unlock(&htlbpage_lock); -} - static pte_t * huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { @@ -191,9 +163,7 @@ out_error: /* Error case, remove the pa return -1; } -int -copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) +int copy_hugepage_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { pte_t *src_pte, *dst_pte, entry; struct page *ptepage; @@ -218,10 +188,7 @@ nomem: 
return -ENOMEM; } -int -follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *st, int *length, int i) +int follow_hugepage(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *st, int *length, int i) { pte_t *ptep, pte; unsigned long start = *st; @@ -254,27 +221,59 @@ back1: return i; } -void -zap_hugetlb_resources(struct vm_area_struct *mpnt) +void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void zap_hugetlb_resources(struct vm_area_struct *vma) +{ + /* zap_hugepage_range(vma, vma->vm_start, vma->vm_end); */ +} + +void free_huge_page(struct page *page) +{ + BUG_ON(page_count(page)); + BUG_ON(page->mapping); + + INIT_LIST_HEAD(&page->list); + + spin_lock(&htlbpage_lock); + list_add(&page->list, &htlbpage_freelist); + htlbpagemem++; + spin_unlock(&htlbpage_lock); +} + +void huge_page_release(struct page *page) +{ + if (!put_page_testzero(page)) + return; + + free_huge_page(page); +} + +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct mm_struct *mm = mpnt->vm_mm; - unsigned long len, addr, end; - pte_t *ptep; + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; struct page *page; - addr = mpnt->vm_start; - end = mpnt->vm_end; - len = end - addr; - do { - ptep = huge_pte_offset(mm, addr); - page = pte_page(*ptep); - pte_clear(ptep); - free_hugetlb_page(page); - addr += HPAGE_SIZE; - } while (addr < end); - mm->rss -= (len >> PAGE_SHIFT); - mpnt->vm_ops = NULL; - flush_tlb_range(mpnt, end - len, end); + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + page = pte_page(*pte); + huge_page_release(page); + pte_clear(pte); + } + mm->rss -= (end - start) >> PAGE_SHIFT; + flush_tlb_range(vma, 
start, end); +} + +void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length) +{ + struct mm_struct *mm = vma->vm_mm; + spin_lock(&mm->page_table_lock); + unmap_hugepage_range(vma, start, start + length); + spin_unlock(&mm->page_table_lock); } static void @@ -298,11 +297,10 @@ unlink_vma(struct vm_area_struct *mpnt) } int -free_hugepages(struct vm_area_struct *mpnt) +free_hugepages(struct vm_area_struct *vma) { - unlink_vma(mpnt); - zap_hugetlb_resources(mpnt); - kmem_cache_free(vm_area_cachep, mpnt); + unlink_vma(vma); + unmap_vma(vma->vm_mm, vma); return 1; } diff -urpN mm3-2.5.42/arch/ia64/kernel/sys_ia64.c hugetlbfs-2.5.42/arch/ia64/kernel/sys_ia64.c --- mm3-2.5.42/arch/ia64/kernel/sys_ia64.c 2002-10-15 09:51:07.000000000 -0700 +++ hugetlbfs-2.5.42/arch/ia64/kernel/sys_ia64.c 2002-10-17 20:13:30.000000000 -0700 @@ -284,7 +284,7 @@ sys_free_hugepages (unsigned long addr) int retval; vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma) || (vma->vm_start != addr)) + if (!vma || !(vma->vm_flags & VM_HUGETLB) || (vma->vm_start != addr)) return -EINVAL; down_write(&mm->mmap_sem); diff -urpN mm3-2.5.42/arch/sparc64/kernel/sys_sparc.c hugetlbfs-2.5.42/arch/sparc64/kernel/sys_sparc.c --- mm3-2.5.42/arch/sparc64/kernel/sys_sparc.c 2002-10-15 09:51:08.000000000 -0700 +++ hugetlbfs-2.5.42/arch/sparc64/kernel/sys_sparc.c 2002-10-17 20:13:02.000000000 -0700 @@ -755,7 +755,7 @@ sys_free_hugepages(unsigned long addr) int retval; vma = find_vma(current->mm, addr); - if ((!vma) || (!is_vm_hugetlb_page(vma)) || (vma->vm_start!=addr)) + if ((!vma) || (!(vma->vm_flags & VM_HUGETLB)) || (vma->vm_start!=addr)) return -EINVAL; down_write(&mm->mmap_sem); spin_lock(&mm->page_table_lock); diff -urpN mm3-2.5.42/fs/attr.c hugetlbfs-2.5.42/fs/attr.c --- mm3-2.5.42/fs/attr.c 2002-10-15 09:51:08.000000000 -0700 +++ hugetlbfs-2.5.42/fs/attr.c 2002-10-16 22:25:13.000000000 -0700 @@ -93,7 +93,7 @@ out: return error; } -static int 
setattr_mask(unsigned int ia_valid) +int setattr_mask(unsigned int ia_valid) { unsigned long dn_mask = 0; diff -urpN mm3-2.5.42/fs/hugetlbfs/inode.c hugetlbfs-2.5.42/fs/hugetlbfs/inode.c --- mm3-2.5.42/fs/hugetlbfs/inode.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/hugetlbfs/inode.c 2002-10-16 23:58:24.000000000 -0700 @@ -7,14 +7,21 @@ */ #include +#include +#include +#include /* remove ASAP */ #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include @@ -80,6 +87,277 @@ static int hugetlbfs_commit_write(struct return -EINVAL; } +void huge_pagevec_release(struct pagevec *pvec) +{ + int i; + + for (i = 0; i < pagevec_count(pvec); ++i) + huge_page_release(pvec->pages[i]); + + pagevec_reinit(pvec); +} + +void truncate_partial_hugepage(struct page *page, unsigned partial) +{ + int i; + const unsigned piece = partial & (PAGE_SIZE - 1); + const unsigned tailstart = PAGE_SIZE - piece; + const unsigned whole_pages = partial / PAGE_SIZE; + const unsigned last_page_offset = whole_pages + (piece != 0); + + for (i = HPAGE_SIZE/PAGE_SIZE - 1; i >= last_page_offset; --i) + memclear_highpage_flush(&page[i], 0, PAGE_SIZE); + + if (!piece) + return; + + memclear_highpage_flush(&page[last_page_offset - 1], piece, tailstart); +} + +void truncate_huge_page(struct address_space *mapping, struct page *page) +{ + if (page->mapping != mapping) + return; + + clear_page_dirty(page); + ClearPageUptodate(page); + remove_from_page_cache(page); + huge_page_release(page); +} + +void truncate_hugepages(struct address_space *mapping, loff_t lstart) +{ + const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT; + const unsigned partial = lstart & (HPAGE_SIZE - 1); + struct pagevec pvec; + pgoff_t next; + int i; + + pagevec_init(&pvec, 0); + next = start; + + while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); ++i) { + struct page *page = pvec.pages[i]; + 
pgoff_t page_index = page->index; + + if (page_index > next) + next = page_index; + + ++next; + + if (TestSetPageLocked(page)) + continue; + + if (PageWriteback(page)) { + unlock_page(page); + continue; + } + + truncate_huge_page(mapping, page); + unlock_page(page); + } + huge_pagevec_release(&pvec); + cond_resched(); + } + + if (partial) { + struct page *page = find_lock_page(mapping, start - 1); + if (page) { + wait_on_page_writeback(page); + truncate_partial_hugepage(page, partial); + unlock_page(page); + huge_page_release(page); + } + } + + next = start; + + while (1) { + if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + if (next == start) + break; + next = start; + continue; + } + + for (i = 0; i < pagevec_count(&pvec); ++i) { + struct page *page = pvec.pages[i]; + + lock_page(page); + wait_on_page_writeback(page); + if (page->index > next) + next = page->index; + ++next; + truncate_huge_page(mapping, page); + unlock_page(page); + } + huge_pagevec_release(&pvec); + } + BUG_ON(!lstart && mapping->nrpages); +} + +static void hugetlbfs_delete_inode(struct inode *inode) +{ + list_del_init(&inode->i_hash); + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + + if (inode->i_data.nrpages) + truncate_hugepages(&inode->i_data, 0); + + security_ops->inode_delete(inode); + + clear_inode(inode); + destroy_inode(inode); +} + +static void hugetlbfs_forget_inode(struct inode *inode) +{ + struct super_block *super_block = inode->i_sb; + + if (list_empty(&inode->i_hash)) + goto out_truncate; + + if (!(inode->i_state & (I_DIRTY|I_LOCK))) { + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_unused); + } + inodes_stat.nr_unused++; + spin_unlock(&inode_lock); + if (!super_block || (super_block->s_flags & MS_ACTIVE)) + return; + + /* write_inode_now() ? 
*/ + spin_lock(&inode_lock); + inodes_stat.nr_unused--; + list_del_init(&inode->i_hash); +out_truncate: + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + if (inode->i_data.nrpages) + truncate_hugepages(&inode->i_data, 0); + clear_inode(inode); + destroy_inode(inode); +} + +static void hugetlbfs_drop_inode(struct inode *inode) +{ + if (!inode->i_nlink) + hugetlbfs_delete_inode(inode); + else + hugetlbfs_forget_inode(inode); +} + +static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff) +{ + unsigned long start, end, length, delta; + struct vm_area_struct *vma; + + list_for_each_entry(vma, list, shared) { + start = vma->vm_start; + end = vma->vm_end; + length = end - start; + + if (vma->vm_pgoff >= pgoff) { + zap_hugepage_range(vma, start, length); + continue; + } + + length >>= PAGE_SHIFT; + delta = pgoff - vma->vm_pgoff; + if (delta >= length) + continue; + + start += delta << PAGE_SHIFT; + length = (length - delta) << PAGE_SHIFT; + zap_hugepage_range(vma, start, length); + } +} + +static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) +{ + unsigned long pgoff; + struct address_space *mapping = inode->i_mapping; + unsigned long limit; + + pgoff = (offset + HPAGE_SIZE - 1) >> HPAGE_SHIFT; + + if (inode->i_size < offset) + goto do_expand; + + inode->i_size = offset; + spin_lock(&mapping->i_shared_lock); + if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared)) + goto out_unlock; + if (!list_empty(&mapping->i_mmap)) + hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); + if (!list_empty(&mapping->i_mmap_shared)) + hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff); + +out_unlock: + spin_unlock(&mapping->i_shared_lock); + truncate_hugepages(mapping, offset); + return 0; + +do_expand: + limit = current->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out; 
+ inode->i_size = offset; + return 0; + +out_sig: + send_sig(SIGXFSZ, current, 0); +out: + return -EFBIG; +} + +static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error; + unsigned int ia_valid = attr->ia_valid; + unsigned long dn_mask; + + BUG_ON(!inode); + + error = inode_change_ok(inode, attr); + if (error) + goto out; + + error = security_ops->inode_setattr(dentry, attr); + if (error) + goto out; + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) + error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + if (error) + goto out; + + if (ia_valid & ATTR_SIZE) { + error = hugetlb_vmtruncate(inode, attr->ia_size); + if (error) + goto out; + attr->ia_valid &= ~ATTR_SIZE; + error = inode_setattr(inode, attr); + } + if (error) + goto out; + dn_mask = setattr_mask(ia_valid); + if (dn_mask) + dnotify_parent(dentry, dn_mask); +out: + return error; +} + struct inode *hugetlbfs_get_inode(struct super_block *sb, int mode, int dev) { struct inode * inode = new_inode(sb); @@ -189,11 +467,12 @@ static struct inode_operations hugetlbfs rmdir: simple_rmdir, mknod: hugetlbfs_mknod, rename: simple_rename, + setattr: hugetlbfs_setattr, }; static struct super_operations hugetlbfs_ops = { statfs: simple_statfs, - drop_inode: generic_delete_inode, + drop_inode: hugetlbfs_drop_inode, }; static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent) diff -urpN mm3-2.5.42/fs/inode.c hugetlbfs-2.5.42/fs/inode.c --- mm3-2.5.42/fs/inode.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/inode.c 2002-10-16 23:55:25.000000000 -0700 @@ -142,7 +142,7 @@ static struct inode *alloc_inode(struct return inode; } -static void destroy_inode(struct inode *inode) +void destroy_inode(struct inode *inode) { if (inode_has_buffers(inode)) BUG(); diff -urpN mm3-2.5.42/fs/proc/array.c hugetlbfs-2.5.42/fs/proc/array.c --- 
mm3-2.5.42/fs/proc/array.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/fs/proc/array.c 2002-10-17 20:13:56.000000000 -0700 @@ -414,7 +414,7 @@ int proc_pid_statm(task_t *task, char *b int pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; size += pages; - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { if (!(vma->vm_flags & VM_DONTCOPY)) shared += pages; continue; diff -urpN mm3-2.5.42/include/linux/fs.h hugetlbfs-2.5.42/include/linux/fs.h --- mm3-2.5.42/include/linux/fs.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/fs.h 2002-10-16 22:26:57.000000000 -0700 @@ -1143,6 +1143,7 @@ extern int filemap_fdatawrite(struct add extern int filemap_fdatawait(struct address_space *); extern void sync_supers(void); extern sector_t bmap(struct inode *, sector_t); +extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); extern int vfs_permission(struct inode *, int); @@ -1221,6 +1222,7 @@ static inline struct inode *iget(struct extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); +extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern void remove_suid(struct dentry *); diff -urpN mm3-2.5.42/include/linux/hugetlb.h hugetlbfs-2.5.42/include/linux/hugetlb.h --- mm3-2.5.42/include/linux/hugetlb.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/hugetlb.h 2002-10-17 20:24:37.000000000 -0700 @@ -2,47 +2,24 @@ #define _LINUX_HUGETLB_H #ifdef CONFIG_HUGETLB_PAGE -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -{ - return vma->vm_flags & VM_HUGETLB; -} -int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, struct vm_area_struct **, unsigned long *, int *, int); +int copy_hugepage_range(struct mm_struct 
*, struct mm_struct *, struct vm_area_struct *); +int follow_hugepage(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); int free_hugepages(struct vm_area_struct *); +void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); +void huge_page_release(struct page *); #else /* !CONFIG_HUGETLB_PAGE */ -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -{ - return 0; -} - -static inline int -copy_hugetlb_page_range(struct mm_struct *src, struct mm_struct *dst, - struct vm_area_struct *vma) -{ - return -ENOSYS; -} - -static inline int -follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *start, int *len, int i) -{ - return -ENOSYS; -} -static inline int free_hugepages(struct vm_area_struct *vma) -{ - return -EINVAL; -} +#define follow_hugepage(m,v,p,vs,a,b,i) ({ BUG(); 0; }) +#define free_hugepages(vma) ({ BUG(); 0; }) +#define copy_hugepage_range(src, dst, vma) ({ BUG(); 0; }) +#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) +#define zap_hugepage_range(vma, start, len) BUG() +#define unmap_hugepage_range(vma, start, end) BUG() +#define huge_page_release(page) BUG() -static inline int -hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) -{ - return -ENOSYS; -} #endif /* !CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_HUGETLBFS @@ -50,29 +27,21 @@ extern struct file_operations hugetlbfs_ extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); -static inline int is_file_hugetlb_page(struct file *file) +static inline int is_file_hugepages(struct file *file) { return file->f_op == &hugetlbfs_file_operations; } -static inline void set_file_hugetlb_page(struct file *file) +static 
inline void set_file_hugepages(struct file *file) { file->f_op = &hugetlbfs_file_operations; } #else /* !CONFIG_HUGETLBFS */ -static inline int is_file_hugetlb_page(struct file *file) -{ - return 0; -} -static inline void set_file_hugetlb_page(struct file *file) -{ -} +#define is_file_hugepages(file) 0 +#define set_file_hugepages(file) BUG() +#define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) -static inline struct file *hugetlb_zero_setup(size_t size) -{ - return ERR_PTR(-ENOSYS); -} #endif /* !CONFIG_HUGETLBFS */ #endif /* _LINUX_HUGETLB_H */ diff -urpN mm3-2.5.42/include/linux/mm.h hugetlbfs-2.5.42/include/linux/mm.h --- mm3-2.5.42/include/linux/mm.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/mm.h 2002-10-17 19:05:05.000000000 -0700 @@ -524,6 +524,7 @@ extern struct vm_area_struct * find_vma_ struct vm_area_struct **pprev); extern int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below); +extern void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area); /* Look up the first VMA which intersects the interval start_addr..end_addr-1, NULL if none. Assume start_addr < end_addr. 
*/ diff -urpN mm3-2.5.42/include/linux/rmap-locking.h hugetlbfs-2.5.42/include/linux/rmap-locking.h --- mm3-2.5.42/include/linux/rmap-locking.h 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/include/linux/rmap-locking.h 2002-10-15 16:44:25.000000000 -0700 @@ -23,6 +23,18 @@ static inline void pte_chain_lock(struct #endif } +static inline int pte_chain_trylock(struct page *page) +{ + preempt_disable(); +#ifdef CONFIG_SMP + if (test_and_set_bit(PG_chainlock, &page->flags)) { + preempt_enable(); + return 0; + } +#endif + return 1; +} + static inline void pte_chain_unlock(struct page *page) { #ifdef CONFIG_SMP @@ -32,5 +44,6 @@ static inline void pte_chain_unlock(stru preempt_enable(); } -#define pte_page_lock pte_chain_lock -#define pte_page_unlock pte_chain_unlock +#define pte_page_lock pte_chain_lock +#define pte_page_trylock pte_chain_trylock +#define pte_page_unlock pte_chain_unlock diff -urpN mm3-2.5.42/ipc/shm.c hugetlbfs-2.5.42/ipc/shm.c --- mm3-2.5.42/ipc/shm.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/ipc/shm.c 2002-10-17 20:16:00.000000000 -0700 @@ -115,7 +115,7 @@ static void shm_destroy (struct shmid_ke shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid (shp->id); shm_unlock(shp->id); - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0); fput (shp->shm_file); security_ops->shm_free_security(shp); @@ -221,7 +221,7 @@ static int newseg (key_t key, int shmflg shp->shm_file = file; file->f_dentry->d_inode->i_ino = shp->id; if (shmflg & SHM_HUGETLB) - set_file_hugetlb_page(file); + set_file_hugepages(file); else file->f_op = &shm_file_operations; shm_tot += numpages; @@ -264,6 +264,7 @@ asmlinkage long sys_shmget (key_t key, s shm_unlock(id); } up(&shm_ids.sem); + return err; } @@ -388,8 +389,10 @@ asmlinkage long sys_shmctl (int shmid, i struct shmid_kernel *shp; int err, version; - if (cmd < 0 || shmid < 0) - return -EINVAL; + if (cmd < 0 || shmid < 0) { + 
err = -EINVAL; + goto out; + } version = ipc_parse_version(&cmd); @@ -410,7 +413,7 @@ asmlinkage long sys_shmctl (int shmid, i err= shm_ids.max_id; if(err<0) err = 0; - return err; + goto out; } case SHM_INFO: { @@ -427,10 +430,13 @@ asmlinkage long sys_shmctl (int shmid, i err = shm_ids.max_id; shm_unlockall(); up(&shm_ids.sem); - if(copy_to_user (buf, &shm_info, sizeof(shm_info))) - return -EFAULT; + if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { + err = -EFAULT; + goto out; + } - return err < 0 ? 0 : err; + err = err < 0 ? 0 : err; + goto out; } case SHM_STAT: case IPC_STAT: @@ -439,9 +445,10 @@ asmlinkage long sys_shmctl (int shmid, i int result; memset(&tbuf, 0, sizeof(tbuf)); shp = shm_lock(shmid); - if(shp==NULL) - return -EINVAL; - if(cmd==SHM_STAT) { + if(shp==NULL) { + err = -EINVAL; + goto out; + } else if(cmd==SHM_STAT) { err = -EINVAL; if (shmid > shm_ids.max_id) goto out_unlock; @@ -465,8 +472,10 @@ asmlinkage long sys_shmctl (int shmid, i tbuf.shm_nattch = shp->shm_nattch; shm_unlock(shmid); if(copy_shmid_to_user (buf, &tbuf, version)) - return -EFAULT; - return result; + err = -EFAULT; + else + err = result; + goto out; } case SHM_LOCK: case SHM_UNLOCK: @@ -474,26 +483,30 @@ asmlinkage long sys_shmctl (int shmid, i /* Allow superuser to lock segment in memory */ /* Should the pages be faulted in here or leave it to user? 
*/ /* need to determine interaction with current->swappable */ - if (!capable(CAP_IPC_LOCK)) - return -EPERM; + if (!capable(CAP_IPC_LOCK)) { + err = -EPERM; + goto out; + } shp = shm_lock(shmid); - if(shp==NULL) - return -EINVAL; + if(shp==NULL) { + err = -EINVAL; + goto out; + } err = shm_checkid(shp,shmid); if(err) goto out_unlock; if(cmd==SHM_LOCK) { - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 1); shp->shm_flags |= SHM_LOCKED; } else { - if (!is_file_hugetlb_page(shp->shm_file)) + if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0); shp->shm_flags &= ~SHM_LOCKED; } shm_unlock(shmid); - return err; + goto out; } case IPC_RMID: { @@ -529,13 +542,15 @@ asmlinkage long sys_shmctl (int shmid, i } else shm_destroy (shp); up(&shm_ids.sem); - return err; + goto out; } case IPC_SET: { - if(copy_shmid_from_user (&setbuf, buf, version)) - return -EFAULT; + if(copy_shmid_from_user (&setbuf, buf, version)) { + err = -EFAULT; + goto out; + } down(&shm_ids.sem); shp = shm_lock(shmid); err=-EINVAL; @@ -560,7 +575,8 @@ asmlinkage long sys_shmctl (int shmid, i } default: - return -EINVAL; + err = -EINVAL; + goto out; } err = 0; @@ -568,9 +584,10 @@ out_unlock_up: shm_unlock(shmid); out_up: up(&shm_ids.sem); - return err; + goto out; out_unlock: shm_unlock(shmid); +out: return err; } @@ -590,10 +607,10 @@ asmlinkage long sys_shmat (int shmid, ch int acc_mode; void *user_addr; - if (shmid < 0) - return -EINVAL; - - if ((addr = (ulong)shmaddr)) { + if (shmid < 0) { + err = -EINVAL; + goto out; + } else if ((addr = (ulong)shmaddr)) { if (addr & (SHMLBA-1)) { if (shmflg & SHM_RND) addr &= ~(SHMLBA-1); /* round down */ @@ -623,16 +640,19 @@ asmlinkage long sys_shmat (int shmid, ch * additional creator id... 
*/ shp = shm_lock(shmid); - if(shp == NULL) - return -EINVAL; + if(shp == NULL) { + err = -EINVAL; + goto out; + } err = shm_checkid(shp,shmid); if (err) { shm_unlock(shmid); - return err; + goto out; } if (ipcperms(&shp->shm_perm, acc_mode)) { shm_unlock(shmid); - return -EACCES; + err = -EACCES; + goto out; } file = shp->shm_file; size = file->f_dentry->d_inode->i_size; @@ -673,8 +693,8 @@ invalid: err = 0; if (IS_ERR(user_addr)) err = PTR_ERR(user_addr); +out: return err; - } /* @@ -697,10 +717,8 @@ asmlinkage long sys_shmdt (char *shmaddr /* ->vm_pgoff is always 0, see do_mmap() in sys_shmat() */ retval = 0; - if (vma->vm_ops == &shm_vm_ops) + if (vma->vm_ops == &shm_vm_ops || (vma->vm_flags & VM_HUGETLB)) do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); - else if (is_vm_hugetlb_page(vma)) - free_hugepages(vma); else retval = -EINVAL; out: diff -urpN mm3-2.5.42/mm/memory.c hugetlbfs-2.5.42/mm/memory.c --- mm3-2.5.42/mm/memory.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/memory.c 2002-10-17 20:22:41.000000000 -0700 @@ -499,8 +499,8 @@ int share_page_range(struct mm_struct *d unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; - if (is_vm_hugetlb_page(vma)) - return copy_hugetlb_page_range(dst, src, vma); + if (vma->vm_flags & VM_HUGETLB) + return copy_hugepage_range(dst, src, vma); src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -601,8 +601,8 @@ int copy_page_range(struct mm_struct *ds unsigned long end = vma->vm_end; unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - if (is_vm_hugetlb_page(vma)) - return copy_hugetlb_page_range(dst, src, vma); + if (vma->vm_flags & VM_HUGETLB) + return copy_hugepage_range(dst, src, vma); src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -863,6 +863,11 @@ void unmap_page_range(mmu_gather_t *tlb, { pgd_t * dir; + if (vma->vm_flags & VM_HUGETLB) { + unmap_hugepage_range(vma, address, end); + 
return; + } + BUG_ON(address >= end); dir = pgd_offset(vma->vm_mm, address); @@ -902,6 +907,11 @@ void zap_page_range(struct vm_area_struc mmu_gather_t *tlb; unsigned long end, block; + if (vma->vm_flags & VM_HUGETLB) { + zap_hugepage_range(vma, address, size); + return; + } + spin_lock(&mm->page_table_lock); #ifdef CONFIG_SHAREPTE @@ -948,10 +958,6 @@ void unmap_all_pages(struct mm_struct *m goto out; mm->map_count--; - if (is_vm_hugetlb_page(vma)) { - vma->vm_ops->close(vma); - goto next_vma; - } address = vma->vm_start; end = ((address + PGDIR_SIZE) & PGDIR_MASK); @@ -983,16 +989,11 @@ skip_pmd: pmd++; address = (address + PMD_SIZE) & PMD_MASK; if (address >= vma->vm_end) { -next_vma: vma = vma->vm_next; if (!vma) goto out; mm->map_count--; - if (is_vm_hugetlb_page(vma)) { - vma->vm_ops->close(vma); - goto next_vma; - } address = vma->vm_start; end = ((address + PGDIR_SIZE) & PGDIR_MASK); @@ -1086,9 +1087,8 @@ int get_user_pages(struct task_struct *t || !(flags & vma->vm_flags)) return i ? : -EFAULT; - if (is_vm_hugetlb_page(vma)) { - i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i); + if (vma->vm_flags & VM_HUGETLB) { + i = follow_hugepage(mm, vma, pages, vmas, &start, &len, i); continue; } spin_lock(&mm->page_table_lock); diff -urpN mm3-2.5.42/mm/mmap.c hugetlbfs-2.5.42/mm/mmap.c --- mm3-2.5.42/mm/mmap.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mmap.c 2002-10-17 19:47:00.000000000 -0700 @@ -947,7 +947,7 @@ no_mmaps: * By the time this function is called, the area struct has been * removed from the process mapping list. 
*/ -static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) +void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) { size_t len = area->vm_end - area->vm_start; @@ -1036,14 +1036,10 @@ static struct vm_area_struct *touched_by touched = NULL; do { struct vm_area_struct *next = mpnt->vm_next; - if (!(is_vm_hugetlb_page(mpnt))) { - mpnt->vm_next = touched; - touched = mpnt; - rb_erase(&mpnt->vm_rb, &mm->mm_rb); - mm->map_count--; - } - else - free_hugepages(mpnt); + mpnt->vm_next = touched; + touched = mpnt; + rb_erase(&mpnt->vm_rb, &mm->mm_rb); + mm->map_count--; mpnt = next; } while (mpnt && mpnt->vm_start < end); *npp = mpnt; diff -urpN mm3-2.5.42/mm/mprotect.c hugetlbfs-2.5.42/mm/mprotect.c --- mm3-2.5.42/mm/mprotect.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mprotect.c 2002-10-17 20:11:22.000000000 -0700 @@ -276,7 +276,7 @@ sys_mprotect(unsigned long start, size_t /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { error = -EACCES; goto out; } diff -urpN mm3-2.5.42/mm/mremap.c hugetlbfs-2.5.42/mm/mremap.c --- mm3-2.5.42/mm/mremap.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/mremap.c 2002-10-17 20:11:38.000000000 -0700 @@ -340,7 +340,7 @@ unsigned long do_mremap(unsigned long ad vma = find_vma(current->mm, addr); if (!vma || vma->vm_start > addr) goto out; - if (is_vm_hugetlb_page(vma)) { + if (vma->vm_flags & VM_HUGETLB) { ret = -EINVAL; goto out; } diff -urpN mm3-2.5.42/mm/rmap.c hugetlbfs-2.5.42/mm/rmap.c --- mm3-2.5.42/mm/rmap.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/rmap.c 2002-10-15 16:44:25.000000000 -0700 @@ -14,11 +14,11 @@ /* * Locking: * - the page->pte.chain is protected by the PG_chainlock bit, - * which nests within the zone->lru_lock, then the - * mm->page_table_lock, and then the page lock. + * which nests within the zone->lru_lock, then the pte_page_lock, + * and then the page lock. 
* - because swapout locking is opposite to the locking order * in the page fault path, the swapout path uses trylocks - * on the mm->page_table_lock + * on the pte_page_lock. */ #include #include @@ -280,7 +280,7 @@ void pgtable_remove_rmap(struct page * p * @ptep: the page table entry mapping this page * * Add a new pte reverse mapping to a page. - * The caller needs to hold the mm->page_table_lock. + * The caller needs to hold the pte_page_lock. */ void page_add_rmap(struct page * page, pte_t * ptep) { @@ -377,7 +377,7 @@ out: * Removes the reverse mapping from the pte_chain of the page, * after that the caller can clear the page table entry and free * the page. - * Caller needs to hold the mm->page_table_lock. + * Caller needs to hold the pte_page_lock. */ void page_remove_rmap(struct page * page, pte_t * ptep) { @@ -560,7 +560,7 @@ void increment_rss(struct page *ptepage) * zone->lru_lock page_launder() * page lock page_launder(), trylock * pte_chain_lock page_launder() - * mm->page_table_lock try_to_unmap_one(), trylock + * pte_page_lock try_to_unmap_one(), trylock */ static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t)); static int try_to_unmap_one(struct page * page, pte_addr_t paddr) @@ -571,7 +571,10 @@ static int try_to_unmap_one(struct page unsigned long address = ptep_to_address(ptep); int ret; - pte_page_lock(ptepage); + if (!pte_page_trylock(ptepage)) { + rmap_ptep_unmap(ptep); + return SWAP_AGAIN; + } ret = pgtable_check_mlocked(ptepage, address); if (ret != SWAP_SUCCESS) diff -urpN mm3-2.5.42/mm/vmscan.c hugetlbfs-2.5.42/mm/vmscan.c --- mm3-2.5.42/mm/vmscan.c 2002-10-15 09:51:09.000000000 -0700 +++ hugetlbfs-2.5.42/mm/vmscan.c 2002-10-15 16:44:19.000000000 -0700 @@ -542,10 +542,13 @@ refill_inactive_zone(struct zone *zone, long mapped_ratio; long distress; long swap_tendency; + int max_scan; lru_add_drain(); spin_lock_irq(&zone->lru_lock); - while (nr_pages && !list_empty(&zone->active_list)) { + max_scan = zone->nr_active; + while 
(max_scan && nr_pages && !list_empty(&zone->active_list)) { + max_scan--; page = list_entry(zone->active_list.prev, struct page, lru); prefetchw_prev_lru_page(page, &zone->active_list, flags); if (!TestClearPageLRU(page))