# This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.636 -> 1.643 # include/linux/mm.h 1.85 -> 1.87 # ipc/shm.c 1.17 -> 1.18 # fs/Makefile 1.36 -> 1.37 # fs/Config.in 1.35 -> 1.36 # include/linux/shm.h 1.3 -> 1.4 # arch/i386/mm/hugetlbpage.c 1.2 -> 1.5 # (new) -> 1.5 fs/hugetlbfs/inode.c # (new) -> 1.2 fs/hugetlbfs/Makefile # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 02/09/28 wli@elm3b17.eng.beaverton.ibm.com 1.637 # inode.c: # Numerous fixes. # Makefile, inode.c: # new file # -------------------------------------------- # 02/09/28 wli@elm3b17.eng.beaverton.ibm.com 1.638 # hugetlbpage.c: # Add hugetlb_prefault_vma() # -------------------------------------------- # 02/09/29 wli@elm3b17.eng.beaverton.ibm.com 1.639 # inode.c: # hugetlbfs fixes. # Makefile: # hugetlbfs makefile update # Config.in: # Add in a hugetlbfs option. # Makefile: # Add in hugetlbfs. # hugetlbpage.c: # Lots of prefaulter fixes. # -------------------------------------------- # 02/09/29 wli@elm3b17.eng.beaverton.ibm.com 1.640 # hugetlbpage.c, inode.c: # Remove debugging code. # -------------------------------------------- # 02/09/30 wli@elm3b17.eng.beaverton.ibm.com 1.641 # mm.h: # Declare hugetlb_prefault() and hugetlb_vm_ops. # -------------------------------------------- # 02/09/30 wli@elm3b17.eng.beaverton.ibm.com 1.642 # mm.h: # Declare hugetlb_zero_setup() & others. # shm.h: # Call hugetlb_zero_setup() from newseg() when SHM_HUGETLB flag is set. # inode.c: # Add hugetlb_zero_setup(). # -------------------------------------------- # 02/09/30 wli@elm3b17.eng.beaverton.ibm.com 1.643 # shm.c: # Call out to hugetlb_zero_setup() in newseg(). 
# -------------------------------------------- # diff --minimal -Nru a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c --- a/arch/i386/mm/hugetlbpage.c Mon Sep 30 13:19:13 2002 +++ b/arch/i386/mm/hugetlbpage.c Mon Sep 30 13:19:13 2002 @@ -17,7 +17,7 @@ #include #include -static struct vm_operations_struct hugetlb_vm_ops; +struct vm_operations_struct hugetlb_vm_ops; struct list_head htlbpage_freelist; spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED; extern long htlbpagemem; @@ -44,24 +44,22 @@ static struct page * alloc_hugetlb_page(void) { - struct list_head *curr, *head; + int i; struct page *page; spin_lock(&htlbpage_lock); - - head = &htlbpage_freelist; - curr = head->next; - - if (curr == head) { + if (list_empty(&htlbpage_freelist)) { spin_unlock(&htlbpage_lock); return NULL; } - page = list_entry(curr, struct page, list); - list_del(curr); + + page = list_entry(htlbpage_freelist.next, struct page, list); /* take the first entry of the free list */ + list_del(&page->list); htlbpagemem--; spin_unlock(&htlbpage_lock); set_page_count(page, 1); - memset(page_address(page), 0, HPAGE_SIZE); + for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) + clear_highpage(&page[i]); /* zero the huge page one PAGE_SIZE subpage at a time */ return page; } @@ -459,6 +457,46 @@ return retval; } +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +{ /* Walk vma in HPAGE_SIZE steps and, for each step whose pte is still empty, find or allocate the backing page in mapping. Returns 0 or -ENOMEM. */ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret = 0; + + BUG_ON(vma->vm_start & ~HPAGE_MASK); + BUG_ON(vma->vm_end & ~HPAGE_MASK); /* both vma bounds must be huge-page aligned */ + + spin_lock(&mm->page_table_lock); + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; + pte_t *pte = huge_pte_alloc(mm, addr); + struct page *page; + + if (!pte) { + ret = -ENOMEM; + goto out; + } + if (!pte_none(*pte)) + continue; /* this step already has a pte */ + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); /* index into mapping, counted in huge pages */ + page = find_get_page(mapping, idx); + if (!page) { + page = alloc_hugetlb_page(); + if (!page) { + ret = -ENOMEM; + goto out; + } + add_to_page_cache(page, mapping, idx); /* NOTE(review): result is not checked - confirm this cannot fail here */ + } + 
set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); + } +out: /* reached both on success and after an -ENOMEM failure */ + spin_unlock(&mm->page_table_lock); + return ret; +} + static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot, int flag) @@ -540,6 +578,13 @@ return (int) htlbzone_pages; } -static struct vm_operations_struct hugetlb_vm_ops = { +static struct page * hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused) +{ + BUG(); /* presumably unreachable because hugetlb_prefault() maps the whole vma up front - TODO confirm */ + return NULL; +} + +struct vm_operations_struct hugetlb_vm_ops = { .close = zap_hugetlb_resources, + .nopage = hugetlb_nopage, }; diff --minimal -Nru a/fs/Config.in b/fs/Config.in --- a/fs/Config.in Mon Sep 30 13:19:13 2002 +++ b/fs/Config.in Mon Sep 30 13:19:13 2002 @@ -56,6 +56,11 @@ bool 'Virtual memory file system support (former shm fs)' CONFIG_TMPFS define_bool CONFIG_RAMFS y +if [ "$CONFIG_HUGETLB_PAGE" = "y" ] ; then + bool 'HugeTLB file system support' CONFIG_HUGETLBFS + define_bool CONFIG_HUGETLBFS y +fi + tristate 'ISO 9660 CDROM file system support' CONFIG_ISO9660_FS dep_mbool ' Microsoft Joliet CDROM extensions' CONFIG_JOLIET $CONFIG_ISO9660_FS dep_mbool ' Transparent decompression extension' CONFIG_ZISOFS $CONFIG_ISO9660_FS diff --minimal -Nru a/fs/Makefile b/fs/Makefile --- a/fs/Makefile Mon Sep 30 13:19:13 2002 +++ b/fs/Makefile Mon Sep 30 13:19:13 2002 @@ -46,6 +46,7 @@ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_RAMFS) += ramfs/ +obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_INTERMEZZO_FS) += intermezzo/ obj-$(CONFIG_MINIX_FS) += minix/ diff --minimal -Nru a/fs/hugetlbfs/Makefile b/fs/hugetlbfs/Makefile --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/hugetlbfs/Makefile Mon Sep 30 13:19:13 2002 @@ -0,0 +1,9 @@ +# +# Makefile for the linux hugetlbfs routines. 
+# + +obj-$(CONFIG_HUGETLBFS) += hugetlbfs.o + +hugetlbfs-objs := inode.o + +include $(TOPDIR)/Rules.make diff --minimal -Nru a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/hugetlbfs/inode.c Mon Sep 30 13:19:13 2002 @@ -0,0 +1,423 @@ +/* + * hugetlbfs: a huge-page-backed filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * + * Usage limits added by David Gibson, Linuxcare Australia. + * This file is released under the GPL. + */ + +/* + * NOTE! This file appears to have started as a + * copy of ramfs; the directory operations below + * simply pin their dentries in the dcache. + * + * Regular files are different: the readpage, + * prepare_write and commit_write methods all fail + * with -EINVAL, and mmap() prefaults huge pages. + * + * Note in particular how the filesystem does not + * need to implement any data structures of its own + * to keep track of the virtual data: using the VFS + * caches is sufficient. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* some random number */ +#define HUGETLBFS_MAGIC 0x958458f6 + +static struct super_operations hugetlbfs_ops; +static struct address_space_operations hugetlbfs_aops; +struct file_operations hugetlbfs_file_operations; +static struct inode_operations hugetlbfs_dir_inode_operations; + +static struct backing_dev_info hugetlbfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .memory_backed = 1, /* Does not contribute to dirty memory */ +}; + +static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ /* mmap a hugetlbfs file: mark the vma as a hugetlb mapping and prefault it while holding i_sem. Returns 0, -EINVAL for a misaligned vma, or the error from hugetlb_prefault(). */ + struct inode *inode = file->f_dentry->d_inode; + struct address_space *mapping = inode->i_mapping; + int ret; + + /* hugetlb_prefault() BUG()s on a misaligned vma, so refuse it here */ + if ((vma->vm_start | vma->vm_end) & ~HPAGE_MASK) + return -EINVAL; + + down(&inode->i_sem); + + UPDATE_ATIME(inode); + vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_ops = &hugetlb_vm_ops; + ret = hugetlb_prefault(mapping, vma); + + up(&inode->i_sem); + + return ret; +} + +/* + * Read a page. 
Not implemented: this always fails with + * -EINVAL rather than filling in the page. + */ +static int hugetlbfs_readpage(struct file *file, struct page * page) +{ + return -EINVAL; +} + +static int hugetlbfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + return -EINVAL; /* always rejected */ +} + +static int hugetlbfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + return -EINVAL; /* always rejected */ +} + +struct inode *hugetlbfs_get_inode(struct super_block *sb, int mode, int dev) +{ /* Allocate a new inode on sb and initialise it for the given mode; returns NULL when new_inode() fails. */ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_rdev = NODEV; + inode->i_mapping->a_ops = &hugetlbfs_aops; + inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { /* pick the operations tables by file type */ + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_fop = &hugetlbfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &hugetlbfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. 
+ */ +/* SMP-safe */ +static int hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) +{ + struct inode * inode = hugetlbfs_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; /* reported when no inode could be allocated */ + + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int hugetlbfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (!retval) + dir->i_nlink++; /* the parent's link count grows with each new subdirectory */ + return retval; +} + +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode) +{ + return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); /* a regular file is just mknod with S_IFREG */ +} + +/* + * Link a file.. + */ +static int hugetlbfs_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) +{ + struct inode *inode = old_dentry->d_inode; + + inode->i_nlink++; + atomic_inc(&inode->i_count); /* New dentry reference */ + dget(dentry); /* Extra pinning count for the created dentry */ + d_instantiate(dentry, inode); + return 0; +} + +static inline int hugetlbfs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); /* has an inode and is still hashed */ +} + +/* + * Check that a directory is empty (this works + * for regular files too, they'll just always be + * considered empty..). + * + * Note that an empty directory can still have + * children, they just all have to be negative.. 
+ */ +static int hugetlbfs_empty(struct dentry *dentry) +{ + struct list_head *list; + + spin_lock(&dcache_lock); + list = dentry->d_subdirs.next; + + while (list != &dentry->d_subdirs) { + struct dentry *de = list_entry(list, struct dentry, d_child); + + if (hugetlbfs_positive(de)) { + spin_unlock(&dcache_lock); + return 0; /* found a positive child: not empty */ + } + list = list->next; + } + spin_unlock(&dcache_lock); + return 1; /* no positive children */ +} + +/* + * Unlink a hugetlbfs entry + */ +static int hugetlbfs_unlink(struct inode * dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + inode->i_nlink--; + dput(dentry); /* Undo the count from "create" - this does all the work */ + return 0; +} + +static int hugetlbfs_rmdir(struct inode * dir, struct dentry *dentry) +{ + int retval = -ENOTEMPTY; + + if (hugetlbfs_empty(dentry)) { + dentry->d_inode->i_nlink--; /* hugetlbfs_unlink() below decrements it a second time */ + hugetlbfs_unlink(dir, dentry); + dir->i_nlink--; + retval = 0; + } + return retval; +} + +/* + * The VFS layer already does all the dentry stuff for rename, + * we just have to decrement the usage count for the target if + * it exists so that the VFS layer correctly frees it when it + * gets overwritten. 
+ */ +static int hugetlbfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry) +{ + int error = -ENOTEMPTY; /* returned when the target still has positive children */ + + if (hugetlbfs_empty(new_dentry)) { + struct inode *inode = new_dentry->d_inode; + if (inode) { + inode->i_nlink--; + dput(new_dentry); + } + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + old_dir->i_nlink--; + new_dir->i_nlink++; + } + error = 0; + } + return error; +} + +static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; /* reported when no inode could be allocated */ + + inode = hugetlbfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + d_instantiate(dentry, inode); + dget(dentry); + } else + iput(inode); /* release the inode whose link text could not be stored */ + } + return error; +} + +static int hugetlbfs_sync_file(struct file * file, struct dentry *dentry, int datasync) +{ + return 0; /* no-op: always reports success */ +} + +static struct address_space_operations hugetlbfs_aops = { + .readpage = hugetlbfs_readpage, + .writepage = fail_writepage, + .prepare_write = hugetlbfs_prepare_write, + .commit_write = hugetlbfs_commit_write, +}; + +struct file_operations hugetlbfs_file_operations = { + .read = generic_file_read, + .write = generic_file_write, + .mmap = hugetlbfs_file_mmap, + .fsync = hugetlbfs_sync_file, + .sendfile = generic_file_sendfile, +}; + +static struct inode_operations hugetlbfs_dir_inode_operations = { + .create = hugetlbfs_create, + .lookup = simple_lookup, + .link = hugetlbfs_link, + .unlink = hugetlbfs_unlink, + .symlink = hugetlbfs_symlink, + .mkdir = hugetlbfs_mkdir, + .rmdir = hugetlbfs_rmdir, + .mknod = hugetlbfs_mknod, + .rename = hugetlbfs_rename, +}; + +static struct super_operations hugetlbfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + 
sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = HUGETLBFS_MAGIC; + sb->s_op = &hugetlbfs_ops; + inode = hugetlbfs_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); /* the root dentry never took ownership of the inode */ + return -ENOMEM; + } + sb->s_root = root; + return 0; +} + +static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, + int flags, char *dev_name, void *data) +{ + return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); +} + +static struct file_system_type hugetlbfs_fs_type = { + .name = "hugetlbfs", + .get_sb = hugetlbfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static struct vfsmount *hugetlbfs_vfsmount; + +static atomic_t hugetlbfs_counter = ATOMIC_INIT(0); + +struct file *hugetlb_zero_setup(size_t size) +{ /* Create an unlinked (i_nlink = 0) regular file of the given size on the internal hugetlbfs mount; returns the file or an ERR_PTR(). */ + int error, n; + struct file *file; + struct inode *inode; + struct dentry *dentry, *root; + struct qstr quick_string; + char buf[16]; + + n = atomic_read(&hugetlbfs_counter); + atomic_inc(&hugetlbfs_counter); /* NOTE(review): read + inc is not one atomic step, so two callers may see the same n */ + + root = hugetlbfs_vfsmount->mnt_root; /* NOTE(review): assumes init_hugetlbfs_fs() has set hugetlbfs_vfsmount */ + snprintf(buf, sizeof(buf), "%d", n); + quick_string.name = buf; + quick_string.len = strlen(quick_string.name); + quick_string.hash = 0; + dentry = d_alloc(root, &quick_string); + if (!dentry) + return ERR_PTR(-ENOMEM); + + error = -ENFILE; + file = get_empty_filp(); + if (!file) + goto out_dentry; + + error = -ENOSPC; + inode = hugetlbfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); + if (!inode) + goto out_file; + + d_instantiate(dentry, inode); + inode->i_size = size; + inode->i_nlink = 0; + file->f_vfsmnt = mntget(hugetlbfs_vfsmount); + file->f_dentry = dentry; + file->f_op = &hugetlbfs_file_operations; + file->f_mode = FMODE_WRITE | FMODE_READ; + return file; + +out_file: + put_filp(file); +out_dentry: + dput(dentry); + return ERR_PTR(error); +} + +static int __init init_hugetlbfs_fs(void) +{ + int error; + struct vfsmount *vfsmount; + + error = register_filesystem(&hugetlbfs_fs_type); + if (error) + return error; + + vfsmount = 
kern_mount(&hugetlbfs_fs_type); + + if (!IS_ERR(vfsmount)) { + hugetlbfs_vfsmount = vfsmount; + return 0; + } + + unregister_filesystem(&hugetlbfs_fs_type); /* a failed init must not leave the filesystem registered */ + return PTR_ERR(vfsmount); +} + +static void __exit exit_hugetlbfs_fs(void) +{ + unregister_filesystem(&hugetlbfs_fs_type); +} + +module_init(init_hugetlbfs_fs) +module_exit(exit_hugetlbfs_fs) + +MODULE_LICENSE("GPL"); diff --minimal -Nru a/include/linux/mm.h b/include/linux/mm.h --- a/include/linux/mm.h Mon Sep 30 13:19:13 2002 +++ b/include/linux/mm.h Mon Sep 30 13:19:13 2002 @@ -386,7 +386,9 @@ extern int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); extern int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); extern int free_hugepages(struct vm_area_struct *); - +extern int hugetlb_prefault(struct address_space *, struct vm_area_struct *); +extern struct file *hugetlb_zero_setup(size_t); +extern struct file_operations hugetlbfs_file_operations; #else #define is_vm_hugetlb_page(vma) (0) #define follow_hugetlb_page(mm, vma, pages, vmas, start, len, i) (0) @@ -394,6 +396,8 @@ #define free_hugepages(mpnt) do { } while(0) +#define hugetlb_zero_setup(size) ERR_PTR(-ENOSYS) /* lets ipc/shm.c build when hugetlb support is configured out */ #endif +extern struct vm_operations_struct hugetlb_vm_ops; /* * If the mapping doesn't provide a set_page_dirty a_op, then diff --minimal -Nru a/include/linux/shm.h b/include/linux/shm.h --- a/include/linux/shm.h Mon Sep 30 13:19:13 2002 +++ b/include/linux/shm.h Mon Sep 30 13:19:13 2002 @@ -88,6 +88,7 @@ /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ asmlinkage long sys_shmget (key_t key, size_t size, int flag); asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr); diff --minimal -Nru a/ipc/shm.c b/ipc/shm.c --- a/ipc/shm.c Mon Sep 30 13:19:13 2002 +++ b/ipc/shm.c Mon Sep 30 13:19:13 
2002 @@ -185,8 +185,12 @@ shp->shm_perm.key = key; shp->shm_flags = (shmflg & S_IRWXUGO); - sprintf (name, "SYSV%08x", key); - file = shmem_file_setup(name, size, VM_ACCOUNT); + if (shmflg & SHM_HUGETLB) + file = hugetlb_zero_setup(size); + else { + sprintf (name, "SYSV%08x", key); + file = shmem_file_setup(name, size, VM_ACCOUNT); + } error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; @@ -205,7 +209,9 @@ shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; file->f_dentry->d_inode->i_ino = shp->id; - file->f_op = &shm_file_operations; + /* hugetlb_zero_setup() has already installed hugetlbfs_file_operations on its file */ + if (!(shmflg & SHM_HUGETLB)) + file->f_op = &shm_file_operations; shm_tot += numpages; shm_unlock (id); return shp->id;