From: James Morris Below is an updated version of the patch which moves duplicated transaction-based file operation code into libfs. Since the last post, the patch has been through a couple of iterations with Al, who suggested a number of cleanups including locking and interface simplification. For filesystem writers, the interface is now much simpler. The simple_transaction_get() helper should be part of the file op write method. This safely obtains the transaction request data during write(), allocates a page for it and stores it there. The data is returned to the caller for potential further processing, which then makes it available for the next read() call via simple_transaction_set(). See the selinuxfs and nfsctl code for examples of use. Signed-off-by: James Morris Signed-off-by: Andrew Morton --- 25-akpm/fs/libfs.c | 55 ++++++++++++++++++ 25-akpm/fs/nfsd/nfsctl.c | 96 ++++---------------------------- 25-akpm/include/linux/fs.h | 33 +++++++++++ 25-akpm/security/selinux/selinuxfs.c | 103 +++++------------------------------ 4 files changed, 117 insertions(+), 170 deletions(-) diff -puN fs/libfs.c~libfs-move-transaction-file-ops-into-libfs fs/libfs.c --- 25/fs/libfs.c~libfs-move-transaction-file-ops-into-libfs 2004-08-15 22:49:58.370729312 -0700 +++ 25-akpm/fs/libfs.c 2004-08-15 22:49:58.378728096 -0700 @@ -469,6 +469,58 @@ ssize_t simple_read_from_buffer(void __u return count; } +/* + * Transaction based IO. + * The file expects a single write which triggers the transaction, and then + * possibly a read which collects the result - which is stored in a + * file-local buffer. + */ +char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) +{ + struct simple_transaction_argresp *ar; + static spinlock_t simple_transaction_lock = SPIN_LOCK_UNLOCKED; + + if (size > SIMPLE_TRANSACTION_LIMIT - 1) + return ERR_PTR(-EFBIG); + + ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); + if (!ar) + return ERR_PTR(-ENOMEM); + + spin_lock(&simple_transaction_lock); + + /* only one write allowed per open */ + if (file->private_data) { + spin_unlock(&simple_transaction_lock); + free_page((unsigned long)ar); + return ERR_PTR(-EBUSY); + } + + file->private_data = ar; + + spin_unlock(&simple_transaction_lock); + + if (copy_from_user(ar->data, buf, size)) + return ERR_PTR(-EFAULT); + + return ar->data; +} + +ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) +{ + struct simple_transaction_argresp *ar = file->private_data; + + if (!ar) + return 0; + return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); +} + +int simple_transaction_release(struct inode *inode, struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); @@ -492,3 +544,6 @@ EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(simple_sync_file); EXPORT_SYMBOL(simple_unlink); EXPORT_SYMBOL(simple_read_from_buffer); +EXPORT_SYMBOL(simple_transaction_get); +EXPORT_SYMBOL(simple_transaction_read); +EXPORT_SYMBOL(simple_transaction_release); diff -puN fs/nfsd/nfsctl.c~libfs-move-transaction-file-ops-into-libfs fs/nfsd/nfsctl.c --- 25/fs/nfsd/nfsctl.c~libfs-move-transaction-file-ops-into-libfs 2004-08-15 22:49:58.372729008 -0700 +++ 25-akpm/fs/nfsd/nfsctl.c 2004-08-15 22:49:58.379727944 -0700 @@ -80,101 +80,31 @@ static ssize_t (*write_op[])(struct file [NFSD_Leasetime] = write_leasetime, }; -/* an argresp is stored in an allocated page and holds the - * size of the argument or response, along with its content - */ -struct argresp { - ssize_t size; - char data[0]; -}; - -/* - * transaction based IO methods. - * The file expects a single write which triggers the transaction, and then - * possibly a read which collects the result - which is stored in a - * file-local buffer. - */ -static ssize_t TA_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) +static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { ino_t ino = file->f_dentry->d_inode->i_ino; - struct argresp *ar; - ssize_t rv = 0; + char *data; + ssize_t rv; if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino]) return -EINVAL; - if (file->private_data) - return -EINVAL; /* only one write allowed per open */ - if (size > PAGE_SIZE - sizeof(struct argresp)) - return -EFBIG; - ar = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!ar) - return -ENOMEM; - ar->size = 0; - down(&file->f_dentry->d_inode->i_sem); - if (file->private_data) - rv = -EINVAL; - else - file->private_data = ar; - up(&file->f_dentry->d_inode->i_sem); - if (rv) { - kfree(ar); - return rv; - } - if (copy_from_user(ar->data, buf, size)) - return -EFAULT; - - rv = write_op[ino](file, ar->data, size); + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + + rv = write_op[ino](file, data, size); if (rv>0) { - ar->size = rv; + simple_transaction_set(file, rv); rv = size; } return rv; } - -static ssize_t TA_read(struct file *file, char __user *buf, size_t size, loff_t *pos) -{ - struct argresp *ar; - ssize_t rv = 0; - - if (file->private_data == NULL) - rv = TA_write(file, buf, 0, pos); - if (rv < 0) - return rv; - - ar = file->private_data; - if (!ar) - return 0; - if (*pos >= ar->size) - return 0; - if (*pos + size > ar->size) - size = ar->size - *pos; - if (copy_to_user(buf, ar->data + *pos, size)) - return -EFAULT; - *pos += size; - return size; -} - -static int TA_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return 0; -} - -static int TA_release(struct inode *inode, struct file *file) -{ - void *p = file->private_data; - file->private_data = NULL; - kfree(p); - return 0; -} - static struct file_operations transaction_ops = { - .write = TA_write, - .read = TA_read, - .open = TA_open, - .release = TA_release, + .write = nfsctl_transaction_write, + .read = simple_transaction_read, + .release = simple_transaction_release, }; extern struct seq_operations nfs_exports_op; @@ -366,7 +296,7 @@ static ssize_t write_filehandle(struct f if (len) return len; - mesg = buf; len = PAGE_SIZE-sizeof(struct argresp); + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); mesg[-1] = '\n'; return mesg - buf; diff -puN include/linux/fs.h~libfs-move-transaction-file-ops-into-libfs include/linux/fs.h --- 25/include/linux/fs.h~libfs-move-transaction-file-ops-into-libfs 2004-08-15 22:49:58.373728856 -0700 +++ 25-akpm/include/linux/fs.h 2004-08-15 22:49:58.381727640 -0700 @@ -1578,6 +1578,39 @@ static inline ino_t parent_ino(struct de /* kernel/fork.c */ extern int unshare_files(void); +/* Transaction based IO helpers */ + +/* + * An argresp is stored in an allocated page and holds the + * size of the argument or response, along with its content + */ +struct simple_transaction_argresp { + ssize_t size; + char data[0]; +}; + +#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) + +char *simple_transaction_get(struct file *file, const char __user *buf, + size_t size); +ssize_t simple_transaction_read(struct file *file, char __user *buf, + size_t size, loff_t *pos); +int simple_transaction_release(struct inode *inode, struct file *file); + +static inline void simple_transaction_set(struct file *file, size_t n) +{ + struct simple_transaction_argresp *ar = file->private_data; + + BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); + + /* + * The barrier ensures that ar->size will really remain zero until + * ar->data is ready for reading. + */ + smp_mb(); + ar->size = n; +} + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void) { diff -puN security/selinux/selinuxfs.c~libfs-move-transaction-file-ops-into-libfs security/selinux/selinuxfs.c --- 25/security/selinux/selinuxfs.c~libfs-move-transaction-file-ops-into-libfs 2004-08-15 22:49:58.375728552 -0700 +++ 25-akpm/security/selinux/selinuxfs.c 2004-08-15 22:49:58.383727336 -0700 @@ -390,103 +390,31 @@ static ssize_t (*write_op[])(struct file [SEL_USER] = sel_write_user, }; -/* an argresp is stored in an allocated page and holds the - * size of the argument or response, along with its content - */ -struct argresp { - ssize_t size; - char data[0]; -}; - -#define PAYLOAD_SIZE (PAGE_SIZE - sizeof(struct argresp)) - -/* - * transaction based IO methods. - * The file expects a single write which triggers the transaction, and then - * possibly a read which collects the result - which is stored in a - * file-local buffer. - */ -static ssize_t TA_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) +static ssize_t selinux_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { ino_t ino = file->f_dentry->d_inode->i_ino; - struct argresp *ar; - ssize_t rv = 0; + char *data; + ssize_t rv; if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino]) return -EINVAL; - if (file->private_data) - return -EINVAL; /* only one write allowed per open */ - if (size > PAYLOAD_SIZE - 1) /* allow one byte for null terminator */ - return -EFBIG; - ar = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!ar) - return -ENOMEM; - memset(ar, 0, PAGE_SIZE); /* clear buffer, particularly last byte */ - ar->size = 0; - down(&file->f_dentry->d_inode->i_sem); - if (file->private_data) - rv = -EINVAL; - else - file->private_data = ar; - up(&file->f_dentry->d_inode->i_sem); - if (rv) { - kfree(ar); - return rv; - } - if (copy_from_user(ar->data, buf, size)) - return -EFAULT; + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); - rv = write_op[ino](file, ar->data, size); + rv = write_op[ino](file, data, size); if (rv>0) { - ar->size = rv; + simple_transaction_set(file, rv); rv = size; } return rv; } -static ssize_t TA_read(struct file *file, char __user *buf, size_t size, loff_t *pos) -{ - struct argresp *ar; - ssize_t rv = 0; - - if (file->private_data == NULL) - rv = TA_write(file, buf, 0, pos); - if (rv < 0) - return rv; - - ar = file->private_data; - if (!ar) - return 0; - if (*pos >= ar->size) - return 0; - if (*pos + size > ar->size) - size = ar->size - *pos; - if (copy_to_user(buf, ar->data + *pos, size)) - return -EFAULT; - *pos += size; - return size; -} - -static int TA_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return 0; -} - -static int TA_release(struct inode *inode, struct file *file) -{ - void *p = file->private_data; - file->private_data = NULL; - kfree(p); - return 0; -} - static struct file_operations transaction_ops = { - .write = TA_write, - .read = TA_read, - .open = TA_open, - .release = TA_release, + .write = selinux_transaction_write, + .read = simple_transaction_read, + .release = simple_transaction_release, }; /* @@ -534,7 +462,8 @@ static ssize_t sel_write_access(struct f if (length < 0) goto out2; - length = scnprintf(buf, PAYLOAD_SIZE, "%x %x %x %x %u", + length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, + "%x %x %x %x %u", avd.allowed, avd.decided, avd.auditallow, avd.auditdeny, avd.seqno); @@ -588,7 +517,7 @@ static ssize_t sel_write_create(struct f if (length < 0) goto out2; - if (len > PAYLOAD_SIZE) { + if (len > SIMPLE_TRANSACTION_LIMIT) { printk(KERN_ERR "%s: context size (%u) exceeds payload " "max\n", __FUNCTION__, len); length = -ERANGE; @@ -649,7 +578,7 @@ static ssize_t sel_write_relabel(struct if (length < 0) goto out2; - if (len > PAYLOAD_SIZE) { + if (len > SIMPLE_TRANSACTION_LIMIT) { length = -ERANGE; goto out3; } @@ -709,7 +638,7 @@ static ssize_t sel_write_user(struct fil length = rc; goto out3; } - if ((length + len) >= PAYLOAD_SIZE) { + if ((length + len) >= SIMPLE_TRANSACTION_LIMIT) { kfree(newcon); length = -ERANGE; goto out3; _