From: Manfred Spraul Here are the updates to the patch-mq-03-core: It fixes the bug with the notification descriptors and adds more documentation. What's still missing is a description of the change to signal.c. --- init/Kconfig | 1 ipc/mqueue.c | 141 +++++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 105 insertions(+), 37 deletions(-) diff -puN init/Kconfig~mq-03-core-update init/Kconfig --- 25/init/Kconfig~mq-03-core-update 2004-02-29 02:12:01.000000000 -0800 +++ 25-akpm/init/Kconfig 2004-02-29 02:12:01.000000000 -0800 @@ -93,6 +93,7 @@ config SYSVIPC config POSIX_MQUEUE bool "POSIX Message Queues" + depends on EXPERIMENTAL ---help--- POSIX variant of message queues is a part of IPC. In POSIX message queues every message has a priority which decides about succession diff -puN ipc/mqueue.c~mq-03-core-update ipc/mqueue.c --- 25/ipc/mqueue.c~mq-03-core-update 2004-02-29 02:12:01.000000000 -0800 +++ 25-akpm/ipc/mqueue.c 2004-02-29 02:12:33.000000000 -0800 @@ -105,7 +105,8 @@ static struct inode *mqueue_get_inode(st inode->i_gid = current->fsgid; inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; - inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME; + inode->i_mtime = inode->i_ctime = inode->i_atime = + CURRENT_TIME; if (S_ISREG(mode)) { struct mqueue_inode_info *info; @@ -166,7 +167,7 @@ static void init_once(void *foo, kmem_ca struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + SLAB_CTOR_CONSTRUCTOR) inode_init_once(&p->vfs_inode); } @@ -174,7 +175,7 @@ static struct inode *mqueue_alloc_inode( { struct mqueue_inode_info *ei; - ei = (struct mqueue_inode_info *)kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -208,7 +209,8 @@ static void mqueue_delete_inode(struct i spin_unlock(&mq_lock); } -static int 
mqueue_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +static int mqueue_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) { struct inode *inode; int error; @@ -272,7 +274,7 @@ static void wq_add(struct mqueue_inode_i * sr: SEND or RECV */ static int wq_sleep(struct mqueue_inode_info *info, int sr, - long timeout, struct ext_wait_queue *ewp) + long timeout, struct ext_wait_queue *ewp) { int retval; signed long time; @@ -305,7 +307,6 @@ static int wq_sleep(struct mqueue_inode_ retval = -ETIMEDOUT; break; } - printk(KERN_WARNING "mqueue: Spurious wakeup in wq_sleep()\n"); } list_del(&ewp->list); out_unlock: @@ -314,7 +315,9 @@ out: return retval; } -/* Returns waiting task that should be serviced first or NULL if none exists */ +/* + * Returns waiting task that should be serviced first or NULL if none exists + */ static struct ext_wait_queue *wq_get_first_waiter( struct mqueue_inode_info *info, int sr) { @@ -386,7 +389,8 @@ static long prepare_timeout(const struct long timeout; if (u_arg) { - if (unlikely(copy_from_user(&ts, u_arg, sizeof(struct timespec)))) + if (unlikely(copy_from_user(&ts, u_arg, + sizeof(struct timespec)))) return -EFAULT; if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0 @@ -410,7 +414,26 @@ static long prepare_timeout(const struct return timeout; } -static unsigned int mqueue_notify_poll(struct file *filp, struct poll_table_struct *poll_tab) +/* + * File descriptor based notification, intended to be used to implement + * SIGEV_THREAD: + * SIGEV_THREAD means that a notification function should be called in the + * context of a new thread. The kernel can't do that. Therefore mq_notify + * calls with SIGEV_THREAD return a new file descriptor. A user space helper + * must create a new thread and then read from the given file descriptor. + * The read always returns one byte. If it's NOTIFY_WOKENUP, then it must + * call the notification function. 
If it's NOTIFY_REMOVED, then the + * notification was removed. The file descriptor supports poll, thus one + * supervisor thread can manage multiple message queue notifications. + * + * The implementation must support multiple outstanding notifications: + * It's possible that a new notification is added and signaled before user + * space calls mqueue_notify_read for the previous notification. + * Therefore the notification state is stored in the private_data field of + * the file descriptor. + */ +static unsigned int mqueue_notify_poll(struct file *filp, + struct poll_table_struct *poll_tab) { struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode); int retval; @@ -424,7 +447,8 @@ static unsigned int mqueue_notify_poll(s return retval; } -static ssize_t mqueue_notify_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t mqueue_notify_read(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) { struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode); char result; @@ -454,6 +478,19 @@ static ssize_t mqueue_notify_read(struct return 1; } +static int mqueue_notify_release(struct inode *inode, struct file *filp) +{ + struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode); + + spin_lock(&info->lock); + if (info->notify_owner && info->notify_filp == filp) + info->notify_owner = 0; + filp->private_data = NP_REMOVED; + spin_unlock(&info->lock); + + return 0; +} + static void remove_notification(struct mqueue_inode_info *info) { if (info->notify.sigev_notify == SIGEV_THREAD) { @@ -467,7 +504,7 @@ static void remove_notification(struct m * Invoked when creating a new queue via sys_mq_open */ static struct file *do_create(struct dentry *dir, struct dentry *dentry, - int oflag, mode_t mode, struct mq_attr __user *u_attr) + int oflag, mode_t mode, struct mq_attr __user *u_attr) { struct file *filp; struct inode *inode; @@ -486,7 +523,8 @@ static struct file *do_create(struct den if (attr.mq_maxmsg > 
HARD_MSGMAX) return ERR_PTR(-EINVAL); } else { - if (attr.mq_maxmsg > msg_max || attr.mq_msgsize > msgsize_max) + if (attr.mq_maxmsg > msg_max || + attr.mq_msgsize > msgsize_max) return ERR_PTR(-EINVAL); } } else { @@ -520,8 +558,9 @@ static struct file *do_create(struct den /* Opens existing queue */ static struct file *do_open(struct dentry *dentry, int oflag) { +static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, + MAY_READ | MAY_WRITE }; struct file *filp; - static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return ERR_PTR(-EINVAL); @@ -538,12 +577,12 @@ static struct file *do_open(struct dentr } asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode, - struct mq_attr __user *u_attr) + struct mq_attr __user *u_attr) { struct dentry *dentry; struct file *filp; - char *name; - int fd, error; + char *name; + int fd, error; if (IS_ERR(name = getname(u_name))) return PTR_ERR(name); @@ -562,12 +601,15 @@ asmlinkage long sys_mq_open(const char _ if (oflag & O_CREAT) { if (dentry->d_inode) { /* entry already exists */ - filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) : do_open(dentry, oflag); + filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) : + do_open(dentry, oflag); } else { - filp = do_create(mqueue_mnt->mnt_root, dentry, oflag, mode, u_attr); + filp = do_create(mqueue_mnt->mnt_root, dentry, + oflag, mode, u_attr); } } else - filp = (dentry->d_inode) ? do_open(dentry, oflag) : ERR_PTR(-ENOENT); + filp = (dentry->d_inode) ? do_open(dentry, oflag) : + ERR_PTR(-ENOENT); dput(dentry); @@ -635,8 +677,20 @@ out_unlock: return err; } -/* Pipelined send and receive functions. Do not confuse this with SysV message - * queues terminology. It is little bit different. */ +/* Pipelined send and receive functions. + * + * If a receiver finds no waiting message, then it registers itself in the + * list of waiting receivers. 
A sender checks that list before adding the new + * message into the message array. If there is a waiting receiver, then it + * bypasses the message array and directly hands the message over to the + * receiver. + * The receiver accepts the message and returns without grabbing the queue + * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers + * are necessary. The same algorithm is used for sysv semaphores, see + * ipc/sem.c for more details. + * + * The same algorithm is used for senders. + */ /* pipelined_send() - send a message directly to the task waiting in * sys_mq_timedreceive() (without inserting message into a queue). */ @@ -670,7 +724,8 @@ static inline void pipelined_receive(str } asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout) + size_t msg_len, unsigned int msg_prio, + const struct timespec __user *u_abs_timeout) { struct file *filp; struct inode *inode; @@ -678,7 +733,7 @@ asmlinkage long sys_mq_timedsend(mqd_t m struct ext_wait_queue *receiver; struct msg_msg *msg_ptr; struct mqueue_inode_info *info; - long timeout; + long timeout; int ret; if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) @@ -738,7 +793,8 @@ asmlinkage long sys_mq_timedsend(mqd_t m msg_insert(msg_ptr, info); __do_notify(info); } - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = inode->i_ctime = + CURRENT_TIME; spin_unlock(&info->lock); ret = 0; } @@ -798,7 +854,8 @@ asmlinkage ssize_t sys_mq_timedreceive(m } else { msg_ptr = msg_get(info); - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = inode->i_ctime = + CURRENT_TIME; /* There is now free space in queue.
*/ pipelined_receive(info); @@ -820,11 +877,13 @@ out: return ret; } -/* Notes: the case when user wants us to deregister (with NULL as pointer or SIGEV_NONE) - * and he isn't currently owner of notification will be silently discarded. - * It isn't explicitly defined in the POSIX. +/* + * Notes: the case when user wants us to deregister (with NULL as pointer + * or SIGEV_NONE) and he isn't currently owner of notification will be + * silently discarded. It isn't explicitly defined in the POSIX. */ -asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) +asmlinkage long sys_mq_notify(mqd_t mqdes, + const struct sigevent __user *u_notification) { int ret, fd; struct file *filp, *nfilp; @@ -835,7 +894,8 @@ asmlinkage long sys_mq_notify(mqd_t mqde if (u_notification == NULL) { notification.sigev_notify = SIGEV_NONE; } else { - if (copy_from_user(&notification, u_notification, sizeof(struct sigevent))) + if (copy_from_user(&notification, u_notification, + sizeof(struct sigevent))) return -EFAULT; if (unlikely(notification.sigev_notify != SIGEV_NONE && @@ -843,7 +903,8 @@ asmlinkage long sys_mq_notify(mqd_t mqde notification.sigev_notify != SIGEV_THREAD)) return -EINVAL; if (notification.sigev_notify == SIGEV_SIGNAL && - (notification.sigev_signo < 0 || notification.sigev_signo > _NSIG)) { + (notification.sigev_signo < 0 || + notification.sigev_signo > _NSIG)) { return -EINVAL; } } @@ -915,8 +976,9 @@ out: return ret; } -asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *u_mqstat, - struct mq_attr __user *u_omqstat) +asmlinkage long sys_mq_getsetattr(mqd_t mqdes, + const struct mq_attr __user *u_mqstat, + struct mq_attr __user *u_omqstat) { int ret; struct mq_attr mqstat, omqstat; @@ -955,7 +1017,8 @@ asmlinkage long sys_mq_getsetattr(mqd_t spin_unlock(&info->lock); ret = 0; - if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat, sizeof(struct mq_attr)))
+ sizeof(struct mq_attr))) ret = -EFAULT; out_fput: @@ -977,6 +1040,7 @@ static struct file_operations mqueue_fil static struct file_operations mqueue_notify_fops = { .poll = mqueue_notify_poll, .read = mqueue_notify_read, + .release = mqueue_notify_release, }; @@ -1056,7 +1120,8 @@ static int __init init_mqueue_fs(void) int error; mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", - sizeof(struct mqueue_inode_info), 0, SLAB_HWCACHE_ALIGN, init_once, NULL); + sizeof(struct mqueue_inode_info), 0, + SLAB_HWCACHE_ALIGN, init_once, NULL); if (mqueue_inode_cachep == NULL) return -ENOMEM; @@ -1086,8 +1151,10 @@ out_filesystem: out_sysctl: unregister_sysctl_table(mq_sysctl_table); out_cache: - if (kmem_cache_destroy(mqueue_inode_cachep)) - printk(KERN_INFO "mqueue_inode_cache: not all structures were freed\n"); + if (kmem_cache_destroy(mqueue_inode_cachep)) { + printk(KERN_INFO + "mqueue_inode_cache: not all structures were freed\n"); + } return error; } _