From: John Myers It's basically waiting for someone to merge the patch. There were some people making unsubstantiated claims that it didn't scale, but the available benchmarks showed that it scaled perfectly across the parameters tested. DESC aio-poll: don't put extern decls in .c! EDESC grrrr! It defeats typechecking. slap. fs/Makefile | 2 fs/aio.c | 23 +++++- fs/aiopoll.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/select.c | 1 include/linux/aio.h | 2 include/linux/aio_abi.h | 2 include/linux/poll.h | 2 7 files changed, 204 insertions(+), 6 deletions(-) diff -puN fs/aio.c~aio-poll fs/aio.c --- 25/fs/aio.c~aio-poll 2003-05-25 15:37:48.000000000 -0700 +++ 25-akpm/fs/aio.c 2003-05-25 15:37:58.000000000 -0700 @@ -1292,12 +1292,26 @@ static ssize_t aio_fsync(struct kiocb *i return ret; } -/* +static ssize_t generic_aio_poll(struct kiocb *kiocb, struct iocb *iocb) +{ + unsigned events = iocb->aio_buf; + + /* + * Did the user set any bits they weren't supposed to? (The + * above is actually a cast.) + */ + if (unlikely(events != iocb->aio_buf)) + return -EINVAL; + + return async_poll(kiocb, events); +} + +/* * aio_setup_iocb: * Performs the initial checks and aio retry method * setup for the kiocb at the time of io submission. 
*/ -ssize_t aio_setup_iocb(struct kiocb *kiocb) +ssize_t aio_setup_iocb(struct kiocb *kiocb, struct iocb __user *iocb) { struct file *file = kiocb->ki_filp; ssize_t ret = 0; @@ -1337,6 +1351,9 @@ ssize_t aio_setup_iocb(struct kiocb *kio if (file->f_op->aio_fsync) kiocb->ki_retry = aio_fsync; break; + case IOCB_CMD_POLL: + ret = generic_aio_poll(kiocb, iocb); + break; default: dprintk("EINVAL: io_submit: no operation provided\n"); ret = -EINVAL; @@ -1434,7 +1451,7 @@ int io_submit_one(struct kioctx *ctx, st aio_run = 0; aio_wakeups = 0; - ret = aio_setup_iocb(req); + ret = aio_setup_iocb(req, iocb); if ((-EBADF == ret) || (-EFAULT == ret)) goto out_put_req; diff -puN /dev/null fs/aiopoll.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/fs/aiopoll.c 2003-05-25 15:37:58.000000000 -0700 @@ -0,0 +1,178 @@ +/* + * This file contains the procedures for the handling of aio poll + */ + +#include +#include +#include +#include + +struct async_poll_iocb; + +struct async_poll_entry { + wait_queue_t wait; + wait_queue_head_t *whead; + struct async_poll_entry *next; + struct async_poll_iocb *apiocb; +}; + +struct async_poll_iocb { + poll_table pt; + void *armed; + int outofmem; + int events; /* event mask for async poll */ + struct async_poll_entry *ehead; + struct async_poll_entry entry[2]; /* space for two entries */ +}; + +static kmem_cache_t *async_poll_entry_cache; + +static inline struct async_poll_iocb *kiocb_to_apiocb(struct kiocb *kiocb) +{ + BUG_ON(sizeof(struct async_poll_iocb) > KIOCB_PRIVATE_SIZE); + return (struct async_poll_iocb *)kiocb->private; +} + +static inline struct kiocb *apiocb_to_kiocb(struct async_poll_iocb *apiocb) +{ + return container_of((void *)apiocb, struct kiocb, private); +} + +static void async_poll_freewait(struct async_poll_iocb *apiocb, + wait_queue_t *wait) +{ + struct async_poll_entry *entry = apiocb->ehead; + struct async_poll_entry *old; + + while (entry) { + if (wait != &entry->wait) + remove_wait_queue(entry->whead, 
&entry->wait); + else + __remove_wait_queue(entry->whead, &entry->wait); + old = entry; + entry = entry->next; + if (old != &apiocb->entry[0] && old != &apiocb->entry[1]) + kmem_cache_free(async_poll_entry_cache, old); + } +} + +static int async_poll_waiter(wait_queue_t *wait, unsigned mode, int sync) +{ + struct async_poll_entry *entry = (struct async_poll_entry *)wait; + struct async_poll_iocb *apiocb = entry->apiocb; + struct kiocb *iocb = apiocb_to_kiocb(apiocb); + unsigned int mask; + + mask = iocb->ki_filp->f_op->poll(iocb->ki_filp, NULL); + mask &= apiocb->events | POLLERR | POLLHUP; + if (mask) { + if (xchg(&apiocb->armed, NULL)) { + async_poll_freewait(apiocb, wait); + aio_complete(iocb, mask, 0); + return 1; + } + } + return 0; +} + +int async_poll_cancel(struct kiocb *kiocb, struct io_event *res) +{ + struct async_poll_iocb *apiocb = kiocb_to_apiocb(kiocb); + void *armed; + + armed = xchg(&apiocb->armed, NULL); + aio_put_req(kiocb); + if (armed) { + async_poll_freewait(apiocb, NULL); + /* + * Since async_poll_freewait() locks the wait queue, we + * know that async_poll_waiter() is either not going to + * be run or has finished all its work. 
+ */ + aio_put_req(kiocb); + return 0; + } + return -EAGAIN; +} + +static void async_poll_queue_proc(struct file *file, + wait_queue_head_t *whead, poll_table *pt) +{ + struct async_poll_iocb *apiocb = (struct async_poll_iocb *)pt; + struct async_poll_entry *entry; + + if (!apiocb->ehead) { + entry = &apiocb->entry[0]; + } else if (apiocb->ehead == &apiocb->entry[0]) { + entry = &apiocb->entry[1]; + } else { + entry = kmem_cache_alloc(async_poll_entry_cache, SLAB_KERNEL); + if (!entry) { + apiocb->outofmem = 1; + return; + } + } + init_waitqueue_func_entry(&entry->wait, async_poll_waiter); + entry->whead = whead; + entry->next = apiocb->ehead; + entry->apiocb = apiocb; + add_wait_queue(whead, &entry->wait); + apiocb->ehead = entry; +} + +int async_poll(struct kiocb *kiocb, int events) +{ + unsigned int mask; + struct async_poll_iocb *apiocb = kiocb_to_apiocb(kiocb); + + /* Fast path */ + if (kiocb->ki_filp->f_op && kiocb->ki_filp->f_op->poll) { + mask = kiocb->ki_filp->f_op->poll(kiocb->ki_filp, NULL); + mask &= events | POLLERR | POLLHUP; + if (mask & events) + return events; + } + + init_poll_funcptr(&apiocb->pt, async_poll_queue_proc); + apiocb->armed = &apiocb; + apiocb->outofmem = 0; + apiocb->events = events; + apiocb->ehead = NULL; + + kiocb->ki_users++; + + /* + * Flush the preceding stores before letting the complete + * or cancel paths get at this iocb. 
+ */ + wmb(); + + mask = DEFAULT_POLLMASK; + if (kiocb->ki_filp->f_op && kiocb->ki_filp->f_op->poll) + mask = kiocb->ki_filp->f_op->poll(kiocb->ki_filp, &apiocb->pt); + mask &= events | POLLERR | POLLHUP; + if (mask && xchg(&apiocb->armed, NULL)) { + async_poll_freewait(apiocb, NULL); + aio_complete(kiocb, mask, 0); + } + if (unlikely(apiocb->outofmem) && xchg(&apiocb->armed, NULL)) { + async_poll_freewait(apiocb, NULL); + aio_put_req(kiocb); + return -ENOMEM; + } + + kiocb->ki_cancel = async_poll_cancel; + aio_put_req(kiocb); + return -EIOCBQUEUED; +} + +static int __init async_poll_init(void) +{ + async_poll_entry_cache = kmem_cache_create("async_poll", + sizeof(struct async_poll_entry), 0, 0, NULL, NULL); + if (!async_poll_entry_cache) + panic("unable to alloc poll_entry_cache"); + return 0; +} + +module_init(async_poll_init); diff -puN fs/Makefile~aio-poll fs/Makefile --- 25/fs/Makefile~aio-poll 2003-05-25 15:37:48.000000000 -0700 +++ 25-akpm/fs/Makefile 2003-05-25 15:37:48.000000000 -0700 @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table. 
namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \ filesystems.o namespace.o seq_file.o xattr.o libfs.o \ - fs-writeback.o mpage.o direct-io.o aio.o + fs-writeback.o mpage.o direct-io.o aio.o aiopoll.o obj-$(CONFIG_EPOLL) += eventpoll.o diff -puN fs/select.c~aio-poll fs/select.c --- 25/fs/select.c~aio-poll 2003-05-25 15:37:48.000000000 -0700 +++ 25-akpm/fs/select.c 2003-05-25 15:37:48.000000000 -0700 @@ -24,7 +24,6 @@ #include #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) -#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) struct poll_table_entry { struct file * filp; diff -puN include/linux/aio_abi.h~aio-poll include/linux/aio_abi.h --- 25/include/linux/aio_abi.h~aio-poll 2003-05-25 15:37:48.000000000 -0700 +++ 25-akpm/include/linux/aio_abi.h 2003-05-25 15:37:48.000000000 -0700 @@ -38,8 +38,8 @@ enum { IOCB_CMD_FDSYNC = 3, /* These two are experimental. * IOCB_CMD_PREADX = 4, - * IOCB_CMD_POLL = 5, */ + IOCB_CMD_POLL = 5, IOCB_CMD_NOOP = 6, }; diff -puN include/linux/poll.h~aio-poll include/linux/poll.h --- 25/include/linux/poll.h~aio-poll 2003-05-25 15:37:48.000000000 -0700 +++ 25-akpm/include/linux/poll.h 2003-05-25 15:37:48.000000000 -0700 @@ -10,6 +10,8 @@ #include #include +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) + struct poll_table_struct; /* diff -puN include/linux/aio.h~aio-poll include/linux/aio.h --- 25/include/linux/aio.h~aio-poll 2003-05-25 15:37:58.000000000 -0700 +++ 25-akpm/include/linux/aio.h 2003-05-25 15:37:58.000000000 -0700 @@ -167,6 +167,8 @@ extern int FASTCALL(io_submit_one(struct struct kioctx *lookup_ioctx(unsigned long ctx_id); int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb, struct iocb *iocb)); +int async_poll(struct kiocb *iocb, int events); + #define get_ioctx(kioctx) do { if (unlikely(atomic_read(&(kioctx)->users) <= 0)) BUG(); atomic_inc(&(kioctx)->users); } while (0) #define put_ioctx(kioctx) do { if 
(unlikely(atomic_dec_and_test(&(kioctx)->users))) __put_ioctx(kioctx); else if (unlikely(atomic_read(&(kioctx)->users) < 0)) BUG(); } while (0) _