/*
 * Async poll for AIO: adds the IOCB_CMD_POLL opcode and its implementation.
 *
 * Per-file summary:
 *  - fs/aio.c: new generic_aio_poll() returns -EINVAL when aio_buf does not
 *    survive conversion to unsigned and otherwise calls async_poll();
 *    io_submit_one() dispatches IOCB_CMD_POLL to it.
 *  - fs/select.c: poll_table_page gains a size field so POLL_TABLE_FULL()
 *    no longer hard-codes PAGE_SIZE; poll_freewait() becomes a wrapper
 *    around __poll_freewait(); adds async_poll(), async_poll_waiter(),
 *    async_poll_complete(), async_poll_cancel() and a slab cache for
 *    async_poll_table that async_poll_init() creates.
 *  - include/linux/aio_abi.h: IOCB_CMD_POLL = 5 becomes a real enumerator.
 *  - include/linux/pipe_fs_i.h: adds pipe_aio_lock and two iocb lists to
 *    struct pipe_inode_info (nothing in this patch uses them).
 *  - include/linux/poll.h: poll_table gains an iocb pointer, cleared by
 *    poll_initwait(); declares async_poll().
 *
 * NOTE(review): every "#include" line in this copy has lost its header
 * name, and the original indentation was collapsed; the indentation below
 * is reconstructed. Restore the header names before applying, and expect
 * to need "patch -l" until whitespace is checked against the real tree.
 *
 * NOTE(review): async_poll() sets pt_page.size to sizeof(pt_page), which
 * excludes the trailing entries[1] slot. POLL_TABLE_FULL() is then already
 * true for the embedded page, so, assuming __pollwait() tests it before
 * taking a slot, the embedded entry is never used and a whole page is
 * allocated per request. Confirm whether the size was meant to include
 * sizeof(pasync->entries).
 *
 * NOTE(review): async_poll() assigns iocb->cancel only after the request
 * may already have been completed via aio_complete(); confirm that this
 * ordering is intended.
 *
 * NOTE(review): nothing in this patch queues pasync->wtd or sets
 * pasync->wake / pasync->sync to a non-zero value; presumably that happens
 * elsewhere -- verify.
 */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/aio.c x/fs/aio.c
--- x-ref/fs/aio.c	2003-07-17 07:26:29.000000000 +0200
+++ x/fs/aio.c	2003-07-17 08:12:18.000000000 +0200
@@ -902,6 +902,19 @@ asmlinkage long sys_io_destroy(aio_conte
 	return -EINVAL;
 }
 
+ssize_t generic_aio_poll(struct file *file, struct kiocb *req, struct iocb *iocb)
+{
+	unsigned events = iocb->aio_buf;
+
+	/* Did the user set any bits they weren't supposed to?  (The
+	 * above is actually a cast.)
+	 */
+	if (unlikely(events != iocb->aio_buf))
+		return -EINVAL;
+
+	return async_poll(req, events);
+}
+
 static inline int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
 				  struct iocb *iocb)
 {
@@ -978,6 +991,9 @@ static inline int io_submit_one(struct k
 	case IOCB_CMD_FSYNC:
 		op = file->f_op->aio_fsync;
 		break;
+	case IOCB_CMD_POLL:
+		op = generic_aio_poll;
+		break;
 	default:
 		dprintk("EINVAL: io_submit: no operation %d provided by aio\n",
 			iocb->aio_lio_opcode);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/fs/select.c x/fs/select.c
--- x-ref/fs/select.c	2003-07-17 07:26:16.000000000 +0200
+++ x/fs/select.c	2003-07-17 07:20:52.000000000 +0200
@@ -12,6 +12,12 @@
  *  24 January 2000
  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
+ *  June 2001
+ *    Added async_poll implementation. -bcrl
+ *  Nov 2001
+ *    Async poll improvements from Suparna Bhattacharya
+ *  April 2002
+ *    smp safe async poll plus cancellation. -bcrl
  */
 
 #include
@@ -19,6 +25,8 @@
 #include
 #include /* for STICKY_TIMEOUTS */
 #include
+#include
+#include
 
 #include
 
@@ -26,19 +34,36 @@
 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 
 struct poll_table_entry {
-	struct file * filp;
 	wait_queue_t wait;
 	wait_queue_head_t * wait_address;
+	struct file * filp;
+	poll_table * p;
 };
 
 struct poll_table_page {
+	unsigned long size;
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
 	struct poll_table_entry entries[0];
 };
 
 #define POLL_TABLE_FULL(table) \
-	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
+	((unsigned long)((table)->entry+1) > \
+	 (table)->size + (unsigned long)(table))
+
+/* async poll uses only one entry per poll table as it is linked to an iocb */
+typedef struct async_poll_table_struct {
+	poll_table pt;
+	struct worktodo wtd;
+	int events;		/* event mask for async poll */
+	int wake;
+	long sync;
+	struct poll_table_page pt_page;	/* one poll table page hdr */
+	struct poll_table_entry entries[1];	/* space for a single entry */
+} async_poll_table;
+
+
+static kmem_cache_t *async_poll_table_cache;
 
 /*
  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
@@ -53,7 +78,7 @@ struct poll_table_page {
  * poll table.
  */
 
-void poll_freewait(poll_table* pt)
+void __poll_freewait(poll_table* pt, wait_queue_t *wait)
 {
 	struct poll_table_page * p = pt->table;
 	while (p) {
@@ -61,17 +86,140 @@ void poll_freewait(poll_table* pt)
 		struct poll_table_page *old;
 
 		entry = p->entry;
+		if (entry == p->entries) /* may happen with async poll */
+			break;
 		do {
 			entry--;
-			remove_wait_queue(entry->wait_address,&entry->wait);
+			if (wait != &entry->wait)
+				remove_wait_queue(entry->wait_address,&entry->wait);
+			else
+				__remove_wait_queue(entry->wait_address,&entry->wait);
 			fput(entry->filp);
 		} while (entry > p->entries);
 		old = p;
 		p = p->next;
-		free_page((unsigned long) old);
+		if (old->size == PAGE_SIZE)
+			free_page((unsigned long) old);
+	}
+	if (pt->iocb)
+		kmem_cache_free(async_poll_table_cache, pt);
+}
+
+void poll_freewait(poll_table* pt)
+{
+	__poll_freewait(pt, NULL);
+}
+
+void async_poll_complete(void *data)
+{
+	async_poll_table *pasync = data;
+	poll_table *p = data;
+	struct kiocb *iocb = p->iocb;
+	unsigned int mask;
+
+	pasync->wake = 0;
+	wmb();
+	do {
+		mask = iocb->filp->f_op->poll(iocb->filp, p);
+		mask &= pasync->events | POLLERR | POLLHUP;
+		if (mask) {
+			poll_table *p2 = xchg(&iocb->data, NULL);
+			if (p2) {
+				poll_freewait(p2);
+				aio_complete(iocb, mask, 0);
+			}
+			return;
+		}
+		pasync->sync = 0;
+		wmb();
+	} while (pasync->wake);
+}
+
+static void async_poll_waiter(wait_queue_t *wait)
+{
+	struct poll_table_entry *entry = (struct poll_table_entry *)wait;
+	async_poll_table *pasync = (async_poll_table *)(entry->p);
+	struct kiocb *iocb = pasync->pt.iocb;
+	unsigned int mask;
+
+	mask = iocb->filp->f_op->poll(iocb->filp, NULL);
+	mask &= pasync->events | POLLERR | POLLHUP;
+	if (mask) {
+		poll_table *p2 = xchg(&iocb->data, NULL);
+		if (p2) {
+			__poll_freewait(p2, wait);
+			aio_complete(iocb, mask, 0);
+		}
+		return;
 	}
 }
 
+int async_poll_cancel(struct kiocb *iocb, struct io_event *res)
+{
+	poll_table *p;
+
+	p = xchg(&iocb->data, NULL);
+	aio_put_req(iocb);
+	if (p) {
+		poll_freewait(p);
+		/*
+		 * Since poll_freewait() locks the wait queue, we know that
+		 * async_poll_waiter() is either not going to be run or has
+		 * finished all its work.
+		 */
+		aio_put_req(iocb);
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+int async_poll(struct kiocb *iocb, int events)
+{
+	unsigned int mask;
+	async_poll_table *pasync;
+	poll_table *p;
+
+	/* Fast path */
+	if (iocb->filp->f_op && iocb->filp->f_op->poll) {
+		mask = iocb->filp->f_op->poll(iocb->filp, NULL);
+		mask &= events | POLLERR | POLLHUP;
+		if (mask & events)
+			return mask;
+	}
+
+	pasync = kmem_cache_alloc(async_poll_table_cache, SLAB_KERNEL);
+	if (!pasync)
+		return -ENOMEM;
+
+	p = (poll_table *)pasync;
+	poll_initwait(p);
+	wtd_set_action(&pasync->wtd, async_poll_complete, pasync);
+	p->iocb = iocb;
+	pasync->wake = 0;
+	pasync->sync = 0;
+	pasync->events = events;
+	pasync->pt_page.entry = pasync->pt_page.entries;
+	pasync->pt_page.size = sizeof(pasync->pt_page);
+	p->table = &pasync->pt_page;
+
+	iocb->data = p;
+	iocb->users ++;
+	wmb();
+
+	mask = DEFAULT_POLLMASK;
+	if (iocb->filp->f_op && iocb->filp->f_op->poll)
+		mask = iocb->filp->f_op->poll(iocb->filp, p);
+	mask &= events | POLLERR | POLLHUP;
+	if (mask && xchg(&iocb->data, NULL)) {
+		poll_freewait(p);
+		aio_complete(iocb, mask, 0);
+	}
+
+	iocb->cancel = async_poll_cancel;
+	aio_put_req(iocb);
+	return 0;
+}
+
 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
 	struct poll_table_page *table = p->table;
@@ -85,6 +233,7 @@ void __pollwait(struct file * filp, wait
 			__set_current_state(TASK_RUNNING);
 			return;
 		}
+		new_table->size = PAGE_SIZE;
 		new_table->entry = new_table->entries;
 		new_table->next = table;
 		p->table = new_table;
@@ -98,7 +247,11 @@ void __pollwait(struct file * filp, wait
 		get_file(filp);
 		entry->filp = filp;
 		entry->wait_address = wait_address;
-		init_waitqueue_entry(&entry->wait, current);
+		entry->p = p;
+		if (p->iocb) /* async poll */
+			init_waitqueue_func_entry(&entry->wait, async_poll_waiter);
+		else
+			init_waitqueue_entry(&entry->wait, current);
 		add_wait_queue(wait_address,&entry->wait);
 		smp_mb();
 	}
@@ -495,3 +648,14 @@ out:
 	poll_freewait(&table);
 	return err;
 }
+
+static int __init async_poll_init(void)
+{
+	async_poll_table_cache = kmem_cache_create("async poll table",
+				sizeof(async_poll_table), 0, 0, NULL, NULL);
+	if (!async_poll_table_cache)
+		panic("unable to alloc poll_table_cache");
+	return 0;
+}
+
+module_init(async_poll_init);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/aio_abi.h x/include/linux/aio_abi.h
--- x-ref/include/linux/aio_abi.h	2003-07-17 07:26:29.000000000 +0200
+++ x/include/linux/aio_abi.h	2003-07-17 08:08:03.000000000 +0200
@@ -36,10 +36,11 @@ enum {
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
+	/*
+	 * Experimental:
 	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
 	 */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 };
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/pipe_fs_i.h x/include/linux/pipe_fs_i.h
--- x-ref/include/linux/pipe_fs_i.h	2003-03-15 03:25:14.000000000 +0100
+++ x/include/linux/pipe_fs_i.h	2003-07-17 08:02:21.000000000 +0200
@@ -13,6 +13,9 @@ struct pipe_inode_info {
 	unsigned int waiting_writers;
 	unsigned int r_counter;
 	unsigned int w_counter;
+	spinlock_t pipe_aio_lock;
+	struct list_head read_iocb_list;
+	struct list_head write_iocb_list;
 };
 
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/poll.h x/include/linux/poll.h
--- x-ref/include/linux/poll.h	2003-07-17 07:03:52.000000000 +0200
+++ x/include/linux/poll.h	2003-07-17 08:08:15.000000000 +0200
@@ -9,12 +9,15 @@
 #include
 #include
 #include
+#include
 
 struct poll_table_page;
+struct kiocb;
 
 typedef struct poll_table_struct {
-	int error;
-	struct poll_table_page * table;
+	int error;
+	struct poll_table_page *table;
+	struct kiocb *iocb;		/* iocb for async poll */
 } poll_table;
 
 extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
@@ -29,8 +32,11 @@ static inline void poll_initwait(poll_ta
 {
 	pt->error = 0;
 	pt->table = NULL;
+	pt->iocb = NULL;
 }
+
 
 extern void poll_freewait(poll_table* pt);
+extern int async_poll(struct kiocb *iocb, int events);
 
 /*