diff -urN linux.10/fs/select.c linux.20/fs/select.c --- linux.10/fs/select.c Mon Sep 24 02:16:05 2001 +++ linux.20/fs/select.c Wed Jan 23 01:23:48 2002 @@ -12,6 +12,10 @@ * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). + * June 2001 + * Added async_poll implementation. -bcrl + * Nov 2001 + * Async poll improvements from Suparna Bhattacharya */ #include @@ -19,6 +23,8 @@ #include #include /* for STICKY_TIMEOUTS */ #include +#include +#include #include @@ -26,19 +32,36 @@ #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) struct poll_table_entry { - struct file * filp; wait_queue_t wait; wait_queue_head_t * wait_address; + struct file * filp; + poll_table * p; }; struct poll_table_page { + unsigned long size; struct poll_table_page * next; struct poll_table_entry * entry; struct poll_table_entry entries[0]; }; #define POLL_TABLE_FULL(table) \ - ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) + ((unsigned long)((table)->entry+1) > \ + (table)->size + (unsigned long)(table)) + +/* async poll uses only one entry per poll table as it is linked to an iocb */ +typedef struct async_poll_table_struct { + poll_table pt; + struct worktodo wtd; + int events; /* event mask for async poll */ + int wake; + long sync; + struct poll_table_page pt_page; /* one poll table page hdr */ + struct poll_table_entry entries[1]; /* space for a single entry */ +} async_poll_table; + + +static kmem_cache_t *async_poll_table_cache; /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. 
@@ -61,6 +84,8 @@ struct poll_table_page *old; entry = p->entry; + if (entry == p->entries) /* page holds no entries: may happen with async poll */ + break; do { entry--; remove_wait_queue(entry->wait_address,&entry->wait); @@ -68,10 +93,83 @@ } while (entry > p->entries); old = p; p = p->next; - free_page((unsigned long) old); + if (old->size == PAGE_SIZE) /* only pages marked PAGE_SIZE by __pollwait() are freed here; async_poll() gives its embedded page a different size */ + free_page((unsigned long) old); + } + if (pt->iocb) /* async poll: pt is the start of an async_poll_table taken from the slab cache */ + kmem_cache_free(async_poll_table_cache, pt); +} + +void async_poll_complete(void *data) +{ /* Action installed on pasync->wtd by async_poll(): re-polls the file and completes the iocb once a requested event, POLLERR or POLLHUP is reported. */ + async_poll_table *pasync = data; + poll_table *p = data; /* same pointer: pt is the first member of async_poll_table */ + struct kiocb *iocb = p->iocb; + unsigned int mask; + + pasync->wake = 0; /* cleared before polling; async_poll_waiter() sets it again if a wakeup arrives meanwhile */ + wmb(); + do { + mask = iocb->filp->f_op->poll(iocb->filp, p); + mask &= pasync->events | POLLERR | POLLHUP; + if (mask) { + poll_freewait(p); /* also returns pasync to the slab cache because p->iocb is set; pasync and p are not touched after this */ + aio_complete(iocb, mask, 0); + return; + } + pasync->sync = 0; /* re-arm: lets async_poll_waiter() queue the wtd again */ + wmb(); + } while (pasync->wake); /* a wakeup was flagged during the poll above: poll once more */ + +} + +static void async_poll_waiter(wait_queue_t *wait) +{ /* Wait-queue callback that __pollwait() registers for async polls; queues pasync->wtd unless sync is already set. */ + struct poll_table_entry *entry = (struct poll_table_entry *)wait; /* relies on wait being the first member of struct poll_table_entry */ + async_poll_table *pasync = (async_poll_table *)(entry->p); + + /* avoid writes to the cacheline if possible for SMP */ + if (!pasync->wake) { + pasync->wake = 1; + /* ensure only one wake up queues the wtd */ + if (!pasync->sync && !test_and_set_bit(0, &pasync->sync)) + wtd_queue(&pasync->wtd); } } +int async_poll(struct kiocb *iocb, int events) +{ /* Start an asynchronous poll of iocb->filp for 'events'. Returns -ENOMEM if the table cannot be allocated, otherwise 0; a result that is already available is reported through aio_complete() before returning. */ + unsigned int mask; + async_poll_table *pasync; + poll_table *p; + + pasync = kmem_cache_alloc(async_poll_table_cache, SLAB_KERNEL); + if (!pasync) + return -ENOMEM; + + p = (poll_table *)pasync; /* pt is the first member, so both pointers name the same object */ + poll_initwait(p); + wtd_set_action(&pasync->wtd, async_poll_complete, pasync); + p->iocb = iocb; /* a non-NULL iocb is what __pollwait() and poll_freewait() test to detect an async poll */ + pasync->wake = 0; + pasync->sync = 0; + pasync->events = events; + pasync->pt_page.entry = pasync->pt_page.entries; + pasync->pt_page.size = sizeof(pasync->pt_page); /* NOTE(review): this counts only the page header, so POLL_TABLE_FULL() already evaluates true for this page before any entry is stored; confirm whether entries[1] was meant to be included */ + p->table = &pasync->pt_page; /* NOTE(review): pt_page.next is not initialised in this function and the cache is created without a constructor; confirm nothing reads it */ + + mask = DEFAULT_POLLMASK; /* used as the result when the file has no poll method */ + if (iocb->filp->f_op && iocb->filp->f_op->poll) + mask = iocb->filp->f_op->poll(iocb->filp, p); + mask &= events 
| POLLERR | POLLHUP; + if (mask) { /* an event of interest is already pending: tear down and complete immediately */ + poll_freewait(p); + aio_complete(iocb, mask, 0); + } + + return 0; +} + void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { struct poll_table_page *table = p->table; @@ -85,6 +183,7 @@ __set_current_state(TASK_RUNNING); return; } + new_table->size = PAGE_SIZE; /* marks the page so that poll_freewait() releases it with free_page() */ new_table->entry = new_table->entries; new_table->next = table; p->table = new_table; @@ -98,7 +197,11 @@ get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); + entry->p = p; /* back-pointer that async_poll_waiter() uses to reach the owning table */ + if (p->iocb) /* async poll: wake through a callback instead of a task */ + init_waitqueue_func_entry(&entry->wait, async_poll_waiter); + else + init_waitqueue_entry(&entry->wait, current); add_wait_queue(wait_address,&entry->wait); } } @@ -494,3 +597,14 @@ poll_freewait(&table); return err; } + +static int __init async_poll_init(void) +{ /* Creates the slab cache that async_poll() allocates its tables from; panics if the cache cannot be created. */ + async_poll_table_cache = kmem_cache_create("async poll table", + sizeof(async_poll_table), 0, 0, NULL, NULL); + if (!async_poll_table_cache) + panic("unable to alloc poll_table_cache"); + return 0; +} + +module_init(async_poll_init); diff -urN linux.10/include/linux/poll.h linux.20/include/linux/poll.h --- linux.10/include/linux/poll.h Mon Jan 14 21:10:19 2002 +++ linux.20/include/linux/poll.h Wed Jan 23 01:22:59 2002 @@ -9,12 +9,15 @@ #include #include #include +#include struct poll_table_page; +struct kiocb; typedef struct poll_table_struct { - int error; - struct poll_table_page * table; + int error; + struct poll_table_page *table; + struct kiocb *iocb; /* iocb for async poll; NULL for an ordinary poll */ } poll_table; extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); @@ -29,8 +32,11 @@ { pt->error = 0; pt->table = NULL; + pt->iocb = NULL; /* default: not an async poll */ } + extern void poll_freewait(poll_table* pt); +extern int async_poll(struct kiocb *iocb, int events); /*