From: Davide Libenzi After the ep_remove() the "epi" is given back to the cache, so "epi->ep" might become invalid. It was not cought by my tests because the element wasn't immediately reused (and because I was using a single epoll fd, so the "ep" item remained the same). 25-akpm/fs/eventpoll.c | 95 ++++++++++++++++++++++++++++++++++--------------- fs/eventpoll.c.orig | 0 2 files changed, 67 insertions(+), 28 deletions(-) diff -puN fs/eventpoll.c~eventpoll-use-after-free-fix fs/eventpoll.c --- 25/fs/eventpoll.c~eventpoll-use-after-free-fix Tue Jun 3 15:42:27 2003 +++ 25-akpm/fs/eventpoll.c Tue Jun 3 15:42:27 2003 @@ -37,8 +37,41 @@ #include #include #include +#include +/* + * LOCKING: + * There are three level of locking required by epoll : + * + * 1) epsem (semaphore) + * 2) ep->sem (rw_semaphore) + * 3) ep->lock (rw_lock) + * + * The acquire order is the one listed above, from 1 to 3. + * We need a spinlock (ep->lock) because we manipulate objects + * from inside the poll callback, that might be triggered from + * a wake_up() that in turn might be called from IRQ context. + * So we can't sleep inside the poll callback and hence we need + * a spinlock. During the event transfer loop (from kernel to + * user space) we could end up sleeping due a copy_to_user(), so + * we need a lock that will allow us to sleep. This lock is a + * read-write semaphore (ep->sem). It is acquired on read during + * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) + * and during eventpoll_release(). Then we also need a global + * semaphore to serialize eventpoll_release() and ep_free(). + * This semaphore is acquired by ep_free() during the epoll file + * cleanup path and it is also acquired by eventpoll_release() + * if a file has been pushed inside an epoll set and it is then + * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). + * It is possible to drop the "ep->sem" and to use the global + * semaphore "epsem" (together with "ep->lock") to have it working, + * but having "ep->sem" will make the interface more scalable. + * Events that require holding "epsem" are very rare, while for + * normal operations the epoll private "ep->sem" will guarantee + * a greater scalability. + */ + #define EVENTPOLLFS_MAGIC 0x03111965 /* My birthday should work for this :) */ @@ -283,6 +316,10 @@ static struct super_block *eventpollfs_g int flags, const char *dev_name, void *data); +/* + * This semaphore is used to serialize ep_free() and eventpoll_release(). + */ +struct semaphore epsem; /* Safe wake up implementation */ static struct poll_safewake psw; @@ -414,6 +451,7 @@ void eventpoll_init_file(struct file *fi void eventpoll_release(struct file *file) { struct list_head *lsthead = &file->f_ep_links; + struct eventpoll *ep; struct epitem *epi; /* @@ -432,17 +470,23 @@ void eventpoll_release(struct file *file * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. * The only hit might come from ep_free() but by holding the semaphore - * will correctly serialize the operation. + * will correctly serialize the operation. We do need to acquire + * "ep->sem" after "epsem" because ep_remove() requires it when called + * from anywhere but ep_free(). */ + down(&epsem); while (!list_empty(lsthead)) { epi = list_entry(lsthead->next, struct epitem, fllink); + ep = epi->ep; EP_LIST_DEL(&epi->fllink); - down_write(&epi->ep->sem); - ep_remove(epi->ep, epi); - up_write(&epi->ep->sem); + down_write(&ep->sem); + ep_remove(ep, epi); + up_write(&ep->sem); } + + up(&epsem); } @@ -545,14 +589,9 @@ asmlinkage long sys_epoll_ctl(int epfd, */ ep = file->private_data; - /* - * Try to lookup the file inside our hash table. When an item is found - * ep_find() increases the usage count of the item so that it won't - * desappear underneath us. The only thing that might happen, if someone - * tries very hard, is a double insertion of the same file descriptor. - * This does not rapresent a problem though and we don't really want - * to put an extra syncronization object to deal with this harmless condition. - */ + down_write(&ep->sem); + + /* Try to lookup the file inside our hash table */ epi = ep_find(ep, tfile); error = -EINVAL; @@ -566,18 +605,9 @@ asmlinkage long sys_epoll_ctl(int epfd, error = -EEXIST; break; case EPOLL_CTL_DEL: - if (epi) { - /* - * We need to protect the remove operation because another - * thread might be doing an epoll_wait() and using the - * target file. - */ - down_write(&ep->sem); - + if (epi) error = ep_remove(ep, epi); - - up_write(&ep->sem); - } else + else error = -ENOENT; break; case EPOLL_CTL_MOD: @@ -596,6 +626,8 @@ asmlinkage long sys_epoll_ctl(int epfd, if (epi) ep_release_epitem(epi); + up_write(&ep->sem); + eexit_3: fput(tfile); eexit_2: @@ -852,8 +884,12 @@ static void ep_free(struct eventpoll *ep /* * We need to lock this because we could be hit by * eventpoll_release() while we're freeing the "struct eventpoll". + * We do not need to hold "ep->sem" here because the epoll file + * is on the way to be removed and no one has references to it + * anymore. The only hit might come from eventpoll_release() but + * holding "epsem" is sufficent here. */ - down_write(&ep->sem); + down(&epsem); /* * Walks through the whole hash by unregistering poll callbacks. @@ -884,7 +920,7 @@ static void ep_free(struct eventpoll *ep } } - up_write(&ep->sem); + up(&epsem); /* Free hash pages */ ep_free_pages(ep->hpages, EP_HASH_PAGES(ep->hashbits)); @@ -1208,6 +1244,7 @@ static int ep_remove(struct eventpoll *e { int error; unsigned long flags; + struct file *file = epi->file; /* * Removes poll wait queue hooks. We _have_ to do this without holding @@ -1220,10 +1257,10 @@ static int ep_remove(struct eventpoll *e ep_unregister_pollwait(ep, epi); /* Remove the current item from the list of epoll hooks */ - spin_lock(&epi->file->f_ep_lock); + spin_lock(&file->f_ep_lock); if (EP_IS_LINKED(&epi->fllink)) EP_LIST_DEL(&epi->fllink); - spin_unlock(&epi->file->f_ep_lock); + spin_unlock(&file->f_ep_lock); /* We need to acquire the write IRQ lock before calling ep_unlink() */ write_lock_irqsave(&ep->lock, flags); @@ -1242,7 +1279,7 @@ static int ep_remove(struct eventpoll *e error = 0; eexit_1: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", - current, ep, epi->file, error)); + current, ep, file, error)); return error; } @@ -1633,6 +1670,8 @@ static int __init eventpoll_init(void) { int error; + init_MUTEX(&epsem); + /* Initialize the structure used to perform safe poll wait head wake ups */ ep_poll_safewake_init(&psw); diff -puN fs/eventpoll.c.orig~eventpoll-use-after-free-fix fs/eventpoll.c.orig _