From: olof@austin.ibm.com (Olof Johansson) Some futex functions do get_user calls while holding mmap_sem for reading. If get_user() faults, and another thread happens to be in mmap (or somewhere else waiting on down_write for the same semaphore), then do_page_fault will deadlock. Most architectures seem to be exposed to this. To avoid it, make sure the page is available. If not, release the semaphore, fault it in and retry. I also found another exposure by inspection, moving some of the code around avoids the possible deadlock there. Signed-off-by: Olof Johansson Signed-off-by: Andrew Morton --- 25-akpm/kernel/futex.c | 36 ++++++++++++++++++++++++++++++------ 25-akpm/mm/mempolicy.c | 8 ++++++-- 2 files changed, 36 insertions(+), 8 deletions(-) diff -puN kernel/futex.c~fix-futex-mmap_sem-deadlock kernel/futex.c --- 25/kernel/futex.c~fix-futex-mmap_sem-deadlock 2005-02-22 18:16:57.000000000 -0800 +++ 25-akpm/kernel/futex.c 2005-02-22 18:16:57.000000000 -0800 @@ -329,6 +329,7 @@ static int futex_requeue(unsigned long u int ret, drop_count = 0; unsigned int nqueued; + retry: down_read(&current->mm->mmap_sem); ret = get_futex_key(uaddr1, &key1); @@ -355,9 +356,19 @@ static int futex_requeue(unsigned long u before *uaddr1. 
*/ smp_mb(); - if (get_user(curval, (int __user *)uaddr1) != 0) { - ret = -EFAULT; - goto out; + inc_preempt_count(); + ret = __copy_from_user_inatomic(&curval, (int __user *)uaddr1, sizeof(int)); + dec_preempt_count(); + + if (unlikely(ret)) { + up_read(&current->mm->mmap_sem); + /* Re-do the access outside the lock */ + ret = get_user(curval, (int __user *)uaddr1); + + if (!ret) + goto retry; + + return ret; } if (curval != *valp) { ret = -EAGAIN; @@ -480,6 +491,7 @@ static int futex_wait(unsigned long uadd int ret, curval; struct futex_q q; + retry: down_read(&current->mm->mmap_sem); ret = get_futex_key(uaddr, &q.key); @@ -508,9 +520,21 @@ static int futex_wait(unsigned long uadd * We hold the mmap semaphore, so the mapping cannot have changed * since we looked it up in get_futex_key. */ - if (get_user(curval, (int __user *)uaddr) != 0) { - ret = -EFAULT; - goto out_unqueue; + inc_preempt_count(); + ret = __copy_from_user_inatomic(&curval, (int __user *)uaddr, sizeof(int)); + dec_preempt_count(); + if (unlikely(ret)) { + up_read(&current->mm->mmap_sem); + + if (!unqueue_me(&q)) /* There's a chance we got woken already */ + return 0; + + /* Re-do the access outside the lock */ + ret = get_user(curval, (int __user *)uaddr); + + if (!ret) + goto retry; + return ret; } if (curval != val) { ret = -EWOULDBLOCK; diff -puN mm/mempolicy.c~fix-futex-mmap_sem-deadlock mm/mempolicy.c --- 25/mm/mempolicy.c~fix-futex-mmap_sem-deadlock 2005-02-22 18:16:57.000000000 -0800 +++ 25-akpm/mm/mempolicy.c 2005-02-22 18:16:57.000000000 -0800 @@ -533,9 +533,13 @@ asmlinkage long sys_get_mempolicy(int __ } else pval = pol->policy; - err = -EFAULT; + if (vma) { + up_read(&current->mm->mmap_sem); + vma = NULL; + } + if (policy && put_user(pval, policy)) - goto out; + return -EFAULT; err = 0; if (nmask) { _