From: Andy Whitcroft If a fault in the kernel leads to an unexpected protection fault whilst in a code path which holds mmap_sem we will deadlock in do_page_fault() while trying to classify the fault. By carefully testing the source of the fault we can detect and OOPS on the vast majority of these, greatly enhancing diagnosis of such bugs. --- 25-akpm/arch/ppc64/mm/fault.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 files changed, 38 insertions(+), 1 deletion(-) diff -puN arch/ppc64/mm/fault.c~ppc64-fault-deadlock-fix arch/ppc64/mm/fault.c --- 25/arch/ppc64/mm/fault.c~ppc64-fault-deadlock-fix Wed May 19 14:39:50 2004 +++ 25-akpm/arch/ppc64/mm/fault.c Wed May 19 14:39:50 2004 @@ -75,6 +75,8 @@ static int store_updates_sp(struct pt_re return 0; } +int check_exception(struct pt_regs *regs); + /* * The error_code parameter is * - DSISR for a non-SLB data access fault, @@ -110,7 +112,29 @@ void do_page_fault(struct pt_regs *regs, bad_page_fault(regs, address, SIGSEGV); return; } - down_read(&mm->mmap_sem); + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibility of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. 
+ */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !check_exception(regs)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -200,6 +224,7 @@ good_area: bad_area: up_read(&mm->mmap_sem); +bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { info.si_signo = SIGSEGV; @@ -259,3 +284,15 @@ void bad_page_fault(struct pt_regs *regs /* kernel has accessed a bad area */ die("Kernel access of bad area", regs, sig); } + +int check_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + return 1; + } + + return 0; +} _