aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>2010-09-21 12:01:51 -0700
committerH. Peter Anvin <hpa@linux.intel.com>2010-10-19 13:57:08 -0700
commit617d34d9e5d8326ec8f188c616aa06ac59d083fe (patch)
tree763d02b7713bad65ba819a8334bb0e95d4370352
parent44235dcde416104b8e1db7606c283f4c0149c760 (diff)
downloadlinux-mce-2.6-617d34d9e5d8326ec8f188c616aa06ac59d083fe.tar.gz
x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
Take mm->page_table_lock while syncing the vmalloc region. This prevents a race with the Xen pagetable pin/unpin code, which expects that the page_table_lock is already held. If this race occurs, then Xen can see an inconsistent page type (a page can either be read/write or a pagetable page, and pin/unpin converts it between them), which will cause either the pin or the set_p[gm]d to fail; either will crash the kernel. vmalloc_sync_all() should be called rarely, so this extra use of page_table_lock should not interfere with its normal users. The mm pointer is stashed in the pgd page's index field, as that won't be otherwise used for pgds. Reported-by: Ian Campbell <ian.cambell@eu.citrix.com> Originally-by: Jan Beulich <jbeulich@novell.com> LKML-Reference: <4CB88A4C.1080305@goop.org> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/mm/pgtable.c20
4 files changed, 36 insertions, 4 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2d0a33bd2971aa..ada823a13c7c94 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index caec22906d7c4a..6c27c39f8a3757 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
- if (!vmalloc_sync_one(page_address(page), address))
+ spinlock_t *pgt_lock;
+ int ret;
+
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+
+ spin_lock(pgt_lock);
+ ret = vmalloc_sync_one(page_address(page), address);
+ spin_unlock(pgt_lock);
+
+ if (!ret)
break;
}
spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ad7c0ff5d2b57..4d323fb770c289 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -116,12 +116,19 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
+ spinlock_t *pgt_lock;
+
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+ spin_lock(pgt_lock);
+
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
else
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
+
+ spin_unlock(pgt_lock);
}
spin_unlock_irqrestore(&pgd_lock, flags);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e5d..c70e57dbb4912a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+ virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+ return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
/* If the pgd points to a shared pagetable level (either the
ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
}
/* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD)
+ if (!SHARED_KERNEL_PMD) {
+ pgd_set_mm(pgd, mm);
pgd_list_add(pgd);
+ }
}
static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
*/
spin_lock_irqsave(&pgd_lock, flags);
- pgd_ctor(pgd);
+ pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
spin_unlock_irqrestore(&pgd_lock, flags);