From 4c21e2f2441dc5fbb957b030333f5a3f2d02dea7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:40 -0700 Subject: [PATCH] mm: split page table lock Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area. This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.) In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled. Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later. There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mm-armv.c | 1 + arch/frv/mm/pgalloc.c | 4 ++-- arch/i386/mm/pgtable.c | 8 ++++---- arch/um/kernel/skas/mmu.c | 1 + fs/afs/file.c | 4 ++-- fs/buffer.c | 2 +- fs/jfs/jfs_metapage.c | 12 ++++++------ fs/xfs/linux-2.6/xfs_buf.c | 7 ++++--- include/linux/buffer_head.h | 6 +++--- include/linux/mm.h | 46 +++++++++++++++++++++++++++++++++++++-------- kernel/kexec.c | 4 ++-- mm/Kconfig | 13 +++++++++++++ mm/filemap.c | 2 +- mm/memory.c | 24 +++++++++++++---------- mm/mremap.c | 11 ++++++++++- mm/page_alloc.c | 16 ++++++++-------- mm/page_io.c | 6 ++++-- mm/rmap.c | 4 ++-- mm/shmem.c | 22 ++++++++++------------ mm/swap.c | 2 +- mm/swap_state.c | 8 ++++---- mm/swapfile.c | 12 ++++++------ mm/vmscan.c | 2 +- 23 files changed, 138 insertions(+), 79 deletions(-) diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index 60f3e039bac26b..1221fdde1769fb 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -229,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd) pte = pmd_page(*pmd); pmd_clear(pmd); dec_page_state(nr_page_table_pages); + pte_lock_deinit(pte); pte_free(pte); pmd_free(pmd); free: diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 4eaec0f3525b24..2c67dfe5a6b366 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd) if (pgd_list) pgd_list->private = (unsigned long) &page->index; pgd_list = page; - page->private = (unsigned long) &pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *) page->index; - pprev = (struct page **) page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) next->private = (unsigned long) pprev; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dcdce2c6c53239..39c099f15b5f84 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -188,19 +188,19 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); page->index = (unsigned long)pgd_list; if (pgd_list) - pgd_list->private = (unsigned long)&page->index; + set_page_private(pgd_list, (unsigned long)&page->index); pgd_list = page; - page->private = (unsigned long)&pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *)page->index; - pprev = (struct page **)page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) - next->private = (unsigned long)pprev; + set_page_private(next, (unsigned long)pprev); } void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 02cf36e0331a2b..9e5e39cea821f8 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -144,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm) if(!proc_mm || !ptrace_faultinfo){ free_page(mmu->id.stack); + pte_lock_deinit(virt_to_page(mmu->last_page_table)); pte_free_kernel((pte_t *) mmu->last_page_table); dec_page_state(nr_page_table_pages); #ifdef CONFIG_3_LEVEL_PGTABLES diff --git a/fs/afs/file.c b/fs/afs/file.c index 0d576987ec670d..4975c9c193dd37 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) cachefs_uncache_page(vnode->cache, page); #endif - pageio = (struct cachefs_page *) page->private; - page->private = 0; + pageio = (struct cachefs_page *) page_private(page); + set_page_private(page, 0); ClearPagePrivate(page); if (pageio) diff --git a/fs/buffer.c b/fs/buffer.c index b1667986442f13..2066e4cb700ca9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -96,7 +96,7 @@ static void __clear_page_buffers(struct page *page) { ClearPagePrivate(page); - page->private = 0; + set_page_private(page, 0); page_cache_release(page); } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 26091a5f88d43a..8a53981f9f2797 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -86,7 +86,7 @@ struct meta_anchor { atomic_t io_count; struct metapage *mp[MPS_PER_PAGE]; }; -#define mp_anchor(page) ((struct meta_anchor *)page->private) +#define mp_anchor(page) ((struct meta_anchor *)page_private(page)) static inline struct metapage *page_to_mp(struct page *page, uint offset) { @@ -108,7 +108,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) if (!a) return -ENOMEM; memset(a, 0, sizeof(struct meta_anchor)); - page->private = (unsigned long)a; + set_page_private(page, (unsigned long)a); SetPagePrivate(page); kmap(page); } @@ -136,7 +136,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) a->mp[index] = NULL; if (--a->mp_count == 0) { kfree(a); - page->private = 0; + set_page_private(page, 0); ClearPagePrivate(page); kunmap(page); } @@ -156,13 +156,13 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *)) #else static inline struct metapage *page_to_mp(struct page *page, uint offset) { - return PagePrivate(page) ? (struct metapage *)page->private : NULL; + return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; } static inline int insert_metapage(struct page *page, struct metapage *mp) { if (mp) { - page->private = (unsigned long)mp; + set_page_private(page, (unsigned long)mp); SetPagePrivate(page); kmap(page); } @@ -171,7 +171,7 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) static inline void remove_metapage(struct page *page, struct metapage *mp) { - page->private = 0; + set_page_private(page, 0); ClearPagePrivate(page); kunmap(page); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ba4767c04adfbf..4cd46abe843419 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -181,8 +181,9 @@ set_page_region( size_t offset, size_t length) { - page->private |= page_region_mask(offset, length); - if (page->private == ~0UL) + set_page_private(page, + page_private(page) | page_region_mask(offset, length)); + if (page_private(page) == ~0UL) SetPageUptodate(page); } @@ -194,7 +195,7 @@ test_page_region( { unsigned long mask = page_region_mask(offset, length); - return (mask && (page->private & mask) == mask); + return (mask && (page_private(page) & mask) == mask); } /* diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 88af42f5e04a8f..c937d6e65502e1 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ ({ \ - BUG_ON(!PagePrivate(page)); \ - ((struct buffer_head *)(page)->private); \ + BUG_ON(!PagePrivate(page)); \ + ((struct buffer_head *)page_private(page)); \ }) #define page_has_buffers(page) PagePrivate(page) @@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page, { page_cache_get(page); SetPagePrivate(page); - page->private = (unsigned long)head; + set_page_private(page, (unsigned long)head); } static inline void get_bh(struct buffer_head *bh) diff --git a/include/linux/mm.h b/include/linux/mm.h index e8d1424153bbde..8a514eca40d546 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -226,13 +226,18 @@ struct page { * to show when page is mapped * & limit reverse map searches. */ - unsigned long private; /* Mapping-private opaque data: + union { + unsigned long private; /* Mapping-private opaque data: * usually used for buffer_heads * if PagePrivate set; used for * swp_entry_t if PageSwapCache * When page is free, this indicates * order in the buddy system. */ +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + spinlock_t ptl; +#endif + } u; struct address_space *mapping; /* If low bit clear, points to * inode address_space, or NULL. * If page mapped as anonymous @@ -260,6 +265,9 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; +#define page_private(page) ((page)->u.private) +#define set_page_private(page, v) ((page)->u.private = (v)) + /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) @@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *)); #ifdef CONFIG_HUGETLB_PAGE -static inline int page_count(struct page *p) +static inline int page_count(struct page *page) { - if (PageCompound(p)) - p = (struct page *)p->private; - return atomic_read(&(p)->_count) + 1; + if (PageCompound(page)) + page = (struct page *)page_private(page); + return atomic_read(&page->_count) + 1; } static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) - page = (struct page *)page->private; + page = (struct page *)page_private(page); atomic_inc(&page->_count); } @@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page) static inline pgoff_t page_index(struct page *page) { if (unlikely(PageSwapCache(page))) - return page->private; + return page_private(page); return page->index; } @@ -779,9 +787,31 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +/* + * We tuck a spinlock to guard each pagetable page into its struct page, + * at page->private, with BUILD_BUG_ON to make sure that this will not + * overflow into the next struct page (as it might with DEBUG_SPINLOCK). + * When freeing, reset page->mapping so free_pages_check won't complain. + */ +#define __pte_lockptr(page) &((page)->u.ptl) +#define pte_lock_init(_page) do { \ + spin_lock_init(__pte_lockptr(_page)); \ +} while (0) +#define pte_lock_deinit(page) ((page)->mapping = NULL) +#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) +#else +/* + * We use mm->page_table_lock to guard all pagetable pages of the mm. + */ +#define pte_lock_init(page) do {} while (0) +#define pte_lock_deinit(page) do {} while (0) +#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ + #define pte_offset_map_lock(mm, pmd, address, ptlp) \ ({ \ - spinlock_t *__ptl = &(mm)->page_table_lock; \ + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ pte_t *__pte = pte_offset_map(pmd, address); \ *(ptlp) = __ptl; \ spin_lock(__ptl); \ diff --git a/kernel/kexec.c b/kernel/kexec.c index 36c5d9cd4cc14b..2c95848fbce826 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -334,7 +334,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) if (pages) { unsigned int count, i; pages->mapping = NULL; - pages->private = order; + set_page_private(pages, order); count = 1 << order; for (i = 0; i < count; i++) SetPageReserved(pages + i); @@ -347,7 +347,7 @@ static void kimage_free_pages(struct page *page) { unsigned int order, count, i; - order = page->private; + order = page_private(page); count = 1 << order; for (i = 0; i < count; i++) ClearPageReserved(page + i); diff --git a/mm/Kconfig b/mm/Kconfig index 391ffc54d13675..f35a550ba4b92f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -111,3 +111,16 @@ config SPARSEMEM_STATIC config SPARSEMEM_EXTREME def_bool y depends on SPARSEMEM && !SPARSEMEM_STATIC + +# Heavily threaded applications may benefit from splitting the mm-wide +# page_table_lock, so that faults on different parts of the user address +# space can be handled with less contention: split it at this NR_CPUS. +# Default to 4 for wider testing, though 8 might be more appropriate. +# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. +# PA-RISC's debug spinlock_t is too large for the 32-bit struct page. +# +config SPLIT_PTLOCK_CPUS + int + default "4096" if ARM && !CPU_CACHE_VIPT + default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT + default "4" diff --git a/mm/filemap.c b/mm/filemap.c index 8aa344e8848997..f560b41c8f61b1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -152,7 +152,7 @@ static int sync_page(void *word) * in the ->sync_page() methods make essential use of the * page_mapping(), merely passing the page down to the backing * device's unplug functions when it's non-NULL, which in turn - * ignore it for all cases but swap, where only page->private is + * ignore it for all cases but swap, where only page_private(page) is * of interest. When page_mapping() does go NULL, the entire * call stack gracefully ignores the page and returns. * -- wli diff --git a/mm/memory.c b/mm/memory.c index 8461e2dd91d75f..e9ef599498b5ee 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -114,6 +114,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) { struct page *page = pmd_page(*pmd); pmd_clear(pmd); + pte_lock_deinit(page); pte_free_tlb(tlb, page); dec_page_state(nr_page_table_pages); tlb->mm->nr_ptes--; @@ -294,10 +295,12 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) if (!new) return -ENOMEM; + pte_lock_init(new); spin_lock(&mm->page_table_lock); - if (pmd_present(*pmd)) /* Another has populated it */ + if (pmd_present(*pmd)) { /* Another has populated it */ + pte_lock_deinit(new); pte_free(new); - else { + } else { mm->nr_ptes++; inc_page_state(nr_page_table_pages); pmd_populate(mm, pmd, new); @@ -432,7 +435,7 @@ again: if (!dst_pte) return -ENOMEM; src_pte = pte_offset_map_nested(src_pmd, addr); - src_ptl = &src_mm->page_table_lock; + src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock(src_ptl); do { @@ -1194,15 +1197,16 @@ EXPORT_SYMBOL(remap_pfn_range); * (but do_wp_page is only called after already making such a check; * and do_anonymous_page and do_no_page can safely check later on). */ -static inline int pte_unmap_same(struct mm_struct *mm, +static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, pte_t *page_table, pte_t orig_pte) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) if (sizeof(pte_t) > sizeof(unsigned long)) { - spin_lock(&mm->page_table_lock); + spinlock_t *ptl = pte_lockptr(mm, pmd); + spin_lock(ptl); same = pte_same(*page_table, orig_pte); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } #endif pte_unmap(page_table); @@ -1655,7 +1659,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t pte; int ret = VM_FAULT_MINOR; - if (!pte_unmap_same(mm, page_table, orig_pte)) + if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) goto out; entry = pte_to_swp_entry(orig_pte); @@ -1773,7 +1777,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(page); entry = mk_pte(page, vma->vm_page_prot); - ptl = &mm->page_table_lock; + ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (!pte_none(*page_table)) goto release; @@ -1934,7 +1938,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff; int err; - if (!pte_unmap_same(mm, page_table, orig_pte)) + if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) return VM_FAULT_MINOR; if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { @@ -1992,7 +1996,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, pte, pmd, write_access, entry); } - ptl = &mm->page_table_lock; + ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (unlikely(!pte_same(*pte, entry))) goto unlock; diff --git a/mm/mremap.c b/mm/mremap.c index 8de77b632a206b..b535438c363cfb 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -72,7 +72,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct address_space *mapping = NULL; struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; - spinlock_t *old_ptl; + spinlock_t *old_ptl, *new_ptl; if (vma->vm_file) { /* @@ -88,8 +88,15 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_vma->vm_truncate_count = 0; } + /* + * We don't have to worry about the ordering of src and dst + * pte locks because exclusive mmap_sem prevents deadlock. + */ old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl); new_pte = pte_offset_map_nested(new_pmd, new_addr); + new_ptl = pte_lockptr(mm, new_pmd); + if (new_ptl != old_ptl) + spin_lock(new_ptl); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { @@ -101,6 +108,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, set_pte_at(mm, new_addr, new_pte, pte); } + if (new_ptl != old_ptl) + spin_unlock(new_ptl); pte_unmap_nested(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0541288ebf4b1a..a2995a5d012c7e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -154,7 +154,7 @@ static void prep_compound_page(struct page *page, unsigned long order) struct page *p = page + i; SetPageCompound(p); - p->private = (unsigned long)page; + set_page_private(p, (unsigned long)page); } } @@ -174,7 +174,7 @@ static void destroy_compound_page(struct page *page, unsigned long order) if (!PageCompound(p)) bad_page(__FUNCTION__, page); - if (p->private != (unsigned long)page) + if (page_private(p) != (unsigned long)page) bad_page(__FUNCTION__, page); ClearPageCompound(p); } @@ -187,18 +187,18 @@ static void destroy_compound_page(struct page *page, unsigned long order) * So, we don't need atomic page->flags operations here. */ static inline unsigned long page_order(struct page *page) { - return page->private; + return page_private(page); } static inline void set_page_order(struct page *page, int order) { - page->private = order; + set_page_private(page, order); __SetPagePrivate(page); } static inline void rmv_page_order(struct page *page) { __ClearPagePrivate(page); - page->private = 0; + set_page_private(page, 0); } /* @@ -238,7 +238,7 @@ __find_combined_index(unsigned long page_idx, unsigned int order) * (a) the buddy is free && * (b) the buddy is on the buddy system && * (c) a page and its buddy have the same order. - * for recording page's order, we use page->private and PG_private. + * for recording page's order, we use page_private(page) and PG_private. * */ static inline int page_is_buddy(struct page *page, int order) @@ -264,7 +264,7 @@ static inline int page_is_buddy(struct page *page, int order) * parts of the VM system. * At each level, we keep a list of pages, which are heads of continuous * free pages of length of (1 << order) and marked with PG_Private.Page's - * order is recorded in page->private field. + * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the * other. That is, if we allocate a small block, and both were * free, the remainder of the region must be split into blocks. @@ -463,7 +463,7 @@ static void prep_new_page(struct page *page, int order) page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked | 1 << PG_mappedtodisk); - page->private = 0; + set_page_private(page, 0); set_page_refs(page, order); kernel_map_pages(page, 1 << order, 1); } diff --git a/mm/page_io.c b/mm/page_io.c index 330e00d6db00d3..bb2b0d53889cbd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -91,7 +91,8 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } - bio = get_swap_bio(GFP_NOIO, page->private, page, end_swap_bio_write); + bio = get_swap_bio(GFP_NOIO, page_private(page), page, + end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); unlock_page(page); @@ -115,7 +116,8 @@ int swap_readpage(struct file *file, struct page *page) BUG_ON(!PageLocked(page)); ClearPageUptodate(page); - bio = get_swap_bio(GFP_KERNEL, page->private, page, end_swap_bio_read); + bio = get_swap_bio(GFP_KERNEL, page_private(page), page, + end_swap_bio_read); if (bio == NULL) { unlock_page(page); ret = -ENOMEM; diff --git a/mm/rmap.c b/mm/rmap.c index a84bdfe582c0c7..a33e779d1bd887 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -274,7 +274,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, return NULL; } - ptl = &mm->page_table_lock; + ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { *ptlp = ptl; @@ -550,7 +550,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) update_hiwater_rss(mm); if (PageAnon(page)) { - swp_entry_t entry = { .val = page->private }; + swp_entry_t entry = { .val = page_private(page) }; /* * Store the swap location in the pte. * See handle_pte_fault() ... diff --git a/mm/shmem.c b/mm/shmem.c index 37777f4c11f860..dc25565a61e95c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -71,9 +71,6 @@ /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 -/* Keep swapped page count in private field of indirect struct page */ -#define nr_swapped private - /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ enum sgp_type { SGP_QUICK, /* don't try more than file page cache lookup */ @@ -324,8 +321,10 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns entry->val = value; info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) - kmap_atomic_to_page(entry)->nr_swapped += incdec; + if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { + struct page *page = kmap_atomic_to_page(entry); + set_page_private(page, page_private(page) + incdec); + } } /* @@ -368,9 +367,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long spin_unlock(&info->lock); page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); - if (page) { - page->nr_swapped = 0; - } + if (page) + set_page_private(page, 0); spin_lock(&info->lock); if (!page) { @@ -561,7 +559,7 @@ static void shmem_truncate(struct inode *inode) diroff = 0; } subdir = dir[diroff]; - if (subdir && subdir->nr_swapped) { + if (subdir && page_private(subdir)) { size = limit - idx; if (size > ENTRIES_PER_PAGE) size = ENTRIES_PER_PAGE; @@ -572,10 +570,10 @@ static void shmem_truncate(struct inode *inode) nr_swaps_freed += freed; if (offset) spin_lock(&info->lock); - subdir->nr_swapped -= freed; + set_page_private(subdir, page_private(subdir) - freed); if (offset) spin_unlock(&info->lock); - BUG_ON(subdir->nr_swapped > offset); + BUG_ON(page_private(subdir) > offset); } if (offset) offset = 0; @@ -743,7 +741,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s dir = shmem_dir_map(subdir); } subdir = *dir; - if (subdir && subdir->nr_swapped) { + if (subdir && page_private(subdir)) { ptr = shmem_swp_map(subdir); size = limit - idx; if (size > ENTRIES_PER_PAGE) diff --git a/mm/swap.c b/mm/swap.c index 21d15f99805c96..b89512877ec264 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -39,7 +39,7 @@ int page_cluster; void put_page(struct page *page) { if (unlikely(PageCompound(page))) { - page = (struct page *)page->private; + page = (struct page *)page_private(page); if (put_page_testzero(page)) { void (*dtor)(struct page *page); diff --git a/mm/swap_state.c b/mm/swap_state.c index 132164f7d0a758..cafc1edcbeba2c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -83,7 +83,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, page_cache_get(page); SetPageLocked(page); SetPageSwapCache(page); - page->private = entry.val; + set_page_private(page, entry.val); total_swapcache_pages++; pagecache_acct(1); } @@ -126,8 +126,8 @@ void __delete_from_swap_cache(struct page *page) BUG_ON(PageWriteback(page)); BUG_ON(PagePrivate(page)); - radix_tree_delete(&swapper_space.page_tree, page->private); - page->private = 0; + radix_tree_delete(&swapper_space.page_tree, page_private(page)); + set_page_private(page, 0); ClearPageSwapCache(page); total_swapcache_pages--; pagecache_acct(-1); @@ -197,7 +197,7 @@ void delete_from_swap_cache(struct page *page) { swp_entry_t entry; - entry.val = page->private; + entry.val = page_private(page); write_lock_irq(&swapper_space.tree_lock); __delete_from_swap_cache(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 510f0039b00030..8970c0b74194f4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -61,7 +61,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) swp_entry_t entry; down_read(&swap_unplug_sem); - entry.val = page->private; + entry.val = page_private(page); if (PageSwapCache(page)) { struct block_device *bdev = swap_info[swp_type(entry)].bdev; struct backing_dev_info *bdi; @@ -69,8 +69,8 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) /* * If the page is removed from swapcache from under us (with a * racy try_to_unuse/swapoff) we need an additional reference - * count to avoid reading garbage from page->private above. If - * the WARN_ON triggers during a swapoff it maybe the race + * count to avoid reading garbage from page_private(page) above. + * If the WARN_ON triggers during a swapoff it maybe the race * condition and it's harmless. However if it triggers without * swapoff it signals a problem. */ @@ -294,7 +294,7 @@ static inline int page_swapcount(struct page *page) struct swap_info_struct *p; swp_entry_t entry; - entry.val = page->private; + entry.val = page_private(page); p = swap_info_get(entry); if (p) { /* Subtract the 1 for the swap cache itself */ @@ -339,7 +339,7 @@ int remove_exclusive_swap_page(struct page *page) if (page_count(page) != 2) /* 2: us + cache */ return 0; - entry.val = page->private; + entry.val = page_private(page); p = swap_info_get(entry); if (!p) return 0; @@ -1042,7 +1042,7 @@ int page_queue_congested(struct page *page) BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */ if (PageSwapCache(page)) { - swp_entry_t entry = { .val = page->private }; + swp_entry_t entry = { .val = page_private(page) }; struct swap_info_struct *sis; sis = get_swap_info_struct(swp_type(entry)); diff --git a/mm/vmscan.c b/mm/vmscan.c index 41d1064aabfb94..135bf8ca96ee60 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -521,7 +521,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) #ifdef CONFIG_SWAP if (PageSwapCache(page)) { - swp_entry_t swap = { .val = page->private }; + swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); write_unlock_irq(&mapping->tree_lock); swap_free(swap); -- cgit 1.2.3-korg