From: Martin Schwidefsky This is another s/390-related mm patch. It introduces the concept of physical dirty and referenced bits into the common mm code. I always had the nagging feeling that the pte functions for setting/clearing the dirty and referenced bits are not appropriate for s/390. It works, but it is a bit of a hack. In the wake of rmap it is now possible to put a much better solution into place. The idea is simple: since there are no dirty/referenced bits in the pte, make these functions nops on s/390 and add operations on the physical page at the appropriate places. For the referenced bit this is the page_referenced() function. For the dirty bit there are two relevant spots: in page_remove_rmap after the last user of the page removed its reverse mapping, and in try_to_unmap after the last user was unmapped. There are two new functions to accomplish this: * page_test_and_clear_dirty: Test and clear the dirty bit of a physical page. This function is analogous to ptep_test_and_clear_dirty but gets a struct page as argument instead of a pte_t pointer. * page_test_and_clear_young: Test and clear the referenced bit of a physical page. This function is analogous to ptep_test_and_clear_young but gets a struct page as argument instead of a pte_t pointer. It's pretty straightforward, and with it the s/390 mm makes much more sense. You'll need the tlb flush optimization patch for this patch. Comments? 
--- 25-akpm/include/asm-generic/pgtable.h | 8 ++ 25-akpm/include/asm-s390/pgtable.h | 96 ++++++++++++++++++++-------------- 25-akpm/mm/msync.c | 17 ++---- 25-akpm/mm/rmap.c | 13 ++++ 4 files changed, 85 insertions(+), 49 deletions(-) diff -puN include/asm-generic/pgtable.h~s390-12-dirty-referenced-bits include/asm-generic/pgtable.h --- 25/include/asm-generic/pgtable.h~s390-12-dirty-referenced-bits Thu Jan 8 14:11:41 2004 +++ 25-akpm/include/asm-generic/pgtable.h Thu Jan 8 14:11:41 2004 @@ -97,4 +97,12 @@ static inline void ptep_mkdirty(pte_t *p #define pte_same(A,B) (pte_val(A) == pte_val(B)) #endif +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define page_test_and_clear_dirty(page) (0) +#endif + +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +#define page_test_and_clear_young(page) (0) +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */ diff -puN include/asm-s390/pgtable.h~s390-12-dirty-referenced-bits include/asm-s390/pgtable.h --- 25/include/asm-s390/pgtable.h~s390-12-dirty-referenced-bits Thu Jan 8 14:11:41 2004 +++ 25-akpm/include/asm-s390/pgtable.h Thu Jan 8 14:11:41 2004 @@ -213,9 +213,6 @@ extern char empty_zero_page[PAGE_SIZE]; #define _PAGE_RO 0x200 /* HW read-only */ #define _PAGE_INVALID 0x400 /* HW invalid */ -/* Software bits in the page table entry */ -#define _PAGE_ISCLEAN 0x002 - /* Mask and four different kinds of invalid pages. 
*/ #define _PAGE_INVALID_MASK 0x601 #define _PAGE_INVALID_EMPTY 0x400 @@ -283,12 +280,12 @@ extern char empty_zero_page[PAGE_SIZE]; * No mapping available */ #define PAGE_NONE_SHARED __pgprot(_PAGE_INVALID_NONE) -#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE|_PAGE_ISCLEAN) +#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE) #define PAGE_RO_SHARED __pgprot(_PAGE_RO) -#define PAGE_RO_PRIVATE __pgprot(_PAGE_RO|_PAGE_ISCLEAN) -#define PAGE_COPY __pgprot(_PAGE_RO|_PAGE_ISCLEAN) +#define PAGE_RO_PRIVATE __pgprot(_PAGE_RO) +#define PAGE_COPY __pgprot(_PAGE_RO) #define PAGE_SHARED __pgprot(0) -#define PAGE_KERNEL __pgprot(_PAGE_ISCLEAN) +#define PAGE_KERNEL __pgprot(0) /* * The S390 can't do page protection for execute, and considers that the @@ -403,20 +400,20 @@ extern inline int pte_write(pte_t pte) extern inline int pte_dirty(pte_t pte) { - int skey; - - if (pte_val(pte) & _PAGE_ISCLEAN) - return 0; - asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte))); - return skey & _PAGE_CHANGED; + /* A pte is neither clean nor dirty on s/390. The dirty bit + * is in the storage key. See page_test_and_clear_dirty for + * details. + */ + return 0; } extern inline int pte_young(pte_t pte) { - int skey; - - asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte))); - return skey & _PAGE_REFERENCED; + /* A pte is neither young nor old on s/390. The young bit + * is in the storage key. See page_test_and_clear_young for + * details. 
+ */ + return 0; } /* @@ -461,8 +458,8 @@ extern inline void pte_clear(pte_t *ptep */ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte_val(pte) &= PAGE_MASK | _PAGE_ISCLEAN; - pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_ISCLEAN; + pte_val(pte) &= PAGE_MASK; + pte_val(pte) |= pgprot_val(newprot); return pte; } @@ -476,7 +473,7 @@ extern inline pte_t pte_wrprotect(pte_t extern inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~(_PAGE_RO | _PAGE_ISCLEAN); + pte_val(pte) &= ~_PAGE_RO; return pte; } @@ -516,13 +513,7 @@ extern inline pte_t pte_mkyoung(pte_t pt static inline int ptep_test_and_clear_young(pte_t *ptep) { - int ccode; - - asm volatile ("rrbe 0,%1\n\t" - "ipm %0\n\t" - "srl %0,28\n\t" - : "=d" (ccode) : "a" (pte_val(*ptep)) : "cc" ); - return ccode & 2; + return 0; } static inline int @@ -535,18 +526,7 @@ ptep_clear_flush_young(struct vm_area_st static inline int ptep_test_and_clear_dirty(pte_t *ptep) { - int skey; - - if (pte_val(*ptep) & _PAGE_ISCLEAN) - return 0; - asm volatile ("iske %0,%1" : "=d" (skey) : "a" (*ptep)); - if ((skey & _PAGE_CHANGED) == 0) - return 0; - /* We can't clear the changed bit atomically. For now we - * clear (!) the page referenced bit. */ - asm volatile ("sske %0,%1" - : : "d" (0), "a" (*ptep)); - return 1; + return 0; } static inline int @@ -603,6 +583,42 @@ ptep_establish(struct vm_area_struct *vm } /* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. 
+ */ +#define page_test_and_clear_dirty(page) \ +({ \ + struct page *__page = (page); \ + unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT); \ + int __skey; \ + asm volatile ("iske %0,%1" : "=d" (__skey) : "a" (__physpage)); \ + if (__skey & _PAGE_CHANGED) { \ + asm volatile ("sske %0,%1" \ + : : "d" (__skey & ~_PAGE_CHANGED), \ + "a" (__physpage)); \ + } \ + (__skey & _PAGE_CHANGED); \ +}) + +/* + * Test and clear referenced bit in storage key. + */ +#define page_test_and_clear_young(page) \ +({ \ + struct page *__page = (page); \ + unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT); \ + int __ccode; \ + asm volatile ("rrbe 0,%1\n\t" \ + "ipm %0\n\t" \ + "srl %0,28\n\t" \ + : "=d" (__ccode) : "a" (__physpage) : "cc" ); \ + (__ccode & 2); \ +}) + +/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ @@ -782,6 +798,8 @@ typedef pte_t *pte_addr_t; #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTEP_MKDIRTY #define __HAVE_ARCH_PTE_SAME +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG #include #endif /* _S390_PAGE_H */ diff -puN mm/msync.c~s390-12-dirty-referenced-bits mm/msync.c --- 25/mm/msync.c~s390-12-dirty-referenced-bits Thu Jan 8 14:11:41 2004 +++ 25-akpm/mm/msync.c Thu Jan 8 14:11:41 2004 @@ -24,16 +24,15 @@ static int filemap_sync_pte(pte_t *ptep, unsigned long address, unsigned int flags) { pte_t pte = *ptep; + unsigned long pfn = pte_pfn(pte); + struct page *page; - if (pte_present(pte) && pte_dirty(pte)) { - struct page *page; - unsigned long pfn = pte_pfn(pte); - if (pfn_valid(pfn)) { - page = pfn_to_page(pfn); - if (!PageReserved(page) && - ptep_clear_flush_dirty(vma, address, ptep)) - set_page_dirty(page); - } + if (pte_present(pte) && pfn_valid(pfn)) { + page = pfn_to_page(pfn); + if (!PageReserved(page) && + (ptep_clear_flush_dirty(vma, address, ptep) || + 
page_test_and_clear_dirty(page))) + set_page_dirty(page); } return 0; } diff -puN mm/rmap.c~s390-12-dirty-referenced-bits mm/rmap.c --- 25/mm/rmap.c~s390-12-dirty-referenced-bits Thu Jan 8 14:11:41 2004 +++ 25-akpm/mm/rmap.c Thu Jan 8 14:11:41 2004 @@ -117,6 +117,9 @@ int page_referenced(struct page * page) struct pte_chain *pc; int referenced = 0; + if (page_test_and_clear_young(page)) + mark_page_accessed(page); + if (TestClearPageReferenced(page)) referenced++; @@ -271,6 +274,8 @@ void page_remove_rmap(struct page *page, } } out: + if (page->pte.direct == 0 && page_test_and_clear_dirty(page)) + set_page_dirty(page); if (!page_mapped(page)) dec_page_state(nr_mapped); out_unlock: @@ -360,7 +365,6 @@ static int try_to_unmap_one(struct page set_page_dirty(page); mm->rss--; - page_cache_release(page); ret = SWAP_SUCCESS; out_unlock: @@ -399,6 +403,9 @@ int try_to_unmap(struct page * page) if (PageDirect(page)) { ret = try_to_unmap_one(page, page->pte.direct); if (ret == SWAP_SUCCESS) { + if (page_test_and_clear_dirty(page)) + set_page_dirty(page); + page_cache_release(page); page->pte.direct = 0; ClearPageDirect(page); } @@ -439,6 +446,10 @@ int try_to_unmap(struct page * page) } else { start->next_and_idx++; } + if (page->pte.direct == 0 && + page_test_and_clear_dirty(page)) + set_page_dirty(page); + page_cache_release(page); break; case SWAP_AGAIN: /* Skip this pte, remembering status. */ _