From: Martin Schwidefsky <schwidefsky@de.ibm.com>

This is another s/390-related mm patch. It introduces the concept of
physical dirty and referenced bits into the common mm code. I always
had the nagging feeling that the pte functions for setting/clearing
the dirty and referenced bits are not appropriate for s/390. It works,
but it is a bit of a hack.
In the wake of rmap it is now possible to put a much better solution
into place. The idea is simple: since there are no dirty/referenced
bits in the pte, make these functions nops on s/390 and add operations
on the physical page at the appropriate places. For the referenced bit
this is the page_referenced() function. For the dirty bit there are
two relevant spots: in page_remove_rmap after the last user of the
page removed its reverse mapping and in try_to_unmap after the last
user was unmapped. There are two new functions to accomplish this:

 * page_test_and_clear_dirty: Test and clear the dirty bit of a
   physical page. This function is analogous to ptep_test_and_clear_dirty
   but gets a struct page as argument instead of a pte_t pointer.

 * page_test_and_clear_young: Test and clear the referenced bit
   of a physical page. This function is analogous to ptep_test_and_clear_young
   but gets a struct page as argument instead of a pte_t pointer.

It's pretty straightforward, and with it the s/390 mm makes much more
sense. You'll need the TLB flush optimization patch for this patch.
Comments?



---

 25-akpm/include/asm-generic/pgtable.h |    8 ++
 25-akpm/include/asm-s390/pgtable.h    |   96 ++++++++++++++++++++--------------
 25-akpm/mm/msync.c                    |   17 ++----
 25-akpm/mm/rmap.c                     |   13 ++++
 4 files changed, 85 insertions(+), 49 deletions(-)

diff -puN include/asm-generic/pgtable.h~s390-12-dirty-referenced-bits include/asm-generic/pgtable.h
--- 25/include/asm-generic/pgtable.h~s390-12-dirty-referenced-bits	Thu Jan  8 14:11:41 2004
+++ 25-akpm/include/asm-generic/pgtable.h	Thu Jan  8 14:11:41 2004
@@ -97,4 +97,12 @@ static inline void ptep_mkdirty(pte_t *p
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
 #endif
 
+#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
+#define page_test_and_clear_dirty(page) (0)
+#endif
+
+#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
+#define page_test_and_clear_young(page) (0)
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff -puN include/asm-s390/pgtable.h~s390-12-dirty-referenced-bits include/asm-s390/pgtable.h
--- 25/include/asm-s390/pgtable.h~s390-12-dirty-referenced-bits	Thu Jan  8 14:11:41 2004
+++ 25-akpm/include/asm-s390/pgtable.h	Thu Jan  8 14:11:41 2004
@@ -213,9 +213,6 @@ extern char empty_zero_page[PAGE_SIZE];
 #define _PAGE_RO        0x200          /* HW read-only                     */
 #define _PAGE_INVALID   0x400          /* HW invalid                       */
 
-/* Software bits in the page table entry */
-#define _PAGE_ISCLEAN   0x002
-
 /* Mask and four different kinds of invalid pages. */
 #define _PAGE_INVALID_MASK	0x601
 #define _PAGE_INVALID_EMPTY	0x400
@@ -283,12 +280,12 @@ extern char empty_zero_page[PAGE_SIZE];
  * No mapping available
  */
 #define PAGE_NONE_SHARED  __pgprot(_PAGE_INVALID_NONE)
-#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE|_PAGE_ISCLEAN)
+#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE)
 #define PAGE_RO_SHARED	  __pgprot(_PAGE_RO)
-#define PAGE_RO_PRIVATE	  __pgprot(_PAGE_RO|_PAGE_ISCLEAN)
-#define PAGE_COPY	  __pgprot(_PAGE_RO|_PAGE_ISCLEAN)
+#define PAGE_RO_PRIVATE	  __pgprot(_PAGE_RO)
+#define PAGE_COPY	  __pgprot(_PAGE_RO)
 #define PAGE_SHARED	  __pgprot(0)
-#define PAGE_KERNEL	  __pgprot(_PAGE_ISCLEAN)
+#define PAGE_KERNEL	  __pgprot(0)
 
 /*
  * The S390 can't do page protection for execute, and considers that the
@@ -403,20 +400,20 @@ extern inline int pte_write(pte_t pte)
 
 extern inline int pte_dirty(pte_t pte)
 {
-	int skey;
-
-	if (pte_val(pte) & _PAGE_ISCLEAN)
-		return 0;
-	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte)));
-	return skey & _PAGE_CHANGED;
+	/* A pte is neither clean nor dirty on s/390. The dirty bit
+	 * is in the storage key. See page_test_and_clear_dirty for
+	 * details.
+	 */
+	return 0;
 }
 
 extern inline int pte_young(pte_t pte)
 {
-	int skey;
-
-	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte)));
-	return skey & _PAGE_REFERENCED;
+	/* A pte is neither young nor old on s/390. The young bit
+	 * is in the storage key. See page_test_and_clear_young for
+	 * details.
+	 */
+	return 0;
 }
 
 /*
@@ -461,8 +458,8 @@ extern inline void pte_clear(pte_t *ptep
  */
 extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pte_val(pte) &= PAGE_MASK | _PAGE_ISCLEAN;
-	pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_ISCLEAN;
+	pte_val(pte) &= PAGE_MASK;
+	pte_val(pte) |= pgprot_val(newprot);
 	return pte;
 }
 
@@ -476,7 +473,7 @@ extern inline pte_t pte_wrprotect(pte_t 
 
 extern inline pte_t pte_mkwrite(pte_t pte) 
 {
-	pte_val(pte) &= ~(_PAGE_RO | _PAGE_ISCLEAN);
+	pte_val(pte) &= ~_PAGE_RO;
 	return pte;
 }
 
@@ -516,13 +513,7 @@ extern inline pte_t pte_mkyoung(pte_t pt
 
 static inline int ptep_test_and_clear_young(pte_t *ptep)
 {
-	int ccode;
-
-	asm volatile ("rrbe 0,%1\n\t"
-		      "ipm  %0\n\t"
-		      "srl  %0,28\n\t" 
-                      : "=d" (ccode) : "a" (pte_val(*ptep)) : "cc" );
-	return ccode & 2;
+	return 0;
 }
 
 static inline int
@@ -535,18 +526,7 @@ ptep_clear_flush_young(struct vm_area_st
 
 static inline int ptep_test_and_clear_dirty(pte_t *ptep)
 {
-	int skey;
-
-	if (pte_val(*ptep) & _PAGE_ISCLEAN)
-		return 0;
-	asm volatile ("iske %0,%1" : "=d" (skey) : "a" (*ptep));
-	if ((skey & _PAGE_CHANGED) == 0)
-		return 0;
-	/* We can't clear the changed bit atomically. For now we
-         * clear (!) the page referenced bit. */
-	asm volatile ("sske %0,%1" 
-	              : : "d" (0), "a" (*ptep));
-	return 1;
+	return 0;
 }
 
 static inline int
@@ -603,6 +583,42 @@ ptep_establish(struct vm_area_struct *vm
 }
 
 /*
+ * Test and clear dirty bit in storage key.
+ * We can't clear the changed bit atomically. This is a potential
+ * race against modification of the referenced bit. This function
+ * should therefore only be called if it is not mapped in any
+ * address space.
+ */
+#define page_test_and_clear_dirty(page)					  \
+({									  \
+	struct page *__page = (page);					  \
+	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
+	int __skey;							  \
+	asm volatile ("iske %0,%1" : "=d" (__skey) : "a" (__physpage));   \
+	if (__skey & _PAGE_CHANGED) {					  \
+		asm volatile ("sske %0,%1"				  \
+			      : : "d" (__skey & ~_PAGE_CHANGED),	  \
+			          "a" (__physpage));			  \
+	}								  \
+	(__skey & _PAGE_CHANGED);					  \
+})
+
+/*
+ * Test and clear referenced bit in storage key.
+ */
+#define page_test_and_clear_young(page)					  \
+({									  \
+	struct page *__page = (page);					  \
+	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
+	int __ccode;							  \
+	asm volatile ("rrbe 0,%1\n\t"					  \
+		      "ipm  %0\n\t"					  \
+		      "srl  %0,28\n\t" 					  \
+                      : "=d" (__ccode) : "a" (__physpage) : "cc" );	  \
+	(__ccode & 2);							  \
+})
+
+/*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
@@ -782,6 +798,8 @@ typedef pte_t *pte_addr_t;
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTEP_MKDIRTY
 #define __HAVE_ARCH_PTE_SAME
+#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
 #include <asm-generic/pgtable.h>
 
 #endif /* _S390_PAGE_H */
diff -puN mm/msync.c~s390-12-dirty-referenced-bits mm/msync.c
--- 25/mm/msync.c~s390-12-dirty-referenced-bits	Thu Jan  8 14:11:41 2004
+++ 25-akpm/mm/msync.c	Thu Jan  8 14:11:41 2004
@@ -24,16 +24,15 @@ static int filemap_sync_pte(pte_t *ptep,
 	unsigned long address, unsigned int flags)
 {
 	pte_t pte = *ptep;
+	unsigned long pfn = pte_pfn(pte);
+	struct page *page;
 
-	if (pte_present(pte) && pte_dirty(pte)) {
-		struct page *page;
-		unsigned long pfn = pte_pfn(pte);
-		if (pfn_valid(pfn)) {
-			page = pfn_to_page(pfn);
-			if (!PageReserved(page) &&
-			    ptep_clear_flush_dirty(vma, address, ptep))
-				set_page_dirty(page);
-		}
+	if (pte_present(pte) && pfn_valid(pfn)) {
+		page = pfn_to_page(pfn);
+		if (!PageReserved(page) &&
+		    (ptep_clear_flush_dirty(vma, address, ptep) ||
+		     page_test_and_clear_dirty(page)))
+			set_page_dirty(page);
 	}
 	return 0;
 }
diff -puN mm/rmap.c~s390-12-dirty-referenced-bits mm/rmap.c
--- 25/mm/rmap.c~s390-12-dirty-referenced-bits	Thu Jan  8 14:11:41 2004
+++ 25-akpm/mm/rmap.c	Thu Jan  8 14:11:41 2004
@@ -117,6 +117,9 @@ int page_referenced(struct page * page)
 	struct pte_chain *pc;
 	int referenced = 0;
 
+	if (page_test_and_clear_young(page))
+		mark_page_accessed(page);
+
 	if (TestClearPageReferenced(page))
 		referenced++;
 
@@ -271,6 +274,8 @@ void page_remove_rmap(struct page *page,
 		}
 	}
 out:
+	if (page->pte.direct == 0 && page_test_and_clear_dirty(page))
+		set_page_dirty(page);
 	if (!page_mapped(page))
 		dec_page_state(nr_mapped);
 out_unlock:
@@ -360,7 +365,6 @@ static int try_to_unmap_one(struct page 
 		set_page_dirty(page);
 
 	mm->rss--;
-	page_cache_release(page);
 	ret = SWAP_SUCCESS;
 
 out_unlock:
@@ -399,6 +403,9 @@ int try_to_unmap(struct page * page)
 	if (PageDirect(page)) {
 		ret = try_to_unmap_one(page, page->pte.direct);
 		if (ret == SWAP_SUCCESS) {
+			if (page_test_and_clear_dirty(page))
+				set_page_dirty(page);
+			page_cache_release(page);
 			page->pte.direct = 0;
 			ClearPageDirect(page);
 		}
@@ -439,6 +446,10 @@ int try_to_unmap(struct page * page)
 				} else {
 					start->next_and_idx++;
 				}
+				if (page->pte.direct == 0 &&
+				    page_test_and_clear_dirty(page))
+					set_page_dirty(page);
+				page_cache_release(page);
 				break;
 			case SWAP_AGAIN:
 				/* Skip this pte, remembering status. */

_