diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/include/linux/mm.h race/include/linux/mm.h
--- 2.4.23pre7/include/linux/mm.h	2003-10-10 08:08:27.000000000 +0200
+++ race/include/linux/mm.h	2003-10-11 17:32:02.000000000 +0200
@@ -322,11 +322,9 @@ typedef struct page {
 #define TryLockPage(page)	test_and_set_bit(PG_locked, &(page)->flags)
 #define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
-#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
 #define PageLaunder(page)	test_bit(PG_launder, &(page)->flags)
 #define SetPageLaunder(page)	set_bit(PG_launder, &(page)->flags)
 #define ClearPageLaunder(page)	clear_bit(PG_launder, &(page)->flags)
-#define ClearPageArch1(page)	clear_bit(PG_arch_1, &(page)->flags)
 
 /*
  * The zone field is never updated after free_area_init_core()
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/include/linux/pagemap.h race/include/linux/pagemap.h
--- 2.4.23pre7/include/linux/pagemap.h	2003-10-10 08:08:28.000000000 +0200
+++ race/include/linux/pagemap.h	2003-10-12 14:42:47.000000000 +0200
@@ -85,7 +85,6 @@ extern void FASTCALL(unlock_page(struct
 	__find_lock_page(mapping, index, page_hash(mapping, index))
 extern struct page *find_trylock_page(struct address_space *, unsigned long);
 
-extern void add_to_page_cache(struct page * page, struct address_space *mapping, unsigned long index);
 extern void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index);
 extern int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long index, struct page **hash);
 
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/include/linux/swap.h race/include/linux/swap.h
--- 2.4.23pre7/include/linux/swap.h	2003-10-10 08:08:29.000000000 +0200
+++ race/include/linux/swap.h	2003-10-12 15:02:08.000000000 +0200
@@ -103,6 +103,7 @@ struct sysinfo;
 struct zone_t;
 
 /* linux/mm/swap.c */
+extern void FASTCALL(__lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(__lru_cache_del(struct page *));
 extern void FASTCALL(lru_cache_del(struct page *));
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/mm/filemap.c race/mm/filemap.c
--- 2.4.23pre7/mm/filemap.c	2003-10-10 08:08:32.000000000 +0200
+++ race/mm/filemap.c	2003-10-12 15:06:36.000000000 +0200
@@ -656,33 +656,16 @@ static inline void __add_to_page_cache(s
 	struct address_space *mapping, unsigned long offset,
 	struct page **hash)
 {
-	/*
-	 * Yes this is inefficient, however it is needed. The problem
-	 * is that we could be adding a page to the swap cache while
-	 * another CPU is also modifying page->flags, so the updates
-	 * really do need to be atomic.
-- Rik - */ - ClearPageUptodate(page); - ClearPageError(page); - ClearPageDirty(page); - ClearPageReferenced(page); - ClearPageArch1(page); - ClearPageChecked(page); - LockPage(page); + unsigned long flags; + + flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_dirty | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked); + page->flags = flags | (1 << PG_locked); page_cache_get(page); page->index = offset; add_page_to_inode_queue(mapping, page); add_page_to_hash_queue(page, hash); } -void add_to_page_cache(struct page * page, struct address_space * mapping, unsigned long offset) -{ - spin_lock(&pagecache_lock); - __add_to_page_cache(page, mapping, offset, page_hash(mapping, offset)); - spin_unlock(&pagecache_lock); - lru_cache_add(page); -} - int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long offset, struct page **hash) @@ -690,6 +673,23 @@ int add_to_page_cache_unique(struct page int err; struct page *alias; + /* + * This function is the only pagecache entry point that + * is allowed to deal with pages outside the pagecache/swapcache, + * but that might be already queued in the VM lru lists + * (one example is the anonymous ram). + * + * For this reason here we have to execute the + * __add_to_page_cache under the pagemap_lru_lock too, + * to avoid VM lru operations like activate_page to + * race with the page->flags clearing in __add_to_page_cache + * or whatever else similar race condition. It's just + * much safer, simpler and more performant (more performant for + * the other common pagecache operations), to avoid the race, + * rather than trying to control it to avoid damages when + * it triggers. + */ + spin_lock(&pagemap_lru_lock); spin_lock(&pagecache_lock); alias = __find_page_nolock(mapping, offset, *hash); @@ -701,7 +701,8 @@ int add_to_page_cache_unique(struct page spin_unlock(&pagecache_lock); if (!err) - lru_cache_add(page); + __lru_cache_add(page); + spin_unlock(&pagemap_lru_lock); return err; } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/mm/page_alloc.c race/mm/page_alloc.c --- 2.4.23pre7/mm/page_alloc.c 2003-10-10 08:08:32.000000000 +0200 +++ race/mm/page_alloc.c 2003-10-11 17:32:02.000000000 +0200 @@ -109,8 +109,7 @@ static void __free_pages_ok (struct page BUG(); if (PageActive(page)) BUG(); - ClearPageReferenced(page); - ClearPageDirty(page); + page->flags &= ~((1<flags & PF_FREE_PAGES) goto local_freelist; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.4.23pre7/mm/swap.c race/mm/swap.c --- 2.4.23pre7/mm/swap.c 2003-10-10 08:08:33.000000000 +0200 +++ race/mm/swap.c 2003-10-12 15:02:54.000000000 +0200 @@ -54,6 +54,19 @@ void activate_page(struct page * page) /** * lru_cache_add: add a page to the page lists * @page: the page to add + * + * This function is for when the caller already holds + * the pagemap_lru_lock. + */ +void __lru_cache_add(struct page * page) +{ + if (!PageLRU(page) && !TestSetPageLRU(page)) + add_page_to_inactive_list(page); +} + +/** + * lru_cache_add: add a page to the page lists + * @page: the page to add */ void lru_cache_add(struct page * page) {