diff -urN linux-2.4.17-rmap-virgin/fs/proc/array.c linux-2.4.17-rmap/fs/proc/array.c --- linux-2.4.17-rmap-virgin/fs/proc/array.c Thu Oct 11 09:00:01 2001 +++ linux-2.4.17-rmap/fs/proc/array.c Fri Dec 7 03:09:09 2001 @@ -392,82 +392,13 @@ mmput(mm); return res; } - -static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, - int * pages, int * shared, int * dirty, int * total) -{ - pte_t * pte; - unsigned long end; - - if (pmd_none(*pmd)) - return; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - return; - } - pte = pte_offset(pmd, address); - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - pte_t page = *pte; - struct page *ptpage; - - address += PAGE_SIZE; - pte++; - if (pte_none(page)) - continue; - ++*total; - if (!pte_present(page)) - continue; - ptpage = pte_page(page); - if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage)) - continue; - ++*pages; - if (pte_dirty(page)) - ++*dirty; - if (page_count(pte_page(page)) > 1) - ++*shared; - } while (address < end); -} - -static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size, - int * pages, int * shared, int * dirty, int * total) -{ - pmd_t * pmd; - unsigned long end; - - if (pgd_none(*pgd)) - return; - if (pgd_bad(*pgd)) { - pgd_ERROR(*pgd); - pgd_clear(pgd); - return; - } - pmd = pmd_offset(pgd, address); - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - do { - statm_pte_range(pmd, address, end - address, pages, shared, dirty, total); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); -} - -static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long end, - int * pages, int * shared, int * dirty, int * total) -{ - while (address < end) { - statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgd++; - } -} +/* + * This thing is 
simplified: the page table scanning has been ripped out. + * The VMA scanning is slow enough on its own. + * The statistics are now derived from the VMA list and mm counters, + * so the reported values are approximations of the old ones. + */ int proc_pid_statm(struct task_struct *task, char * buffer) { struct mm_struct *mm; @@ -482,23 +413,24 @@ struct vm_area_struct * vma; down_read(&mm->mmap_sem); vma = mm->mmap; + resident = mm->rss; + size = mm->total_vm; while (vma) { - pgd_t *pgd = pgd_offset(mm, vma->vm_start); - int pages = 0, shared = 0, dirty = 0, total = 0; + int pages, total; + + total = vma->vm_end - vma->vm_start; + pages = total >> PAGE_SHIFT; + + if (vma->vm_flags & VM_SHARED) + share += total; - statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total); - resident += pages; - share += shared; - dt += dirty; - size += total; - if (vma->vm_flags & VM_EXECUTABLE) - trs += pages; /* text */ - else if (vma->vm_flags & VM_GROWSDOWN) - drs += pages; /* stack */ - else if (vma->vm_end > 0x60000000) - lrs += pages; /* library */ - else - drs += pages; + if (vma->vm_flags & VM_EXECUTABLE) { + if(vma->vm_end > TASK_UNMAPPED_BASE) + lrs += pages; /* library */ + else + trs += pages; /* text */ + } else + drs += pages; /* stack and data */ vma = vma->vm_next; } up_read(&mm->mmap_sem); diff -urN linux-2.4.17-rmap-virgin/fs/proc/proc_misc.c linux-2.4.17-rmap/fs/proc/proc_misc.c --- linux-2.4.17-rmap-virgin/fs/proc/proc_misc.c Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/fs/proc/proc_misc.c Thu Dec 6 17:37:03 2001 @@ -178,9 +178,9 @@ K(i.bufferram), K(pg_size - swapper_space.nrpages), K(swapper_space.nrpages), - K(nr_active_pages), - K(nr_inactive_dirty_pages), - K(nr_inactive_clean_pages), + K(nr_active_pages()), + K(nr_inactive_dirty_pages()), + K(nr_inactive_clean_pages()), K(i.totalhigh), K(i.freehigh), K(i.totalram-i.totalhigh), diff -urN linux-2.4.17-rmap-virgin/include/asm-i386/pgtable.h linux-2.4.17-rmap/include/asm-i386/pgtable.h --- 
linux-2.4.17-rmap-virgin/include/asm-i386/pgtable.h Thu Nov 22 11:46:19 2001 +++ linux-2.4.17-rmap/include/asm-i386/pgtable.h Fri Dec 7 04:01:58 2001 @@ -267,7 +267,28 @@ * Permanent address of a page. Obviously must never be * called on a highmem page. */ +#ifdef CONFIG_HIGHMEM #define page_address(page) ((page)->virtual) +#else /* !CONFIG_HIGHMEM */ +#define page_address(page) __va(((page) - mem_map) << PAGE_SHIFT) +#endif /* !CONFIG_HIGHMEM */ + +#define PageZone(page) \ + (zone_table[((page)->flags >> (BITS_PER_LONG - 5)) & 0x3UL]) + +#define SetPageZone(page,zone_id) \ + do { \ + (page)->flags &= ~(0x3UL << (BITS_PER_LONG - 5)); \ + (page)->flags |= ((zone_id)&0x3UL)<<(BITS_PER_LONG-5); \ + } while(0) + +#define PageAge(page) (((page)->flags >> 21) & 0x3FUL) +#define SetPageAge(page, age) \ + do { \ + (page)->flags &= ~(0x3FUL << 21); \ + (page)->flags |= ((age) & 0x3FUL) << 21; \ + } while(0) + #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* diff -urN linux-2.4.17-rmap-virgin/include/linux/mm.h linux-2.4.17-rmap/include/linux/mm.h --- linux-2.4.17-rmap-virgin/include/linux/mm.h Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/include/linux/mm.h Fri Dec 7 12:02:36 2001 @@ -17,9 +17,6 @@ extern unsigned long num_physpages; extern void * high_memory; extern int page_cluster; -/* The inactive_clean lists are per zone. */ -extern struct list_head active_list; -extern struct list_head inactive_dirty_list; #include #include @@ -162,14 +159,14 @@ updated asynchronously */ struct list_head lru; /* Pageout list, eg. active_list; protected by pagemap_lru_lock !! */ - unsigned long age; /* Page aging counter. */ struct pte_chain * pte_chain; /* Reverse pte mapping pointer. */ wait_queue_head_t wait; /* Page locked? Stand in line... */ struct page **pprev_hash; /* Complement to *next_hash. */ struct buffer_head * buffers; /* Buffer maps us to a disk block. */ +#ifdef CONFIG_HIGHMEM void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. 
highmem) */ - struct zone_struct *zone; /* Memory zone we are in. */ +#endif /* CONFIG_HIGHMEM */ } mem_map_t; /* diff -urN linux-2.4.17-rmap-virgin/include/linux/mmzone.h linux-2.4.17-rmap/include/linux/mmzone.h --- linux-2.4.17-rmap-virgin/include/linux/mmzone.h Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/include/linux/mmzone.h Fri Dec 7 11:59:00 2001 @@ -41,6 +41,9 @@ unsigned long free_pages; unsigned long inactive_clean_pages; unsigned long inactive_dirty_pages; + int nr_active_pages; + int nr_inactive_dirty_pages; + int nr_inactive_clean_pages; unsigned long pages_min, pages_low, pages_high; int need_balance; @@ -48,6 +51,8 @@ * free areas of different sizes */ struct list_head inactive_clean_list; + struct list_head inactive_dirty_list; + struct list_head active_list; free_area_t free_area[MAX_ORDER]; /* @@ -64,6 +69,8 @@ char *name; unsigned long size; } zone_t; + +extern zone_t *zone_table[]; #define ZONE_DMA 0 #define ZONE_NORMAL 1 diff -urN linux-2.4.17-rmap-virgin/include/linux/sched.h.rej linux-2.4.17-rmap/include/linux/sched.h.rej --- linux-2.4.17-rmap-virgin/include/linux/sched.h.rej Thu Dec 6 11:31:17 2001 +++ linux-2.4.17-rmap/include/linux/sched.h.rej Wed Dec 31 16:00:00 1969 @@ -1,16 +0,0 @@ -*************** -*** 327,332 **** - struct mm_struct *active_mm; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; - - /* task state */ - struct linux_binfmt *binfmt; ---- 341,347 ---- - struct mm_struct *active_mm; - struct list_head local_pages; - unsigned int allocation_order, nr_local_pages; -+ unsigned long flags; - - /* task state */ - struct linux_binfmt *binfmt; diff -urN linux-2.4.17-rmap-virgin/include/linux/swap.h linux-2.4.17-rmap/include/linux/swap.h --- linux-2.4.17-rmap-virgin/include/linux/swap.h Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/include/linux/swap.h Fri Dec 7 04:01:58 2001 @@ -3,6 +3,7 @@ #include #include +#include #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define 
SWAP_FLAG_PRIO_MASK 0x7fff @@ -85,9 +86,11 @@ extern unsigned int nr_free_pages(void); extern unsigned int nr_free_buffer_pages(void); -extern int nr_active_pages; -extern int nr_inactive_dirty_pages; -extern int nr_inactive_clean_pages; + +extern int nr_active_pages(void); +extern int nr_inactive_dirty_pages(void); +extern int nr_inactive_clean_pages(void); + extern atomic_t page_cache_size; extern atomic_t buffermem_pages; extern spinlock_t pagecache_lock; @@ -190,7 +193,7 @@ #define PAGE_AGE_START 5 #define PAGE_AGE_ADV 3 #define PAGE_AGE_DECL 1 -#define PAGE_AGE_MAX 64 +#define PAGE_AGE_MAX 63 /* * List add/del helper macros. These must be called @@ -209,46 +212,46 @@ #define add_page_to_active_list(page) { \ DEBUG_LRU_PAGE(page); \ SetPageActive(page); \ - list_add(&(page)->lru, &active_list); \ - nr_active_pages++; \ + list_add(&(page)->lru, &PageZone(page)->active_list); \ + PageZone(page)->nr_active_pages++; \ } #define add_page_to_inactive_dirty_list(page) { \ DEBUG_LRU_PAGE(page); \ SetPageInactiveDirty(page); \ - list_add(&(page)->lru, &inactive_dirty_list); \ - nr_inactive_dirty_pages++; \ - page->zone->inactive_dirty_pages++; \ + list_add(&(page)->lru, &PageZone(page)->inactive_dirty_list); \ + PageZone(page)->nr_inactive_dirty_pages++; \ + PageZone(page)->inactive_dirty_pages++; \ } #define add_page_to_inactive_clean_list(page) { \ DEBUG_LRU_PAGE(page); \ SetPageInactiveClean(page); \ - list_add(&(page)->lru, &page->zone->inactive_clean_list); \ - page->zone->inactive_clean_pages++; \ - nr_inactive_clean_pages++; \ + list_add(&(page)->lru, &PageZone(page)->inactive_clean_list); \ + PageZone(page)->inactive_clean_pages++; \ + PageZone(page)->nr_inactive_clean_pages++; \ } #define del_page_from_active_list(page) { \ list_del(&(page)->lru); \ ClearPageActive(page); \ - nr_active_pages--; \ + PageZone(page)->nr_active_pages--; \ DEBUG_LRU_PAGE(page); \ } #define del_page_from_inactive_dirty_list(page) { \ list_del(&(page)->lru); \ 
ClearPageInactiveDirty(page); \ - nr_inactive_dirty_pages--; \ - page->zone->inactive_dirty_pages--; \ + PageZone(page)->nr_inactive_dirty_pages--; \ + PageZone(page)->inactive_dirty_pages--; \ DEBUG_LRU_PAGE(page); \ } #define del_page_from_inactive_clean_list(page) { \ list_del(&(page)->lru); \ ClearPageInactiveClean(page); \ - page->zone->inactive_clean_pages--; \ - nr_inactive_clean_pages--; \ + PageZone(page)->inactive_clean_pages--; \ + PageZone(page)->nr_inactive_clean_pages--; \ DEBUG_LRU_PAGE(page); \ } diff -urN linux-2.4.17-rmap-virgin/mm/Makefile linux-2.4.17-rmap/mm/Makefile --- linux-2.4.17-rmap-virgin/mm/Makefile Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/mm/Makefile Wed Dec 5 23:15:42 2001 @@ -9,7 +9,7 @@ O_TARGET := mm.o -export-objs := shmem.o filemap.o +export-objs := shmem.o filemap.o page_alloc.o obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ diff -urN linux-2.4.17-rmap-virgin/mm/filemap.c linux-2.4.17-rmap/mm/filemap.c --- linux-2.4.17-rmap-virgin/mm/filemap.c Thu Dec 6 11:31:04 2001 +++ linux-2.4.17-rmap/mm/filemap.c Thu Dec 6 16:14:30 2001 @@ -1799,6 +1799,9 @@ { struct address_space *mapping = file->f_dentry->d_inode->i_mapping; unsigned long max; + unsigned zone_id; + zone_t *zone; + pg_data_t *pgdat = pgdat_list; if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) return -EINVAL; @@ -1812,9 +1815,14 @@ nr = max; /* And limit it to a sane percentage of the inactive list.. 
*/ - max = nr_inactive_clean_pages / 2; - if (nr > max) - nr = max; + while(pgdat) { + for(zone_id = ZONE_DMA; zone_id < MAX_NR_ZONES; ++zone_id) { + zone = &pgdat->node_zones[zone_id]; + if(nr > zone->nr_inactive_clean_pages / 2) + nr = zone->nr_inactive_clean_pages / 2; + } + pgdat = pgdat->node_next; + } while (nr) { page_cache_read(file, index); diff -urN linux-2.4.17-rmap-virgin/mm/page_alloc.c linux-2.4.17-rmap/mm/page_alloc.c --- linux-2.4.17-rmap-virgin/mm/page_alloc.c Thu Dec 6 11:31:05 2001 +++ linux-2.4.17-rmap/mm/page_alloc.c Thu Dec 6 17:38:16 2001 @@ -16,15 +16,12 @@ #include #include #include +#include #include +#include #include int nr_swap_pages; -int nr_active_pages; -int nr_inactive_dirty_pages; -int nr_inactive_clean_pages; -struct list_head inactive_dirty_list; -struct list_head active_list; pg_data_t *pgdat_list; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; @@ -32,6 +29,9 @@ static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; +zone_t *zone_table[MAX_NR_ZONES]; +EXPORT_SYMBOL(zone_table); + /* * Free_page() adds the page to the free lists. This is optimized for * fast normal cases (no error jumps taken normally). @@ -55,7 +55,13 @@ /* * Temporary debugging check. */ -#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size)) +#define BAD_RANGE(zone, page) \ +( \ + (zone) != PageZone(page) \ + || (((page) - mem_map) < (zone)->zone_start_mapnr) \ + || (((page) - mem_map) >= ((zone)->zone_start_mapnr \ + + (zone)->size)) \ +) /* * Buddy system. Hairy. 
You really aren't expected to understand this @@ -90,9 +96,9 @@ if (page->pte_chain) BUG(); page->flags &= ~((1<age = PAGE_AGE_START; + SetPageAge(page, PAGE_AGE_START); - zone = page->zone; + zone = PageZone(page); mask = (~0UL) << order; base = zone->zone_mem_map; @@ -628,6 +634,54 @@ return sum; } +int nr_inactive_clean_pages(void) +{ + pg_data_t *pgdat = pgdat_list; + unsigned int sum = 0; + + while(pgdat) { + zone_t *zone = pgdat->node_zones; + while(zone - pgdat->node_zones < MAX_NR_ZONES) { + sum += zone->nr_inactive_clean_pages; + ++zone; + } + pgdat = pgdat->node_next; + } + return sum; +} + +int nr_inactive_dirty_pages(void) +{ + pg_data_t *pgdat = pgdat_list; + unsigned int sum = 0; + + while(pgdat) { + zone_t *zone = pgdat->node_zones; + while(zone - pgdat->node_zones < MAX_NR_ZONES) { + sum += zone->nr_inactive_dirty_pages; + ++zone; + } + pgdat = pgdat->node_next; + } + return sum; +} + +int nr_active_pages(void) +{ + pg_data_t *pgdat = pgdat_list; + int sum = 0; + + while(pgdat) { + zone_t *zone = pgdat->node_zones; + while(zone - pgdat->node_zones < MAX_NR_ZONES) { + sum += zone->nr_active_pages; + ++zone; + } + pgdat = pgdat->node_next; + } + return sum; +} + #if CONFIG_HIGHMEM unsigned int nr_free_highpages (void) { @@ -679,9 +733,9 @@ nr_free_highpages() << (PAGE_SHIFT-10)); printk("( Active: %d, inactive_dirty: %d, inactive_clean: %d, free: %d (%d %d %d) )\n", - nr_active_pages, - nr_inactive_dirty_pages, - nr_inactive_clean_pages, + nr_active_pages(), + nr_inactive_dirty_pages(), + nr_inactive_clean_pages(), nr_free_pages(), freepages.min, freepages.low, @@ -796,6 +850,9 @@ for (i = 0; i < MAX_NR_ZONES; i++) { unsigned long size = zones_size[i]; totalpages += size; + zone_table[i] = pgdat->node_zones + i; + INIT_LIST_HEAD(&zone_table[i]->active_list); + INIT_LIST_HEAD(&zone_table[i]->inactive_dirty_list); } realtotalpages = totalpages; if (zholes_size) @@ -804,9 +861,6 @@ printk("On node %d totalpages: %lu\n", nid, realtotalpages); - 
INIT_LIST_HEAD(&active_list); - INIT_LIST_HEAD(&inactive_dirty_list); - /* * Some architectures (with lots of mem and discontinous memory * maps) have to search for a good mem_map area: @@ -894,9 +948,11 @@ for (i = 0; i < size; i++) { struct page *page = mem_map + offset + i; - page->zone = zone; + SetPageZone(page, j); +#ifdef CONFIG_HIGHMEM if (j != ZONE_HIGHMEM) page->virtual = __va(zone_start_paddr); +#endif zone_start_paddr += PAGE_SIZE; } diff -urN linux-2.4.17-rmap-virgin/mm/rmap.c linux-2.4.17-rmap/mm/rmap.c --- linux-2.4.17-rmap-virgin/mm/rmap.c Thu Dec 6 11:31:05 2001 +++ linux-2.4.17-rmap/mm/rmap.c Fri Dec 7 03:36:39 2001 @@ -28,7 +28,10 @@ #include #include -#define DEBUG +/* #define DEBUG */ +#ifdef DEBUG +#undef DEBUG +#endif /* * Shared pages have a chain of pte_chain structures, used to locate @@ -190,9 +193,13 @@ if (!spin_trylock(&mm->page_table_lock)) return SWAP_AGAIN; + /* During mremap, it's possible pages are not in a VMA. */ vma = find_vma(mm, address); - if (!vma) - BUG(); + if (!vma) { + ret = SWAP_FAIL; + goto out_unlock; + } + /* The page is mlock()d, we cannot swap it out. */ if (vma->vm_flags & VM_LOCKED) { diff -urN linux-2.4.17-rmap-virgin/mm/swap.c linux-2.4.17-rmap/mm/swap.c --- linux-2.4.17-rmap-virgin/mm/swap.c Thu Dec 6 11:31:05 2001 +++ linux-2.4.17-rmap/mm/swap.c Wed Dec 5 22:21:51 2001 @@ -14,6 +14,7 @@ */ #include +#include #include #include #include @@ -67,7 +68,7 @@ */ ClearPageReferenced(page); if (PageActive(page)) { - page->age = 0; + SetPageAge(page, 0); del_page_from_active_list(page); add_page_to_inactive_dirty_list(page); } @@ -96,7 +97,7 @@ } /* Make sure the page gets a fair chance at staying active. 
*/ - page->age = max((int)page->age, PAGE_AGE_START); + SetPageAge(page, max((int)PageAge(page), PAGE_AGE_START)); } void FASTCALL(activate_page(struct page *)); diff -urN linux-2.4.17-rmap-virgin/mm/vmscan.c linux-2.4.17-rmap/mm/vmscan.c --- linux-2.4.17-rmap-virgin/mm/vmscan.c Thu Dec 6 11:31:05 2001 +++ linux-2.4.17-rmap/mm/vmscan.c Fri Dec 7 12:08:20 2001 @@ -36,18 +36,76 @@ static inline void age_page_up(struct page *page) { - page->age = min((int) (page->age + PAGE_AGE_ADV), PAGE_AGE_MAX); + SetPageAge(page, min((int)(PageAge(page)+PAGE_AGE_ADV), PAGE_AGE_MAX)); } static inline void age_page_down(struct page *page) { - page->age -= min(PAGE_AGE_DECL, (int)page->age); + SetPageAge(page, PageAge(page) - min(PAGE_AGE_DECL,(int)PageAge(page))); +} + +static inline int page_mapping_inuse(struct page * page) +{ + struct address_space * mapping = page->mapping; + + /* Page is in somebody's page tables. */ + if (page->pte_chain) + return 1; + + /* XXX: does this happen ? */ + if (!mapping) + return 0; + + /* File is mmaped by somebody. */ + if (mapping->i_mmap || mapping->i_mmap_shared) + return 1; + + return 0; +} + +/* + * A predicate to determine if a page is in active use + * or otherwise unfreeable. + */ +static inline int page_unfreeable(struct page *page) +{ + return (PageReferenced(page) || PageAge(page)) + && page_mapping_inuse(page); +} + +/* + * A predicate to determine whether a page is busy, + * that is, mlocked or in use elsewhere in the kernel. + */ +static inline int page_busy(struct page *page) +{ + return (page_count(page) > (1 + !!page->buffers)) + && !PageLocked(page); +} + +/* + * A secondary test to counteract race conditions, combining + * several of the tests for freeability. + */ +static inline int page_definitely_freeable(struct page *page) +{ + return page->mapping && !PageDirty(page) + && !page->pte_chain && page_count(page) == 1; +} + +/* + * A predicate to determine whether a page resides in + * unbacked anonymous memory. 
+ */ +static inline int page_unbacked_anonymous(struct page *page) +{ + return page->pte_chain && !page->mapping; } /* * Estimate whether a zone has enough inactive or free pages.. */ -static unsigned int zone_inactive_plenty(zone_t *zone) +static inline unsigned int zone_inactive_plenty(zone_t *zone) { unsigned int inactive; @@ -62,7 +120,7 @@ } #define FREE_PLENTY_FACTOR 4 -static unsigned int zone_free_plenty(zone_t *zone) +static inline unsigned int zone_free_plenty(zone_t *zone) { unsigned int free, target; @@ -74,35 +132,16 @@ return free > target * FREE_PLENTY_FACTOR; } -static unsigned int free_plenty(void) +static inline unsigned int free_plenty(void) { unsigned int free; free = nr_free_pages(); - free += nr_inactive_clean_pages; + free += nr_inactive_clean_pages(); return free > freepages.high * FREE_PLENTY_FACTOR; } -static inline int page_mapping_inuse(struct page * page) -{ - struct address_space * mapping = page->mapping; - - /* Page is in somebody's page tables. */ - if (page->pte_chain) - return 1; - - /* XXX: does this happen ? */ - if (!mapping) - return 0; - - /* File is mmaped by somebody. */ - if (mapping->i_mmap || mapping->i_mmap_shared) - return 1; - - return 0; -} - /** * reclaim_page - reclaims one page from the inactive_clean list * @zone: reclaim a page from this zone @@ -133,7 +172,7 @@ if (unlikely(!PageInactiveClean(page))) { printk("VM: reclaim_page, wrong page on list.\n"); list_del(page_lru); - page->zone->inactive_clean_pages--; + PageZone(page)->inactive_clean_pages--; continue; } @@ -182,7 +221,7 @@ if (entry.val) swap_free(entry); UnlockPage(page); - page->age = PAGE_AGE_START; + SetPageAge(page, PAGE_AGE_START); if (page_count(page) != 1) printk("VM: reclaim_page, found page with count %d!\n", page_count(page)); @@ -226,34 +265,83 @@ * * This code is heavily inspired by the FreeBSD source code. Thanks * go out to Matthew Dillon. + * + * Factoring out the zone decisions in the loop appears to require some + * work. 
Perhaps one approach would be to partition the body of the + * inner loop according to the branching done on the zone and then + * branch elsewhere. How to do this without either duplicating code + * or introducing function calls or using large macros is not apparent. */ #define CAN_DO_FS ((gfp_mask & __GFP_FS) && should_write) #define WRITE_LOW_WATER 5 #define WRITE_HIGH_WATER 10 + +int page_launder_zone(zone_t *, int); + +#ifdef CONFIG_NUMA +int page_launder(int gfp_mask) +{ + pg_data_t *pgdat = pgdat_list; + int cleaned_pages = 0; + + while(pgdat) { + unsigned zone_id; + zone_t * zone; + + for(zone_id = ZONE_DMA; zone_id < MAX_NR_ZONES; ++zone_id) { + zone = &pgdat->node_zones[zone_id]; + if (!zone_free_plenty(zone)) + cleaned_pages + += page_launder_zone(zone, gfp_mask); + } + pgdat = pgdat->node_next; + } + return cleaned_pages; +} +#else /* !CONFIG_NUMA */ int page_launder(int gfp_mask) { + int cleaned_pages = 0; + int zone_id; + zone_t *zone; + + for(zone_id = ZONE_DMA; zone_id < MAX_NR_ZONES; ++zone_id) { + zone = contig_page_data.node_zones + zone_id; + if(!zone_free_plenty(zone)) + cleaned_pages += page_launder_zone(zone, gfp_mask); + } + + return cleaned_pages; +} +#endif /* !CONFIG_NUMA */ + +int page_launder_zone(zone_t *zone, int gfp_mask) +{ int maxscan, cleaned_pages; struct list_head * entry; cleaned_pages = 0; - + /* The main launder loop. */ spin_lock(&pagemap_lru_lock); - maxscan = nr_inactive_dirty_pages; - while (--maxscan >= 0 && (entry = inactive_dirty_list.prev) != &inactive_dirty_list) { + + maxscan = zone->nr_inactive_dirty_pages; + + while (maxscan-- > 0 && !list_empty(&zone->inactive_dirty_list)) { struct page * page; + entry = zone->inactive_dirty_list.prev; page = list_entry(entry, struct page, lru); list_del(entry); - list_add(entry, &inactive_dirty_list); + list_add(entry, &zone->inactive_dirty_list); /* Wrong page on list?! 
(list corruption, should not happen) */ if (!PageInactiveDirty(page)) { printk("VM: page_launder, wrong page on list.\n"); list_del(entry); - nr_inactive_dirty_pages--; - page->zone->inactive_dirty_pages--; + zone->nr_inactive_dirty_pages--; + PageZone(page)->inactive_dirty_pages--; continue; } @@ -261,11 +349,10 @@ * The page is in active use or really unfreeable. Move to * the active list and adjust the page age if needed. */ - if ((PageReferenced(page) || page->age) && - page_mapping_inuse(page)) { + if (page_unfreeable(page)) { del_page_from_inactive_dirty_list(page); add_page_to_active_list(page); - page->age = max((int)page->age, PAGE_AGE_START); + SetPageAge(page,max((int)PageAge(page),PAGE_AGE_START)); continue; } @@ -278,8 +365,7 @@ * The !PageLocked() test is to protect us from ourselves, * see the code around the writepage() call. */ - if ((page_count(page) > (1 + !!page->buffers)) && - !PageLocked(page)) { + if (page_busy(page)) { del_page_from_inactive_dirty_list(page); add_page_to_active_list(page); continue; } @@ -289,8 +375,10 @@ * If this zone has plenty of pages free, don't spend time * on cleaning it but only move clean pages out of the way * so we won't have to scan those again. + * The invariant that this is only called when + * zone_free_plenty() is already false is now used: */ - if (zone_free_plenty(page->zone) || page_count(page) == 0) { + if (!page_count(page)) { continue; } @@ -315,7 +403,7 @@ * * XXX: implement swap clustering ? */ - if (page->pte_chain && !page->mapping) { + if (page_unbacked_anonymous(page)) { page_cache_get(page); spin_unlock(&pagemap_lru_lock); if (!add_to_swap(page)) { @@ -431,8 +519,7 @@ * This test is not safe from races, but only the one * in reclaim_page() needs to be. 
*/ - if (page->mapping && !PageDirty(page) && !page->pte_chain && - page_count(page) == 1) { + if (page_definitely_freeable(page)) { del_page_from_inactive_dirty_list(page); add_page_to_inactive_clean_list(page); UnlockPage(page); @@ -462,23 +549,74 @@ * This function will scan a portion of the active list to find * unused pages, those pages will then be moved to the inactive list. */ +int refill_inactive_zone(zone_t *, int); +#ifdef CONFIG_NUMA int refill_inactive(int priority) { + unsigned zone_id; + zone_t *zone; + pg_data_t *pgdat = pgdat_list; + int nr_deactivated = 0; + + while(pgdat) { + for(zone_id = ZONE_DMA; zone_id < MAX_NR_ZONES; ++zone_id) { + zone = &pgdat->node_zones[zone_id]; + if(zone->nr_active_pages >> priority) + nr_deactivated + += refill_inactive_zone(zone, priority); + } + pgdat = pgdat->node_next; + } + + return nr_deactivated; +} + +#else /* !CONFIG_NUMA */ + +int refill_inactive(int priority) +{ + unsigned zone_id; + zone_t *zone; + int nr_deactivated = 0; + + for(zone_id = ZONE_DMA; zone_id < MAX_NR_ZONES; ++zone_id) { + zone = contig_page_data.node_zones + zone_id; + if(zone->nr_active_pages >> priority) { + /* + * Don't deactivate pages from zones which have + * plenty inactive pages. + */ + if (!(unlikely(zone_inactive_plenty(zone) + && zone_free_plenty(zone)))) + nr_deactivated + += refill_inactive_zone(zone, priority); + } + } + + return nr_deactivated; +} +#endif /* !CONFIG_NUMA */ + +int refill_inactive_zone(zone_t *zone, int priority) +{ struct list_head * page_lru; struct page * page; - int maxscan = nr_active_pages >> priority; + int maxscan; int nr_deactivated = 0; - /* Take the lock while messing with the list... */ spin_lock(&pagemap_lru_lock); - while (maxscan-- > 0 && (page_lru = active_list.prev) != &active_list) { + maxscan = zone->nr_active_pages >> priority; + + /* Take the lock while messing with the list... 
*/ + while (maxscan-- > 0 && !list_empty(&zone->active_list)) { + page_lru = zone->active_list.prev; page = list_entry(page_lru, struct page, lru); /* Wrong page on list?! (list corruption, should not happen) */ if (unlikely(!PageActive(page))) { printk("VM: refill_inactive, wrong page on list.\n"); list_del(page_lru); - nr_active_pages--; + zone->nr_active_pages--; continue; } @@ -497,24 +635,15 @@ age_page_down(page); } - /* - * Don't deactivate pages from zones which have - * plenty inactive pages. - */ - if (unlikely(zone_inactive_plenty(page->zone) && - zone_free_plenty(page->zone))) { - goto skip_page; - } - /* * If the page age is 'hot' AND the object the page * is in is still in use, we keep the page. Otherwise * we move it to the inactive_dirty list. */ - if (page->age && page_mapping_inuse(page)) { + if (PageAge(page) && page_mapping_inuse(page)) { skip_page: list_del(page_lru); - list_add(page_lru, &active_list); + list_add(page_lru, &zone->active_list); } else { deactivate_page_nolock(page); nr_deactivated++; @@ -576,9 +705,9 @@ { unsigned int mem; - mem = nr_active_pages; - mem += nr_inactive_dirty_pages; - mem += nr_inactive_clean_pages; + mem = nr_active_pages(); + mem += nr_inactive_dirty_pages(); + mem += nr_inactive_clean_pages(); return mem / 4; }