---

 25-akpm/include/linux/mm.h |    5 +++-
 25-akpm/mm/mempolicy.c     |   51 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 41 insertions(+), 15 deletions(-)

diff -puN include/linux/mm.h~numa-api-core-tweaks include/linux/mm.h
--- 25/include/linux/mm.h~numa-api-core-tweaks	Wed Apr  7 12:52:22 2004
+++ 25-akpm/include/linux/mm.h	Wed Apr  7 12:52:22 2004
@@ -157,8 +157,11 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+#ifdef CONFIG_NUMA
 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
-	struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr);
+	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
+					unsigned long addr);
+#endif
 };
 
 /* forward declaration; pte_chain is meant to be internal to rmap.c */

diff -puN mm/mempolicy.c~numa-api-core-tweaks mm/mempolicy.c
--- 25/mm/mempolicy.c~numa-api-core-tweaks	Wed Apr  7 12:52:28 2004
+++ 25-akpm/mm/mempolicy.c	Wed Apr  7 12:58:44 2004
@@ -76,7 +76,6 @@
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
 
-#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
 #define PDprintk(fmt...)
 
 /* Highest zone. An specific allocation for a zone below that is not
@@ -92,6 +91,7 @@ static struct mempolicy default_policy =
 static int check_online(unsigned long *nodes)
 {
 	DECLARE_BITMAP(offline, MAX_NUMNODES);
+
 	bitmap_copy(offline, node_online_map, MAX_NUMNODES);
 	if (bitmap_empty(offline, MAX_NUMNODES))
 		set_bit(0, offline);
@@ -106,6 +106,7 @@
 static int check_policy(int mode, unsigned long *nodes)
 {
 	int empty = bitmap_empty(nodes, MAX_NUMNODES);
+
 	switch (mode) {
 	case MPOL_DEFAULT:
 		if (!empty)
@@ -194,6 +195,7 @@ static struct zonelist *bind_zonelist(un
 static struct mempolicy *new_policy(int mode, unsigned long *nodes)
 {
 	struct mempolicy *policy;
+
 	PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes[0]);
 	if (mode == MPOL_DEFAULT)
 		return NULL;
@@ -405,6 +407,7 @@ asmlinkage long sys_set_mempolicy(int mo
 static void get_zonemask(struct mempolicy *p, unsigned long *nodes)
 {
 	int i;
+
 	bitmap_clear(nodes, MAX_NUMNODES);
 	switch (p->policy) {
 	case MPOL_BIND:
@@ -432,6 +435,7 @@ static int lookup_node(struct mm_struct
 {
 	struct page *p;
 	int err;
+
 	err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
 	if (err >= 0) {
 		err = page_zone(p)->zone_pgdat->node_id;
@@ -444,11 +448,13 @@
 static int copy_nodes_to_user(unsigned long *user_mask, unsigned long maxnode,
 			      unsigned long *nodes)
 {
-	unsigned long copy = round_up(maxnode-1, BITS_PER_LONG) / 8;
+	unsigned long copy = ALIGN(maxnode-1, BITS_PER_LONG) / 8;
+
 	if (copy > sizeof(nodes)) {
 		if (copy > PAGE_SIZE)
 			return -EINVAL;
-		if (clear_user((char*)user_mask + sizeof(nodes), copy - sizeof(nodes)))
+		if (clear_user((char*)user_mask + sizeof(nodes),
+				copy - sizeof(nodes)))
 			return -EFAULT;
 		copy = sizeof(nodes);
 	}
@@ -492,9 +498,10 @@ asmlinkage long sys_get_mempolicy(int *p
 			if (err < 0)
 				goto out;
 			pval = err;
-		} else if (pol == current->mempolicy && pol->policy == MPOL_INTERLEAVE)
+		} else if (pol == current->mempolicy &&
+				pol->policy == MPOL_INTERLEAVE) {
 			pval = current->il_next;
-		else {
+		} else {
 			err = -EINVAL;
 			goto out;
 		}
@@ -523,10 +530,12 @@ static struct mempolicy *
 get_vma_policy(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = current->mempolicy;
+
 	if (vma) {
 		if (vma->vm_ops && vma->vm_ops->get_policy)
 			pol = vma->vm_ops->get_policy(vma, addr);
-		else if (vma->vm_policy && vma->vm_policy->policy != MPOL_DEFAULT)
+		else if (vma->vm_policy &&
+				vma->vm_policy->policy != MPOL_DEFAULT)
 			pol = vma->vm_policy;
 	}
 	if (!pol)
@@ -538,6 +547,7 @@ get_vma_policy(struct vm_area_struct *vm
 static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy)
 {
 	int nd;
+
 	switch (policy->policy) {
 	case MPOL_PREFERRED:
 		nd = policy->v.preferred_node;
@@ -565,6 +575,7 @@ static unsigned interleave_nodes(struct
 {
 	unsigned nid, next;
 	struct task_struct *me = current;
+
 	nid = me->il_next;
 	BUG_ON(nid >= MAX_NUMNODES);
 	next = find_next_bit(policy->v.nodes, MAX_NUMNODES, 1+nid);
@@ -575,12 +586,13 @@ static unsigned interleave_nodes(struct
 }
 
 /* Do static interleaving for a VMA with known offset. */
-static unsigned
-offset_il_node(struct mempolicy *pol, struct vm_area_struct *vma, unsigned long off)
+static unsigned offset_il_node(struct mempolicy *pol,
+		struct vm_area_struct *vma, unsigned long off)
 {
 	unsigned target = (unsigned)off % (unsigned)numnodes;
 	int c;
 	int nid = -1;
+
 	c = 0;
 	do {
 		nid = find_next_bit(pol->v.nodes, MAX_NUMNODES, nid+1);
@@ -600,6 +612,7 @@ static struct page *alloc_page_interleav
 {
 	struct zonelist *zl;
 	struct page *page;
+
 	BUG_ON(!test_bit(nid, node_online_map));
 	zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
 	page = __alloc_pages(gfp, 0, zl);
@@ -626,7 +639,7 @@ static struct page *alloc_page_interleav
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
  * When VMA is not NULL caller must hold down_read on the mmap_sem of the
- * mm_struct of the VMA to prevent it from going away. Should be used for i
+ * mm_struct of the VMA to prevent it from going away. Should be used for
  * all allocations for pages that will be mapped into
  * user space. Returns NULL when no page can be allocated.
  *
@@ -636,6 +649,7 @@ struct page *
 alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(vma, addr);
+
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
 		if (vma) {
@@ -671,19 +685,20 @@ alloc_page_vma(unsigned gfp, struct vm_a
 struct page *alloc_pages_current(unsigned gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
+
 	if (!pol || in_interrupt())
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE && order == 0)
 		return alloc_page_interleave(gfp, interleave_nodes(pol));
 	return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
 }
-
 EXPORT_SYMBOL(alloc_pages_current);
 
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
 {
 	struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+
 	if (!new)
 		return ERR_PTR(-ENOMEM);
 	*new = *old;
@@ -747,6 +762,7 @@ extern void __mpol_free(struct mempolicy
 int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(vma, addr);
+
 	switch (pol->policy) {
 	case MPOL_DEFAULT:
 		return numa_node_id();
@@ -755,7 +771,8 @@ int mpol_first_node(struct vm_area_struc
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(pol);
 	case MPOL_PREFERRED:
-		return pol->v.preferred_node >= 0 ? pol->v.preferred_node:numa_node_id();
+		return pol->v.preferred_node >= 0 ?
+				pol->v.preferred_node : numa_node_id();
 	}
 	BUG();
 	return 0;
@@ -765,6 +782,7 @@ int mpol_first_node(struct vm_area_struc
 int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(vma, addr);
+
 	switch (pol->policy) {
 	case MPOL_PREFERRED:
 	case MPOL_DEFAULT:
@@ -798,6 +816,7 @@ static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
 	struct rb_node *n = sp->root.rb_node;
+
 	while (n) {
 		struct sp_node *p = rb_entry(n, struct sp_node, nd);
 		if (start >= p->end) {
@@ -830,6 +849,7 @@ static void sp_insert(struct shared_poli
 	struct rb_node **p = &sp->root.rb_node;
 	struct rb_node *parent = NULL;
 	struct sp_node *nd;
+
 	while (*p) {
 		parent = *p;
 		nd = rb_entry(parent, struct sp_node, nd);
@@ -852,6 +872,7 @@ mpol_shared_policy_lookup(struct shared_
 {
 	struct mempolicy *pol = NULL;
 	struct sp_node *sn;
+
 	down(&sp->sem);
 	sn = sp_lookup(sp, idx, idx+1);
 	if (sn) {
@@ -874,6 +895,7 @@ struct sp_node *
 sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
 {
 	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+
 	if (!n)
 		return NULL;
 	n->start = start;
@@ -924,8 +946,8 @@ static int shared_policy_replace(struct
 	return 0;
 }
 
-int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma,
-		struct mempolicy *npol)
+int mpol_set_shared_policy(struct shared_policy *info,
+		struct vm_area_struct *vma, struct mempolicy *npol)
 {
 	int err;
 	struct sp_node *new = NULL;
@@ -952,6 +974,7 @@ void mpol_free_shared_policy(struct shar
 {
 	struct sp_node *n;
 	struct rb_node *next;
+
 	down(&p->sem);
 	next = rb_first(&p->root);
 	while (next) {
@@ -978,4 +1001,4 @@ static __init int numa_policy_init(void)
 		panic("Cannot create NUMA policy cache");
 	return 0;
 }
-__initcall(numa_policy_init);
+module_init(numa_policy_init);
_
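
For context, here is a rough sketch (not part of the patch above) of how a mapping
that keeps a per-object struct shared_policy might fill in the now
CONFIG_NUMA-guarded ->set_policy/->get_policy hooks, using the
mpol_set_shared_policy() and mpol_shared_policy_lookup() helpers touched above.
The example_* names and the EXAMPLE_I() accessor are made up for illustration
only; tmpfs wires these hooks up in much the same way.

#ifdef CONFIG_NUMA
/*
 * Illustrative only: EXAMPLE_I() stands in for whatever lookup returns the
 * object that embeds this mapping's struct shared_policy; it is not a real
 * kernel interface.
 */
static int example_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct shared_policy *sp = EXAMPLE_I(vma->vm_file->f_dentry->d_inode);

	/* remember the policy for the range of the object covered by this VMA */
	return mpol_set_shared_policy(sp, vma, new);
}

static struct mempolicy *example_get_policy(struct vm_area_struct *vma,
					    unsigned long addr)
{
	struct shared_policy *sp = EXAMPLE_I(vma->vm_file->f_dentry->d_inode);
	unsigned long idx;

	/* index of the faulting page within the backing object */
	idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	return mpol_shared_policy_lookup(sp, idx);
}
#endif

static struct vm_operations_struct example_vm_ops = {
	/* .open, .close, .nopage etc. omitted */
#ifdef CONFIG_NUMA
	.set_policy	= example_set_policy,
	.get_policy	= example_get_policy,
#endif
};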