From: Andi Kleen

A few incremental fixes for NUMA API.

- Fix a few comments.

- Add a compat_ function for get_mempolicy.  I considered changing the
  ABI to avoid this, but that would have made the API too ugly.  I put
  it directly into the file because a mm/compat.c didn't seem worth it
  just for this.  (A user-space sketch of the call it services is
  appended after the diff.)

- Fix the algorithm for VMA interleave.  (A sketch of the fixed walk
  is also appended after the diff.)

---

 25-akpm/mm/mempolicy.c |   61 +++++++++++++++++++++++++++++++------------------
 1 files changed, 39 insertions(+), 22 deletions(-)

diff -puN mm/mempolicy.c~numa-api-fixes mm/mempolicy.c
--- 25/mm/mempolicy.c~numa-api-fixes	Wed Apr 21 14:11:48 2004
+++ 25-akpm/mm/mempolicy.c	Wed Apr 21 14:11:48 2004
@@ -26,7 +26,7 @@
  * process policy.
  * default        Allocate on the local node first, or when on a VMA
  *                use the process policy. This is what Linux always did
- *		  in a NUMA aware kernel and still does by, ahem, default.
+ *                in a NUMA aware kernel and still does by, ahem, default.
  *
  * The process policy is applied for most non interrupt memory allocations
  * in that process' context. Interrupts ignore the policies and always
@@ -71,6 +71,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 
 static kmem_cache_t *policy_cache;
@@ -88,7 +89,7 @@ static struct mempolicy default_policy =
 };
 
 /* Check if all specified nodes are online */
-static int check_online(unsigned long *nodes)
+static int nodes_online(unsigned long *nodes)
 {
 	DECLARE_BITMAP(offline, MAX_NUMNODES);
 
@@ -103,7 +104,7 @@ static int check_online(unsigned long *n
 }
 
 /* Do sanity checking on a policy */
-static int check_policy(int mode, unsigned long *nodes)
+static int mpol_check_policy(int mode, unsigned long *nodes)
 {
 	int empty = bitmap_empty(nodes, MAX_NUMNODES);
 
@@ -120,7 +121,7 @@ static int check_policy(int mode, unsign
 		return -EINVAL;
 		break;
 	}
-	return check_online(nodes);
+	return nodes_online(nodes);
 }
 
 /* Copy a node mask from user space. */
@@ -159,7 +160,7 @@ static int get_nodes(unsigned long *node
 	if (nmask && copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
 		return -EFAULT;
 	nodes[nlongs-1] &= endmask;
-	return check_policy(mode, nodes);
+	return mpol_check_policy(mode, nodes);
 }
 
 /* Generate a custom zonelist for the BIND policy.
 */
@@ -192,7 +193,7 @@ static struct zonelist *bind_zonelist(un
 }
 
 /* Create a new policy */
-static struct mempolicy *new_policy(int mode, unsigned long *nodes)
+static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
 {
 	struct mempolicy *policy;
 
@@ -248,7 +249,7 @@ verify_pages(unsigned long addr, unsigne
 		p = pte_page(*pte);
 		pte_unmap(pte);
 		if (p) {
-			unsigned nid = page_zone(p)->zone_pgdat->node_id;
+			unsigned nid = page_to_nid(p);
 			if (!test_bit(nid, nodes))
 				return -EIO;
 		}
@@ -359,7 +360,7 @@ asmlinkage long sys_mbind(unsigned long
 	if (err)
 		return err;
 
-	new = new_policy(mode, nodes);
+	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
@@ -389,7 +390,7 @@ asmlinkage long sys_set_mempolicy(int mo
 	err = get_nodes(nodes, nmask, maxnode, mode);
 	if (err)
 		return err;
-	new = new_policy(mode, nodes);
+	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
@@ -444,7 +445,7 @@ static int lookup_node(struct mm_struct
 static int copy_nodes_to_user(unsigned long *user_mask, unsigned long maxnode,
 			      unsigned long *nodes)
 {
-	unsigned long copy = ALIGN(maxnode-1, BITS_PER_LONG) / 8;
+	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
 
 	if (copy > sizeof(nodes)) {
 		if (copy > PAGE_SIZE)
@@ -521,6 +522,23 @@ asmlinkage long sys_get_mempolicy(int *p
 	return err;
 }
 
+#ifdef CONFIG_COMPAT
+/* The other functions are compatible */
+asmlinkage long compat_get_mempolicy(int *policy,
+				     unsigned *nmask, unsigned maxnode,
+				     unsigned addr, unsigned flags)
+{
+	long err;
+	unsigned long *nm = NULL;
+	if (nmask)
+		nm = compat_alloc_user_space(ALIGN(maxnode-1, 64) / 8);
+	err = sys_get_mempolicy(policy, nm, maxnode, addr, flags);
+	if (!err && copy_in_user(nmask, nm, ALIGN(maxnode-1, 32)/8))
+		err = -EFAULT;
+	return err;
+}
+#endif
+
 /* Return effective policy for a VMA */
 static struct mempolicy *
 get_vma_policy(struct vm_area_struct *vma, unsigned long addr)
@@ -585,25 +603,23 @@ static unsigned interleave_nodes(struct
 static unsigned offset_il_node(struct mempolicy *pol,
 		struct vm_area_struct *vma, unsigned long off)
 {
-	unsigned target = (unsigned)off % (unsigned)numnodes;
+	unsigned nnodes = bitmap_weight(pol->v.nodes, MAX_NUMNODES);
+	unsigned target = (unsigned)off % nnodes;
 	int c;
 	int nid = -1;
 
 	c = 0;
 	do {
 		nid = find_next_bit(pol->v.nodes, MAX_NUMNODES, nid+1);
-		if (nid >= MAX_NUMNODES) {
-			nid = -1;
-			continue;
-		}
 		c++;
 	} while (c <= target);
 	BUG_ON(nid >= MAX_NUMNODES);
+	BUG_ON(!test_bit(nid, pol->v.nodes));
 	return nid;
 }
 
-/* Allocate a page in interleaved policy for a VMA. Use the offset
-   into the VMA as key. Own path because it needs to do special accounting. */
+/* Allocate a page in interleaved policy.
+   Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(unsigned gfp, unsigned nid)
 {
 	struct zonelist *zl;
@@ -667,11 +683,12 @@ alloc_page_vma(unsigned gfp, struct vm_a
 /**
  * alloc_pages_current - Allocate pages.
  *
- *	@gfp:	%GFP_USER   user allocation,
- *		%GFP_KERNEL kernel allocation,
- *		%GFP_HIGHMEM highmem allocation,
- *		%GFP_FS     don't call back into a file system.
- *		%GFP_ATOMIC don't sleep.
+ *	@gfp:
+ *		%GFP_USER   user allocation,
+ *		%GFP_KERNEL kernel allocation,
+ *		%GFP_HIGHMEM highmem allocation,
+ *		%GFP_FS     don't call back into a file system.
+ *		%GFP_ATOMIC don't sleep.
  *	@order: Power of two of allocation size in pages. 0 is a single page.
  *
  * Allocate a page from the kernel page pool. When not in _
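
For reference, what the compat shim services from the other side.  The
kernel half above bounces the mask through compat_alloc_user_space()
because the 64-bit sys_get_mempolicy() writes the mask in whole 64-bit
words (hence the ALIGN(maxnode-1, 64)/8 allocation), while the 32-bit
caller sized its buffer in 32-bit words (hence the ALIGN(maxnode-1,
32)/8 copy back).  Below is a minimal user-space sketch, assuming
libnuma's <numaif.h> get_mempolicy() wrapper (link with -lnuma);
MAXNODE is an illustrative bound, not a kernel constant.  Built as a
32-bit binary on a 64-bit kernel, this call is what lands in
compat_get_mempolicy().

#include <numaif.h>		/* get_mempolicy() wrapper from libnuma */
#include <stdio.h>

#define MAXNODE 64		/* illustrative bound, not a kernel constant */

int main(void)
{
	int mode;
	/* node mask sized in longs, as the syscall expects */
	unsigned long mask[MAXNODE / (8 * sizeof(unsigned long))] = { 0 };

	/* flags == 0: return the process policy and its node mask */
	if (get_mempolicy(&mode, mask, MAXNODE, NULL, 0) < 0) {
		perror("get_mempolicy");
		return 1;
	}
	printf("policy %d, first mask word %#lx\n", mode, mask[0]);
	return 0;
}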
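
And a user-space rendering of the interleave fix, to make the
algorithm change concrete.  The old code reduced the VMA offset modulo
numnodes, the count of all online nodes, so with a sparse policy mask
the walk had to wrap around and could distribute pages unevenly over
the policy's nodes.  The fixed code reduces modulo the weight of the
policy's own mask, so the target-th set bit always exists and no
wrap-around is needed.  This is only a sketch, not kernel code: a
plain unsigned long stands in for the node bitmap and popcount() for
bitmap_weight().

#include <assert.h>
#include <stdio.h>

#define MAX_NODES (8 * sizeof(unsigned long))	/* stand-in for MAX_NUMNODES */

/* stand-in for bitmap_weight(): number of set bits */
static unsigned popcount(unsigned long x)
{
	unsigned n = 0;

	for (; x; x &= x - 1)
		n++;
	return n;
}

/* Same selection as the patched offset_il_node(): reduce the offset
 * modulo the number of nodes in the policy, then return the target-th
 * set bit of the mask. */
static unsigned offset_il_node(unsigned long mask, unsigned long off)
{
	unsigned target, nid, c = 0;

	assert(mask);	/* interleave policies never have an empty mask */
	target = (unsigned)(off % popcount(mask));
	for (nid = 0; nid < MAX_NODES; nid++) {
		if (!(mask & (1UL << nid)))
			continue;
		if (c++ == target)
			return nid;
	}
	assert(0);	/* unreachable: target < popcount(mask) */
	return 0;
}

int main(void)
{
	/* a policy interleaving over nodes 1, 3 and 6 */
	unsigned long mask = 1UL << 1 | 1UL << 3 | 1UL << 6;
	unsigned long off;

	for (off = 0; off < 7; off++)	/* prints nodes 1 3 6 1 3 6 1 */
		printf("offset %lu -> node %u\n", off,
		       offset_il_node(mask, off));
	return 0;
}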