diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/arch/i386/kernel/entry.S 450-membind/arch/i386/kernel/entry.S
--- 440-local_balance_exec/arch/i386/kernel/entry.S	Sun Apr 20 22:17:52 2003
+++ 450-membind/arch/i386/kernel/entry.S	Sun Apr 20 22:18:24 2003
@@ -1011,6 +1011,7 @@ ENTRY(sys_call_table)
 	.long sys_clock_gettime	/* 265 */
 	.long sys_clock_getres
 	.long sys_clock_nanosleep
+	.long sys_membind

 nr_syscalls=(.-sys_call_table)/4
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/fs/inode.c 450-membind/fs/inode.c
--- 440-local_balance_exec/fs/inode.c	Sun Apr 20 19:35:04 2003
+++ 450-membind/fs/inode.c	Sun Apr 20 22:18:24 2003
@@ -141,6 +141,7 @@ static struct inode *alloc_inode(struct 
 		mapping->a_ops = &empty_aops;
 		mapping->host = inode;
 		mapping->gfp_mask = GFP_HIGHUSER;
+		mapping->binding = NULL;
 		mapping->dirtied_when = 0;
 		mapping->assoc_mapping = NULL;
 		mapping->backing_dev_info = &default_backing_dev_info;
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/include/asm-i386/unistd.h 450-membind/include/asm-i386/unistd.h
--- 440-local_balance_exec/include/asm-i386/unistd.h	Tue Feb 25 23:03:50 2003
+++ 450-membind/include/asm-i386/unistd.h	Sun Apr 20 22:18:24 2003
@@ -273,8 +273,9 @@
 #define __NR_clock_gettime	(__NR_timer_create+6)
 #define __NR_clock_getres	(__NR_timer_create+7)
 #define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_membind		268

-#define NR_syscalls 268
+#define NR_syscalls 269

 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/include/linux/binding.h 450-membind/include/linux/binding.h
--- 440-local_balance_exec/include/linux/binding.h	Wed Dec 31 16:00:00 1969
+++ 450-membind/include/linux/binding.h	Sun Apr 20 22:18:24 2003
@@ -0,0 +1,19 @@
+#ifndef _LINUX_BINDING_H
+#define _LINUX_BINDING_H
+
+#include <linux/mmzone.h>
+
+/* Policy flags for memory bindings */
+#define MB_SOFTBIND	(1<<0)	/* Let page faults fall back to other memblks */
+#define MB_HARDBIND	(1<<1)	/* Don't let page faults fall back to other memblks */
+#define MB_FIRSTREF	(1<<2)	/* Allocate pages on closest memblk to faulting CPU */
+#define MB_STRIPE	(1<<3)	/* Allocate pages evenly across memblks */
+#define MB_MOSTFREE	(1<<4)	/* Allocate pages on memblk with most free mem */
+
+/* Structure to keep track of shared memory segment bindings */
+struct binding {
+	unsigned long policy;
+	struct zonelist zonelist;
+};
+
+#endif /* _LINUX_BINDING_H */
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/include/linux/fs.h 450-membind/include/linux/fs.h
--- 440-local_balance_exec/include/linux/fs.h	Sun Apr 20 19:35:07 2003
+++ 450-membind/include/linux/fs.h	Sun Apr 20 22:18:24 2003
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/radix-tree.h>
 #include <linux/bitops.h>
+#include <linux/binding.h>
 #include <asm/atomic.h>

 struct iovec;
@@ -325,6 +326,7 @@ struct address_space {
 	struct semaphore	i_shared_sem;	/* protect both above lists */
 	unsigned long		dirtied_when;	/* jiffies of first page dirtying */
 	int			gfp_mask;	/* how to allocate the pages */
+	struct binding		*binding;	/* for memory bindings */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
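The MB_* flags above split into two groups, SOFT/HARD and FIRSTREF/STRIPE/MOSTFREE, each with a default and with mutual exclusions that check_binding_policy() in mm/membind.c (later in this patch) enforces. As a quick illustration of those rules, here is a minimal standalone userspace sketch; it is not part of the patch and only mirrors the validation logic, with the MB_* values copied from linux/binding.h above:

#include <stdio.h>

#define MB_SOFTBIND	(1<<0)
#define MB_HARDBIND	(1<<1)
#define MB_FIRSTREF	(1<<2)
#define MB_STRIPE	(1<<3)
#define MB_MOSTFREE	(1<<4)

/* Mirror of check_binding_policy()'s defaulting and exclusion rules */
static int check_policy(unsigned long *policy)
{
	unsigned long test = *policy & (MB_SOFTBIND | MB_HARDBIND);

	if (test == 0)
		*policy |= MB_HARDBIND;		/* default */
	else if (test != MB_SOFTBIND && test != MB_HARDBIND)
		return -1;			/* mutually exclusive */

	test = *policy & (MB_FIRSTREF | MB_STRIPE | MB_MOSTFREE);
	if (test == 0)
		*policy |= MB_FIRSTREF;		/* default */
	else if (test != MB_FIRSTREF && test != MB_STRIPE)
		return -1;			/* MB_MOSTFREE: not implemented */
	return 0;
}

int main(void)
{
	unsigned long policy = MB_SOFTBIND;

	if (check_policy(&policy) == 0)
		printf("policy 0x%lx accepted\n", policy);
	return 0;
}

Running it prints "policy 0x5 accepted": a bare MB_SOFTBIND picks up the MB_FIRSTREF placement default, while combinations such as MB_SOFTBIND | MB_HARDBIND are rejected.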
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/include/linux/pagemap.h 450-membind/include/linux/pagemap.h
--- 440-local_balance_exec/include/linux/pagemap.h	Sun Apr 20 19:35:07 2003
+++ 450-membind/include/linux/pagemap.h	Sun Apr 20 22:18:24 2003
@@ -27,15 +27,24 @@
 #define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, int cold);

-static inline struct page *page_cache_alloc(struct address_space *x)
+static inline struct page *__page_cache_alloc(struct address_space *x, int cold)
 {
-	return alloc_pages(x->gfp_mask, 0);
-}
+	int gfp_mask;
+	struct zonelist *zonelist;

-static inline struct page *page_cache_alloc_cold(struct address_space *x)
-{
-	return alloc_pages(x->gfp_mask|__GFP_COLD, 0);
+	gfp_mask = x->gfp_mask;
+	if (cold)
+		gfp_mask |= __GFP_COLD;
+	if (x->binding)
+		zonelist = &x->binding->zonelist;
+	else
+		zonelist = NODE_DATA(numa_node_id())->node_zonelists + (x->gfp_mask & GFP_ZONEMASK);
+
+	return __alloc_pages(gfp_mask, 0, zonelist);
 }
+
+#define page_cache_alloc(x)		__page_cache_alloc((x), 0)
+#define page_cache_alloc_cold(x)	__page_cache_alloc((x), 1)

 typedef int filler_t(void *, struct page *);
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/mm/Makefile 450-membind/mm/Makefile
--- 440-local_balance_exec/mm/Makefile	Thu Feb 13 11:08:15 2003
+++ 450-membind/mm/Makefile	Sun Apr 20 22:18:24 2003
@@ -7,7 +7,7 @@ mmu-$(CONFIG_MMU)	:= fremap.o highmem.o
 		   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
 		   shmem.o vmalloc.o

-obj-y	:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
+obj-y	:= bootmem.o fadvise.o filemap.o membind.o mempool.o oom_kill.o \
 	   page_alloc.o page-writeback.o pdflush.o readahead.o \
 	   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/mm/membind.c 450-membind/mm/membind.c
--- 440-local_balance_exec/mm/membind.c	Wed Dec 31 16:00:00 1969
+++ 450-membind/mm/membind.c	Sun Apr 20 22:18:24 2003
@@ -0,0 +1,191 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/binding.h>
+
+#include <asm/uaccess.h>
+#include <asm/topology.h>
+#include <asm/bitops.h>
+
+
+static inline void cpumask_to_nodemask(DECLARE_BITMAP(cpumask, NR_CPUS),
+				       DECLARE_BITMAP(nodemask, MAX_NUMNODES))
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		if (test_bit(i, (cpumask)))
+			set_bit(cpu_to_node(i), (nodemask));
+}
+
+/*
+ * Takes a BITMAP of nodes as an argument, and ensures that at least one
+ * of the nodes in the bitmap is actually online.
+ * Returns 0 if at least one specified node is online, -EINVAL otherwise.
+ */
+static inline int check_binding_nodemask(DECLARE_BITMAP(nodemask, MAX_NUMNODES))
+{
+	int i;
+
+	/* Make sure at least one specified node is online */
+	for (i = 0; i < MAX_NUMNODES; i++)
+		if (test_bit(i, nodemask) && node_online(i))
+			return 0;
+	return -EINVAL;
+}
+
+/*
+ * Takes a policy as an argument and ensures it is a valid policy flag.
+ * Returns 0 if the policy flag is valid (possibly modifying the passed-in
+ * value); returns -EINVAL otherwise.
+ */
+static inline int check_binding_policy(unsigned long *policy)
+{
+	unsigned long test;
+
+	/* Test SOFT/HARD binding */
+	test = *policy & (MB_SOFTBIND | MB_HARDBIND);
+	switch (test) {
+	case 0:
+		/* If neither specified, default to HARDBIND */
+		*policy |= MB_HARDBIND;
+	case MB_SOFTBIND:
+	case MB_HARDBIND:
+		break;
+	default:
+		/* User specified some combination.  Bad User! */
+		return -EINVAL;
+	}
+
+	/* Test FIRSTREF/STRIPE/MOSTFREE binding */
+	test = *policy & (MB_FIRSTREF | MB_STRIPE | MB_MOSTFREE);
+	switch (test) {
+	case 0:
+		/* If none specified, default to FIRSTREF */
+		*policy |= MB_FIRSTREF;
+	case MB_FIRSTREF:
+	case MB_STRIPE:
+		break;
+	case MB_MOSTFREE:
+		/* Not Implemented Yet */
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline int add_zones(pg_data_t *pgdat, struct zonelist *zonelist,
+			    int zone_num, int zone_type)
+{
+	switch (zone_type) {
+		struct zone *zone;
+	default:
+		BUG();
+	case ZONE_HIGHMEM:
+		zone = pgdat->node_zones + ZONE_HIGHMEM;
+		if (zone->present_pages)
+			zonelist->zones[zone_num++] = zone;
+	case ZONE_NORMAL:
+		zone = pgdat->node_zones + ZONE_NORMAL;
+		if (zone->present_pages)
+			zonelist->zones[zone_num++] = zone;
+	case ZONE_DMA:
+		zone = pgdat->node_zones + ZONE_DMA;
+		if (zone->present_pages)
+			zonelist->zones[zone_num++] = zone;
+	}
+	return zone_num;
+}
+
+static struct binding *alloc_binding(unsigned long policy,
+	DECLARE_BITMAP(nodemask, MAX_NUMNODES), int gfp_flag)
+{
+	struct binding *binding;
+	int node, zone_num, zone_type;
+
+	if (check_binding_policy(&policy) || check_binding_nodemask(nodemask))
+		return NULL;
+
+	binding = (struct binding *)kmalloc(sizeof(struct binding), GFP_KERNEL);
+	if (!binding)
+		return NULL;
+	memset(binding, 0, sizeof(struct binding));
+
+	/* TODO: Figure in HARD/SOFT binding, policies */
+	binding->policy = policy;
+
+	gfp_flag &= GFP_ZONEMASK;
+	if (gfp_flag & __GFP_HIGHMEM)
+		zone_type = ZONE_HIGHMEM;
+	else if (gfp_flag & __GFP_DMA)
+		zone_type = ZONE_DMA;
+	else
+		zone_type = ZONE_NORMAL;
+
+	for (node = 0, zone_num = 0; node < MAX_NUMNODES; node++) {
+		if (test_bit(node, nodemask) && node_online(node))
+			zone_num = add_zones(NODE_DATA(node), &binding->zonelist, zone_num, zone_type);
+	}
+	binding->zonelist.zones[zone_num] = NULL;
+
+	if (!zone_num) {
+		kfree(binding);
+		binding = NULL;
+	}
+	return binding;
+}
+
+
+/*
+ * membind - Bind a range of a process' VM space to a set of memory blocks
+ * according to a predefined policy.
+ *
+ * @start:	beginning address of memory region to bind
+ * @len:	length of memory region to bind
+ * @policy:	flag specifying the policy to use for the segment
+ * @mask_ptr:	pointer to bitmask of cpus
+ * @mask_len:	length of the bitmask
+ */
+asmlinkage unsigned long sys_membind(unsigned long start, unsigned long len,
+	unsigned long policy, unsigned long *mask_ptr, unsigned int mask_len)
+{
+	DECLARE_BITMAP(cpu_mask, NR_CPUS);
+	DECLARE_BITMAP(node_mask, MAX_NUMNODES);
+	struct vm_area_struct *vma = NULL;
+	struct address_space *mapping;
+	int error = 0;
+
+	/*
+	 * Deal with getting cpumask from userspace
+	 * and translating to nodemask
+	 */
+	if (mask_len > NR_CPUS) {
+		error = -EINVAL;
+		goto out;
+	}
+	CLEAR_BITMAP(cpu_mask, NR_CPUS);
+	CLEAR_BITMAP(node_mask, MAX_NUMNODES);
+	if (copy_from_user(cpu_mask, mask_ptr, (mask_len+7)/8)) {
+		error = -EFAULT;
+		goto out;
+	}
+	cpumask_to_nodemask(cpu_mask, node_mask);
+
+	vma = find_vma(current->mm, start);
+	if (!(vma && vma->vm_file && vma->vm_ops && vma->vm_ops->nopage == shmem_nopage)) {
+		/* This isn't a shm segment.  For now, we bail. */
+		printk("%s: Can only bind shm(em) segments for now!\n", __FUNCTION__);
+		error = -EINVAL;
+		goto out;
+	}
+
+	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+	mapping->binding = alloc_binding(policy, node_mask, mapping->gfp_mask);
+	if (!mapping->binding) {
+		printk("%s: Error while building memory binding!\n", __FUNCTION__);
+		error = -EFAULT;
+	}
+
+out:
+	return error;
+}
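Because add_zones() intentionally falls through its switch cases, every node admitted by the nodemask contributes its zones from the mapping's preferred type downward, in node order. The standalone sketch below is not part of the patch and simply prints the NULL-terminated fallback order alloc_binding() would build for a hypothetical GFP_HIGHUSER mapping bound to nodes 0 and 2, assuming every zone has present pages; __page_cache_alloc() above hands exactly this zonelist to __alloc_pages():

#include <stdio.h>

int main(void)
{
	/* Fallback order for nodes {0, 2}, zone_type == ZONE_HIGHMEM */
	const char *zonelist[] = {
		"node0/HIGHMEM", "node0/NORMAL", "node0/DMA",
		"node2/HIGHMEM", "node2/NORMAL", "node2/DMA",
		NULL,
	};
	int i;

	for (i = 0; zonelist[i]; i++)
		printf("fallback %d: %s\n", i, zonelist[i]);
	return 0;
}

With MB_HARDBIND, allocations for the mapping never fall back beyond this list; MB_SOFTBIND is the flag reserved for allowing wider fallback.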
diff -urpN -X /home/fletch/.diff.exclude 440-local_balance_exec/mm/swap_state.c 450-membind/mm/swap_state.c
--- 440-local_balance_exec/mm/swap_state.c	Sun Apr 20 19:35:08 2003
+++ 450-membind/mm/swap_state.c	Sun Apr 20 22:18:24 2003
@@ -42,6 +42,7 @@ struct address_space swapper_space = {
 	.host		= &swapper_inode,
 	.a_ops		= &swap_aops,
 	.backing_dev_info = &swap_backing_dev_info,
+	.binding	= NULL,
 	.i_mmap		= LIST_HEAD_INIT(swapper_space.i_mmap),
 	.i_mmap_shared	= LIST_HEAD_INIT(swapper_space.i_mmap_shared),
 	.i_shared_sem	= __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
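The patch wires the syscall up only on i386 and ships no userspace wrapper, so a caller has to invoke the syscall number directly. The test program below is a hypothetical sketch, not part of the patch: the syscall number and MB_* values are copied from the headers above, mask_len is given in bits (sys_membind() copies (mask_len+7)/8 bytes), and a SysV shm segment supplies the only kind of mapping sys_membind() currently accepts:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/syscall.h>

#define __NR_membind	268		/* i386 number assigned by this patch */
#define MB_HARDBIND	(1<<1)
#define MB_FIRSTREF	(1<<2)

int main(void)
{
	unsigned long cpu_mask = 0x3;	/* bind near CPUs 0 and 1 */
	size_t len = 1 << 20;
	char *addr;
	int shmid;

	shmid = shmget(IPC_PRIVATE, len, IPC_CREAT | 0600);
	if (shmid == -1) {
		perror("shmget");
		return 1;
	}
	addr = shmat(shmid, NULL, 0);
	if (addr == (char *)-1) {
		perror("shmat");
		return 1;
	}

	/* sys_membind(start, len, policy, mask_ptr, mask_len-in-bits) */
	if (syscall(__NR_membind, (unsigned long)addr, (unsigned long)len,
		    MB_HARDBIND | MB_FIRSTREF, &cpu_mask,
		    sizeof(cpu_mask) * 8) == -1) {
		perror("membind");
		return 1;
	}

	memset(addr, 0, len);	/* fault pages in on the bound memblks */
	shmdt(addr);
	shmctl(shmid, IPC_RMID, NULL);
	return 0;
}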