diff -urN 2.4.4/arch/alpha/config.in alpha-numa/arch/alpha/config.in
--- 2.4.4/arch/alpha/config.in	Sat Apr 28 05:24:29 2001
+++ alpha-numa/arch/alpha/config.in	Tue May  1 14:06:12 2001
@@ -59,7 +59,7 @@
 	   Wildfire		CONFIG_ALPHA_WILDFIRE" Generic
 
 # clear all implied options (don't want default values for those):
-unset CONFIG_ALPHA_EV4 CONFIG_ALPHA_EV5 CONFIG_ALPHA_EV6 CONFIG_ALPHA_EV67
+unset CONFIG_ALPHA_EV4 CONFIG_ALPHA_EV5 CONFIG_ALPHA_EV6
 unset CONFIG_ALPHA_EISA
 unset CONFIG_ALPHA_LCA CONFIG_ALPHA_APECS CONFIG_ALPHA_CIA
 unset CONFIG_ALPHA_T2 CONFIG_ALPHA_PYXIS CONFIG_ALPHA_POLARIS
@@ -141,12 +141,10 @@
 then
 	define_bool CONFIG_ALPHA_EV6 y
 	define_bool CONFIG_ALPHA_TSUNAMI y
-	bool 'EV67 (or later) CPU (speed > 600MHz)?' CONFIG_ALPHA_EV67
 fi
 if [ "$CONFIG_ALPHA_WILDFIRE" = "y" -o "$CONFIG_ALPHA_TITAN" = "y" ]
 then
 	define_bool CONFIG_ALPHA_EV6 y
-	define_bool CONFIG_ALPHA_EV67 y
 fi
 if [ "$CONFIG_ALPHA_RAWHIDE" = "y" ]
 then
@@ -166,7 +164,6 @@
 then
 	define_bool CONFIG_ALPHA_IRONGATE y
 	define_bool CONFIG_ALPHA_EV6 y
-	define_bool CONFIG_ALPHA_EV67 y
 fi
 if [ "$CONFIG_ALPHA_JENSEN" = "y" -o "$CONFIG_ALPHA_MIKASA" = "y" \
@@ -207,6 +204,13 @@
 	-o "$CONFIG_ALPHA_TITAN" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" ]
 then
 	bool 'Symmetric multi-processing support' CONFIG_SMP
+fi
+
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+	bool 'Discontiguous Memory Support' CONFIG_DISCONTIGMEM
+	if [ "$CONFIG_DISCONTIGMEM" = "y" ]; then
+		bool ' NUMA Support' CONFIG_NUMA
+	fi
 fi
 
 # The machine must be able to support more than 8GB physical memory
diff -urN 2.4.4/arch/alpha/kernel/entry.S alpha-numa/arch/alpha/kernel/entry.S
--- 2.4.4/arch/alpha/kernel/entry.S	Sun Sep  3 20:36:45 2000
+++ alpha-numa/arch/alpha/kernel/entry.S	Tue May  1 14:06:12 2001
@@ -10,7 +10,7 @@
 
 #define SIGCHLD 20
 
-#define NR_SYSCALLS 377
+#define NR_SYSCALLS 378
 
 /*
  * These offsets must match with alpha_mv in <asm/machvec.h>.
 */
diff -urN 2.4.4/arch/alpha/kernel/process.c alpha-numa/arch/alpha/kernel/process.c
--- 2.4.4/arch/alpha/kernel/process.c	Thu Feb 22 03:44:53 2001
+++ alpha-numa/arch/alpha/kernel/process.c	Tue May  1 14:06:12 2001
@@ -83,10 +83,11 @@
 
 		/* Although we are an idle CPU, we do not want to
 		   get into the scheduler unnecessarily. */
-		if (current->need_resched) {
-			schedule();
-			check_pgt_cache();
-		}
+		long oldval = xchg(&current->need_resched, -1UL);
+		if (!oldval)
+			while (current->need_resched < 0);
+		schedule();
+		check_pgt_cache();
 	}
 }
diff -urN 2.4.4/arch/alpha/kernel/setup.c alpha-numa/arch/alpha/kernel/setup.c
--- 2.4.4/arch/alpha/kernel/setup.c	Sun Apr  1 01:17:07 2001
+++ alpha-numa/arch/alpha/kernel/setup.c	Tue May  1 14:06:12 2001
@@ -87,18 +87,6 @@
 static struct alpha_machine_vector *get_sysvec_byname(const char *);
 static void get_sysnames(long, long, char **, char **);
 
-/*
- * This is setup by the secondary bootstrap loader.  Because
- * the zero page is zeroed out as soon as the vm system is
- * initialized, we need to copy things out into a more permanent
- * place.
- */
-#define PARAM			ZERO_PGE
-#define COMMAND_LINE		((char*)(PARAM + 0x0000))
-#define COMMAND_LINE_SIZE	256
-#define INITRD_START		(*(unsigned long *) (PARAM+0x100))
-#define INITRD_SIZE		(*(unsigned long *) (PARAM+0x108))
-
 static char command_line[COMMAND_LINE_SIZE];
 char saved_command_line[COMMAND_LINE_SIZE];
 
@@ -245,6 +233,7 @@
 	return end >> PAGE_SHIFT; /* Return the PFN of the limit. */
 }
 
+#ifndef CONFIG_DISCONTIGMEM
 static void __init
 setup_memory(void *kernel_end)
 {
@@ -362,6 +351,7 @@
 	/* Reserve the bootmap memory.  */
 	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size);
+	printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	initrd_start = INITRD_START;
@@ -383,6 +373,7 @@
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
+#endif /* !CONFIG_DISCONTIGMEM */
 
 int __init
 page_is_ram(unsigned long pfn)
diff -urN 2.4.4/arch/alpha/kernel/smp.c alpha-numa/arch/alpha/kernel/smp.c
--- 2.4.4/arch/alpha/kernel/smp.c	Thu Feb 22 03:44:53 2001
+++ alpha-numa/arch/alpha/kernel/smp.c	Tue May  1 14:06:12 2001
@@ -123,6 +123,22 @@
 	cpu_data[cpuid].prof_multiplier = 1;
 }
 
+static void __init
+wait_boot_cpu_to_stop(int cpuid)
+{
+	long stop = jiffies + 10*HZ;
+
+	while (time_before(jiffies, stop)) {
+		if (!smp_secondary_alive)
+			return;
+		barrier();
+	}
+
+	printk("wait_boot_cpu_to_stop: FAILED on CPU %d, hanging now\n", cpuid);
+	for (;;)
+		barrier();
+}
+
 /*
  * Where secondaries begin a life of C.
  */
@@ -131,6 +147,11 @@
 {
 	int cpuid = hard_smp_processor_id();
 
+	if (current != init_tasks[cpu_number_map(cpuid)]) {
+		printk("BUG: smp_callin: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
+		       cpuid, current, init_tasks[cpu_number_map(cpuid)]);
+	}
+
 	DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
 
 	/* Turn on machine checks. */
@@ -154,12 +175,22 @@
 
 	/* Must have completely accurate bogos.  */
 	__sti();
+
+	/*
+	 * Wait for the boot CPU to stop, with irqs enabled, before
+	 * running calibrate_delay().
+	 */
+	wait_boot_cpu_to_stop(cpuid);
+	mb();
 	calibrate_delay();
-	smp_store_cpu_info(cpuid);
 
-	/* Allow master to continue. */
+	smp_store_cpu_info(cpuid);
+	/*
+	 * Allow master to continue only after we've written
+	 * the loops_per_jiffy.
+	 */
 	wmb();
-	smp_secondary_alive = cpuid;
+	smp_secondary_alive = 1;
 
 	/* Wait for the go code.  */
 	while (!smp_threads_ready)
@@ -203,6 +234,7 @@
 		break;
 
 	case EV6_CPU:
+	case EV67_CPU:
 		on_chip_cache = 64 + 64;
 		break;
 
@@ -246,7 +278,7 @@
 		       + hwrpb->processor_offset
 		       + cpuid * hwrpb->processor_size);
 
-	cpumask = (1L << cpuid);
+	cpumask = (1UL << cpuid);
 	if (hwrpb->txrdy & cpumask)
 		goto delay1;
 ready1:
@@ -267,8 +299,8 @@
 	return;
 
 delay1:
-	/* Wait one second.  Note that jiffies aren't ticking yet.  */
-	for (timeout = 100000; timeout > 0; --timeout) {
+	/* Wait 10 seconds.  Note that jiffies aren't ticking yet.  */
+	for (timeout = 1000000; timeout > 0; --timeout) {
 		if (!(hwrpb->txrdy & cpumask))
 			goto ready1;
 		udelay(10);
@@ -277,8 +309,8 @@
 	goto timeout;
 
 delay2:
-	/* Wait one second.  */
-	for (timeout = 100000; timeout > 0; --timeout) {
+	/* Wait 10 seconds.  */
+	for (timeout = 1000000; timeout > 0; --timeout) {
 		if (!(hwrpb->txrdy & cpumask))
 			goto ready2;
 		udelay(10);
@@ -307,7 +339,7 @@
 	mycpu = hard_smp_processor_id();
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (!(txrdy & (1L << i)))
+		if (!(txrdy & (1UL << i)))
 			continue;
 
 		DBGS(("recv_secondary_console_msg: "
@@ -375,7 +407,7 @@
 
 #if 0
 	DBGS(("KSP 0x%lx PTBR 0x%lx VPTBR 0x%lx UNIQUE 0x%lx\n",
-	      hwpcb->ksp, hwpcb->ptbr, hwrpb->vptb, hwcpb->unique));
+	      hwpcb->ksp, hwpcb->ptbr, hwrpb->vptb, hwpcb->unique));
 #endif
 	DBGS(("Starting secondary cpu %d: state 0x%lx pal_flags 0x%lx\n",
 	      cpuid, idle->state, idle->thread.pal_flags));
@@ -398,9 +430,9 @@
 
 	send_secondary_console_msg("START\r\n", cpuid);
 
-	/* Wait 1 second for an ACK from the console.  Note that jiffies
+	/* Wait 10 seconds for an ACK from the console.  Note that jiffies
 	   aren't ticking yet.  */
-	for (timeout = 100000; timeout > 0; timeout--) {
+	for (timeout = 1000000; timeout > 0; timeout--) {
 		if (cpu->flags & 1)
 			goto started;
 		udelay(10);
@@ -447,6 +479,8 @@
 	idle = init_task.prev_task;
 	if (!idle)
 		panic("No idle process for CPU %d", cpuid);
+	if (idle == &init_task)
+		panic("idle process is init_task for CPU %d", cpuid);
 
 	idle->processor = cpuid;
 	__cpu_logical_map[cpunum] = cpuid;
@@ -468,10 +502,14 @@
 	if (secondary_cpu_start(cpuid, idle))
 		return -1;
 
+	mb();
+	/* Notify the secondary CPU it can run calibrate_delay() */
+	smp_secondary_alive = 0;
+
 	/* We've been acked by the console; wait one second for the task to
 	   start up for real.  Note that jiffies aren't ticking yet.  */
-	for (timeout = 0; timeout < 100000; timeout++) {
-		if (smp_secondary_alive != -1)
+	for (timeout = 0; timeout < 1000000; timeout++) {
+		if (smp_secondary_alive == 1)
 			goto alive;
 		udelay(10);
 		barrier();
@@ -523,7 +561,7 @@
 		if ((cpu->flags & 0x1cc) == 0x1cc) {
 			smp_num_probed++;
 			/* Assume here that "whami" == index */
-			hwrpb_cpu_present_mask |= (1L << i);
+			hwrpb_cpu_present_mask |= (1UL << i);
 			cpu->pal_revision = boot_cpu_palrev;
 		}
 
@@ -534,9 +572,9 @@
 		}
 	} else {
 		smp_num_probed = 1;
-		hwrpb_cpu_present_mask = (1L << boot_cpuid);
+		hwrpb_cpu_present_mask = (1UL << boot_cpuid);
 	}
-	cpu_present_mask = 1L << boot_cpuid;
+	cpu_present_mask = 1UL << boot_cpuid;
 
 	printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n",
 	       smp_num_probed, hwrpb_cpu_present_mask);
@@ -589,7 +627,7 @@
 		if (smp_boot_one_cpu(i, cpu_count))
 			continue;
 
-		cpu_present_mask |= 1L << i;
+		cpu_present_mask |= 1UL << i;
 		cpu_count++;
 	}
 
@@ -600,7 +638,7 @@
 
 	bogosum = 0;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_present_mask & (1L << i))
+		if (cpu_present_mask & (1UL << i))
 			bogosum += cpu_data[i].loops_per_jiffy;
 	}
 	printk(KERN_INFO "SMP: Total of %d processors activated "
@@ -798,13 +836,13 @@
 		printk(KERN_WARNING
 		       "smp_send_reschedule: Sending IPI to self.\n");
 #endif
-	send_ipi_message(1L << cpu, IPI_RESCHEDULE);
+	send_ipi_message(1UL << cpu, IPI_RESCHEDULE);
 }
 
 void
 smp_send_stop(void)
 {
-	unsigned long to_whom = cpu_present_mask ^ (1L << smp_processor_id());
+	unsigned long to_whom = cpu_present_mask ^ (1UL << smp_processor_id());
 #if DEBUG_IPI_MSG
 	if (hard_smp_processor_id() != boot_cpu_id)
 		printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
@@ -827,7 +865,7 @@
 int
 smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
 {
-	unsigned long to_whom = cpu_present_mask ^ (1L << smp_processor_id());
+	unsigned long to_whom = cpu_present_mask ^ (1UL << smp_processor_id());
 	struct smp_call_struct data;
 	long timeout;
diff -urN 2.4.4/arch/alpha/kernel/traps.c alpha-numa/arch/alpha/kernel/traps.c
--- 2.4.4/arch/alpha/kernel/traps.c	Thu Feb 22 03:44:53 2001
+++ alpha-numa/arch/alpha/kernel/traps.c	Tue May  1 14:06:12 2001
@@ -291,10 +291,12 @@
 		 * is interesting.
 		 */
 		printk("%6x%c", (int)tmp & 0xffffff, (++i % 11) ? ' ' : '\n');
+#if 0
 		if (i > 40) {
 			printk(" ...");
 			break;
 		}
+#endif
 	}
 	printk("\n");
 }
diff -urN 2.4.4/arch/alpha/mm/Makefile alpha-numa/arch/alpha/mm/Makefile
--- 2.4.4/arch/alpha/mm/Makefile	Wed Oct 30 17:41:44 1996
+++ alpha-numa/arch/alpha/mm/Makefile	Tue May  1 14:06:12 2001
@@ -7,9 +7,10 @@
 #
 # Note 2! The CFLAGS definition is now in the main makefile...
 
-OBJS	= init.o fault.o extable.o
+O_TARGET := mm.o
 
-mm.o: $(OBJS)
-	$(LD) -r -o mm.o $(OBJS)
+obj-y	:= init.o fault.o extable.o
+
+obj-$(CONFIG_DISCONTIGMEM) += numa.o
 
 include $(TOPDIR)/Rules.make
diff -urN 2.4.4/arch/alpha/mm/init.c alpha-numa/arch/alpha/mm/init.c
--- 2.4.4/arch/alpha/mm/init.c	Sat Apr 28 05:24:29 2001
+++ alpha-numa/arch/alpha/mm/init.c	Tue May  1 14:06:12 2001
@@ -33,7 +33,7 @@
 #include <asm/mmu_context.h>
 #include <asm/console.h>
 
-static unsigned long totalram_pages;
+unsigned long totalram_pages;
 
 extern void die_if_kernel(char *,struct pt_regs *,long);
@@ -69,23 +69,17 @@
 int do_check_pgt_cache(int low, int high)
 {
 	int freed = 0;
-	if(pgtable_cache_size > high) {
-		do {
-			if(pgd_quicklist) {
-				free_pgd_slow(get_pgd_fast());
-				freed++;
-			}
-			if(pmd_quicklist) {
-				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
-				freed++;
-			}
-			if(pte_quicklist) {
-				pte_free_slow(pte_alloc_one_fast(NULL, 0));
-				freed++;
-			}
-		} while(pgtable_cache_size > low);
-	}
-	return freed;
+	if(pgtable_cache_size > high) {
+		do {
+			if(pgd_quicklist)
+				free_pgd_slow(get_pgd_fast()), freed++;
+			if(pmd_quicklist)
+				pmd_free_slow(pmd_alloc_one_fast(NULL, 0)), freed++;
+			if(pte_quicklist)
+				pte_free_slow(pte_alloc_one_fast(NULL, 0)), freed++;
+		} while(pgtable_cache_size > low);
+	}
+	return freed;
 }
 
 /*
@@ -115,6 +109,7 @@
 	return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED));
 }
 
+#ifndef CONFIG_DISCONTIGMEM
 void
 show_mem(void)
 {
@@ -144,6 +139,7 @@
 	printk("%ld pages in page table cache\n",pgtable_cache_size);
 	show_buffers();
 }
+#endif
 
 static inline unsigned long
 load_PCB(struct thread_struct * pcb)
@@ -275,6 +271,7 @@
 }
 
+#ifndef CONFIG_DISCONTIGMEM
 /*
  * paging_init() sets up the memory map.
 */
@@ -287,16 +284,7 @@
 	dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 	high_pfn = max_low_pfn;
 
-#define ORDER_MASK (~((1L << (MAX_ORDER-1))-1))
-#define ORDER_ALIGN(n) (((n) + ~ORDER_MASK) & ORDER_MASK)
-
-	dma_pfn = ORDER_ALIGN(dma_pfn);
-	high_pfn = ORDER_ALIGN(high_pfn);
-
-#undef ORDER_MASK
-#undef ORDER_ALIGN
-
-	if (dma_pfn > high_pfn)
+	if (dma_pfn >= high_pfn)
 		zones_size[ZONE_DMA] = high_pfn;
 	else {
 		zones_size[ZONE_DMA] = dma_pfn;
@@ -309,6 +297,7 @@
 	/* Initialize the kernel's ZERO_PGE. */
 	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
 }
+#endif /* CONFIG_DISCONTIGMEM */
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
 void
@@ -327,6 +316,7 @@
 }
 #endif
 
+#ifndef CONFIG_DISCONTIGMEM
 static void __init
 printk_memory_info(void)
 {
@@ -366,6 +356,7 @@
 
 	printk_memory_info();
 }
+#endif /* CONFIG_DISCONTIGMEM */
 
 void
 free_initmem (void)
diff -urN 2.4.4/arch/alpha/mm/numa.c alpha-numa/arch/alpha/mm/numa.c
--- 2.4.4/arch/alpha/mm/numa.c	Thu Jan  1 01:00:00 1970
+++ alpha-numa/arch/alpha/mm/numa.c	Tue May  1 14:06:12 2001
@@ -0,0 +1,430 @@
+/*
+ *  linux/arch/alpha/mm/numa.c
+ *
+ *  DISCONTIGMEM NUMA alpha support.
+ *
+ *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+
+#include <asm/hwrpb.h>
+#include <asm/pgalloc.h>
+
+plat_pg_data_t *plat_node_data[MAX_NUMNODES];
+bootmem_data_t plat_node_bdata[MAX_NUMNODES];
+
+#undef DEBUG_DISCONTIG
+#ifdef DEBUG_DISCONTIG
+#define DBGDCONT(args...) printk(args)
+#else
+#define DBGDCONT(args...)
+#endif
+
+#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+#define for_each_mem_cluster(memdesc, cluster, i)		\
+	for ((cluster) = (memdesc)->cluster, (i) = 0;		\
+	     (i) < (memdesc)->numclusters; (i)++, (cluster)++)
+
+static void __init show_mem_layout(void)
+{
+	struct memclust_struct * cluster;
+	struct memdesc_struct * memdesc;
+	int i;
+
+	/* Find free clusters, and init and free the bootmem accordingly.  */
+	memdesc = (struct memdesc_struct *)
+	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
+
+	printk("Raw memory layout:\n");
+	for_each_mem_cluster(memdesc, cluster, i) {
+		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
+		       i, cluster->usage, cluster->start_pfn,
+		       cluster->start_pfn + cluster->numpages);
+	}
+}
+
+static void __init
+setup_memory_node(int nid, void *kernel_end)
+{
+	extern unsigned long mem_size_limit;
+	struct memclust_struct * cluster;
+	struct memdesc_struct * memdesc;
+	unsigned long start_kernel_pfn, end_kernel_pfn;
+	unsigned long bootmap_size, bootmap_pages, bootmap_start;
+	unsigned long start, end;
+	unsigned long node_pfn_start, node_pfn_end;
+	int i;
+	unsigned long node_datasz = PFN_UP(sizeof(plat_pg_data_t));
+	int show_init = 0;
+
+	/* Find the bounds of the current node */
+	node_pfn_start = (nid * NODE_MAX_MEM_SIZE) >> PAGE_SHIFT;
+	node_pfn_end = node_pfn_start + (NODE_MAX_MEM_SIZE >> PAGE_SHIFT);
+
+	/* Find free clusters, and init and free the bootmem accordingly.  */
+	memdesc = (struct memdesc_struct *)
+	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
+
+	/* find the bounds of this node (min_low_pfn/max_low_pfn) */
+	min_low_pfn = ~0UL;
+	for_each_mem_cluster(memdesc, cluster, i) {
+		/* Bit 0 is console/PALcode reserved.  Bit 1 is
+		   non-volatile memory -- we might want to mark
+		   this for later.  */
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = start + cluster->numpages;
+
+		if (start >= node_pfn_end || end <= node_pfn_start)
+			continue;
+
+		if (!show_init) {
+			show_init = 1;
+			printk("Initializing bootmem allocator on Node ID %d\n", nid);
+		}
+		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
+		       i, cluster->usage, cluster->start_pfn,
+		       cluster->start_pfn + cluster->numpages);
+
+		if (start < node_pfn_start)
+			start = node_pfn_start;
+		if (end > node_pfn_end)
+			end = node_pfn_end;
+
+		if (start < min_low_pfn)
+			min_low_pfn = start;
+		if (end > max_low_pfn)
+			max_low_pfn = end;
+	}
+
+	if (mem_size_limit && max_low_pfn >= mem_size_limit) {
+		printk("setup: forcing memory size to %ldK (from %ldK).\n",
+		       mem_size_limit << (PAGE_SHIFT - 10),
+		       max_low_pfn   << (PAGE_SHIFT - 10));
+		max_low_pfn = mem_size_limit;
+	}
+
+	if (min_low_pfn >= max_low_pfn)
+		return;
+
+	num_physpages += max_low_pfn - min_low_pfn;
+
+	/* Cute trick to make sure our local node data is on local memory */
+	PLAT_NODE_DATA(nid) = (plat_pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
+	/* Quasi-mark the plat_pg_data_t as in-use */
+	min_low_pfn += node_datasz;
+	if (min_low_pfn >= max_low_pfn) {
+		printk(" not enough mem to reserve PLAT_NODE_DATA");
+		return;
+	}
+	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+
+	printk(" Detected node memory:   start %8lu, end %8lu\n",
+	       min_low_pfn, max_low_pfn);
+
+	DBGDCONT(" DISCONTIG: plat_node_data[%d]   is at 0x%p\n", nid, PLAT_NODE_DATA(nid));
+	DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata);
+
+	/* Find the bounds of kernel memory.  */
+	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
+	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
+	bootmap_start = -1;
+
+	if (!nid && (max_low_pfn < end_kernel_pfn || min_low_pfn > start_kernel_pfn))
+		panic("kernel loaded out of ram");
+
+	/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned */
+	min_low_pfn = (min_low_pfn + ((1UL << (MAX_ORDER-1))-1)) & ~((1UL << (MAX_ORDER-1))-1);
+
+	/* We need to know how many physically contiguous pages
+	   we'll need for the bootmap.  */
+	bootmap_pages = bootmem_bootmap_pages(max_low_pfn-min_low_pfn);
+
+	/* Now find a good region where to allocate the bootmap.  */
+	for_each_mem_cluster(memdesc, cluster, i) {
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = start + cluster->numpages;
+
+		if (start >= max_low_pfn || end <= min_low_pfn)
+			continue;
+
+		if (end > max_low_pfn)
+			end = max_low_pfn;
+		if (start < min_low_pfn)
+			start = min_low_pfn;
+
+		if (start < start_kernel_pfn) {
+			if (end > end_kernel_pfn
+			    && end - end_kernel_pfn >= bootmap_pages) {
+				bootmap_start = end_kernel_pfn;
+				break;
+			} else if (end > start_kernel_pfn)
+				end = start_kernel_pfn;
+		} else if (start < end_kernel_pfn)
+			start = end_kernel_pfn;
+		if (end - start >= bootmap_pages) {
+			bootmap_start = start;
+			break;
+		}
+	}
+
+	if (bootmap_start == -1)
+		panic("couldn't find a contiguous place for the bootmap");
+
+	/* Allocate the bootmap and mark the whole MM as reserved.  */
+	bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
+					 min_low_pfn, max_low_pfn);
+	DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n",
+		 bootmap_start, bootmap_size, bootmap_pages);
+
+	/* Mark the free regions.  */
+	for_each_mem_cluster(memdesc, cluster, i) {
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = cluster->start_pfn + cluster->numpages;
+
+		if (start >= max_low_pfn || end <= min_low_pfn)
+			continue;
+
+		if (end > max_low_pfn)
+			end = max_low_pfn;
+		if (start < min_low_pfn)
+			start = min_low_pfn;
+
+		if (start < start_kernel_pfn) {
+			if (end > end_kernel_pfn) {
+				free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start),
+					     (PFN_PHYS(start_kernel_pfn)
+					      - PFN_PHYS(start)));
+				printk(" freeing pages %ld:%ld\n",
+				       start, start_kernel_pfn);
+				start = end_kernel_pfn;
+			} else if (end > start_kernel_pfn)
+				end = start_kernel_pfn;
+		} else if (start < end_kernel_pfn)
+			start = end_kernel_pfn;
+		if (start >= end)
+			continue;
+
+		free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
+		printk(" freeing pages %ld:%ld\n", start, end);
+	}
+
+	/* Reserve the bootmap memory.  */
+	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), bootmap_size);
+	printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
+
+	numnodes++;
+}
+
+void __init
+setup_memory(void *kernel_end)
+{
+	int nid;
+
+	show_mem_layout();
+
+	numnodes = 0;
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		setup_memory_node(nid, kernel_end);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_start = INITRD_START;
+	if (initrd_start) {
+		initrd_end = initrd_start+INITRD_SIZE;
+		printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
+		       (void *) initrd_start, INITRD_SIZE);
+
+		if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
+			printk("initrd extends beyond end of memory "
+			       "(0x%08lx > 0x%p)\ndisabling initrd\n",
+			       initrd_end,
+			       phys_to_virt(PFN_PHYS(max_low_pfn)));
+			initrd_start = initrd_end = 0;
+		} else {
+			/* Assume the initrd to be in the first node */
+			reserve_bootmem_node(NODE_DATA(0), virt_to_phys((void *)initrd_start),
+					     INITRD_SIZE);
+		}
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+void __init paging_init(void)
+{
+	unsigned int    nid;
+	unsigned long   zones_size[MAX_NR_ZONES] = {0, };
+	unsigned long	dma_local_pfn;
+
+	/*
+	 * The old global MAX_DMA_ADDRESS per-arch API doesn't fit
+	 * in the NUMA model, for now we convert it to a pfn and
+	 * we interpret this pfn as a local per-node information.
+	 * This issue isn't very important since none of these machines
+	 * have legacy ISA slots anyways.
+	 */
+	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+	for (nid = 0; nid < numnodes; nid++) {
+		unsigned long start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT;
+		unsigned long end_pfn = plat_node_bdata[nid].node_low_pfn;
+		unsigned long lmax_mapnr;
+
+		if (dma_local_pfn >= end_pfn - start_pfn)
+			zones_size[ZONE_DMA] = end_pfn - start_pfn;
+		else {
+			zones_size[ZONE_DMA] = dma_local_pfn;
+			zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
+		}
+		free_area_init_node(nid, NODE_DATA(nid), NULL, zones_size,
+				    start_pfn << PAGE_SHIFT, NULL);
+		lmax_mapnr = PLAT_NODE_DATA_STARTNR(nid) + PLAT_NODE_DATA_SIZE(nid);
+		if (lmax_mapnr > max_mapnr) {
+			max_mapnr = lmax_mapnr;
+			DBGDCONT("Grow max_mapnr to %ld\n", max_mapnr);
+		}
+	}
+
+	/* Initialize the kernel's ZERO_PGE.  */
+	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
+}
+
+#define printkdot()					\
+do {							\
+	if (!(i++ % ((100UL*1024*1024)>>PAGE_SHIFT)))	\
+		printk(".");				\
+} while(0)
+
+#define clobber(p, size) memset((p)->virtual, 0xaa, (size))
+
+void __init mem_stress(void)
+{
+	LIST_HEAD(x);
+	LIST_HEAD(xx);
+	struct page * p;
+	unsigned long i = 0;
+
+	printk("starting memstress");
+	while ((p = alloc_pages(GFP_ATOMIC, 1))) {
+		clobber(p, PAGE_SIZE*2);
+		list_add(&p->list, &x);
+		printkdot();
+	}
+	while ((p = alloc_page(GFP_ATOMIC))) {
+		clobber(p, PAGE_SIZE);
+		list_add(&p->list, &xx);
+		printkdot();
+	}
+	while (!list_empty(&x)) {
+		p = list_entry(x.next, struct page, list);
+		clobber(p, PAGE_SIZE*2);
+		list_del(x.next);
+		__free_pages(p, 1);
+		printkdot();
+	}
+	while (!list_empty(&xx)) {
+		p = list_entry(xx.next, struct page, list);
+		clobber(p, PAGE_SIZE);
+		list_del(xx.next);
+		__free_pages(p, 0);
+		printkdot();
+	}
+	printk("I'm still alive duh!\n");
+}
+
+#undef printkdot
+#undef clobber
+
+void __init mem_init(void)
+{
+	unsigned long codesize, reservedpages, datasize, initsize, pfn;
+	extern int page_is_ram(unsigned long) __init;
+	extern char _text, _etext, _data, _edata;
+	extern char __init_begin, __init_end;
+	extern unsigned long totalram_pages;
+	unsigned long nid, i;
+	mem_map_t * lmem_map;
+
+	high_memory = (void *) __va(max_mapnr << PAGE_SHIFT);
+
+	reservedpages = 0;
+	for (nid = 0; nid < numnodes; nid++) {
+		/*
+		 * This will free up the bootmem, ie, slot 0 memory.
+		 */
+		totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
+
+		lmem_map = NODE_MEM_MAP(nid);
+		pfn = NODE_DATA(nid)->node_start_paddr >> PAGE_SHIFT;
+		for (i = 0; i < PLAT_NODE_DATA_SIZE(nid); i++, pfn++)
+			if (page_is_ram(pfn) && PageReserved(lmem_map+i))
+				reservedpages++;
+	}
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_data;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
+	       "%luk data, %luk init)\n",
+	       nr_free_pages() << (PAGE_SHIFT-10),
+	       num_physpages << (PAGE_SHIFT-10),
+	       codesize >> 10,
+	       reservedpages << (PAGE_SHIFT-10),
+	       datasize >> 10,
+	       initsize >> 10);
+#if 0
+	mem_stress();
+#endif
+}
+
+void
+show_mem(void)
+{
+	long i,free = 0,total = 0,reserved = 0;
+	long shared = 0, cached = 0;
+	int nid;
+
+	printk("\nMem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+	for (nid = 0; nid < numnodes; nid++) {
+		mem_map_t * lmem_map = NODE_MEM_MAP(nid);
+		i = PLAT_NODE_DATA_SIZE(nid);
+		while (i-- > 0) {
+			total++;
+			if (PageReserved(lmem_map+i))
+				reserved++;
+			else if (PageSwapCache(lmem_map+i))
+				cached++;
+			else if (!page_count(lmem_map+i))
+				free++;
+			else
+				shared += atomic_read(&lmem_map[i].count) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n",total);
+	printk("%ld free pages\n",free);
+	printk("%ld reserved pages\n",reserved);
+	printk("%ld pages shared\n",shared);
+	printk("%ld pages swap cached\n",cached);
+	printk("%ld pages in page table cache\n",pgtable_cache_size);
+	show_buffers();
+}
diff -urN 2.4.4/include/asm-alpha/bitops.h alpha-numa/include/asm-alpha/bitops.h
--- 2.4.4/include/asm-alpha/bitops.h	Tue Jan 23 16:17:47 2001
+++ alpha-numa/include/asm-alpha/bitops.h	Tue May  1 14:06:12 2001
@@ -46,7 +46,7 @@
 {
 	int *m = ((int *) addr) + (nr >> 5);
 
-	*m |= 1UL << (nr & 31);
+	*m |= 1 << (nr & 31);
 }
 
 #define smp_mb__before_clear_bit()	smp_mb()
@@ -70,6 +70,17 @@
 	:"Ir" (~(1UL << (nr & 31))), "m" (*m));
 }
 
+/*
+ * WARNING: non atomic version.
+ */
+static __inline__ void
+__change_bit(unsigned long nr, volatile void * addr)
+{
+	int *m = ((int *) addr) + (nr >> 5);
+
+	*m ^= 1 << (nr & 31);
+}
+
 extern __inline__ void
 change_bit(unsigned long nr, volatile void * addr)
 {
@@ -167,6 +178,20 @@
 	int old = *m;
 
 	*m = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/*
+ * WARNING: non atomic version.
+ */
+static __inline__ int
+__test_and_change_bit(unsigned long nr, volatile void * addr)
+{
+	unsigned long mask = 1 << (nr & 0x1f);
+	int *m = ((int *) addr) + (nr >> 5);
+	int old = *m;
+
+	*m = old ^ mask;
 	return (old & mask) != 0;
 }
diff -urN 2.4.4/include/asm-alpha/mmzone.h alpha-numa/include/asm-alpha/mmzone.h
--- 2.4.4/include/asm-alpha/mmzone.h	Thu Jan  1 01:00:00 1970
+++ alpha-numa/include/asm-alpha/mmzone.h	Tue May  1 14:06:12 2001
@@ -0,0 +1,127 @@
+/*
+ * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
+ * Adapted for the alpha wildfire architecture Jan 2001.
+ */
+#ifndef _ASM_MMZONE_H_
+#define _ASM_MMZONE_H_
+
+#include <linux/config.h>
+#ifdef CONFIG_NUMA_SCHED
+#include <linux/numa_sched.h>
+#endif
+#ifdef NOTYET
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/klkernvars.h>
+#endif /* NOTYET */
+
+typedef struct plat_pglist_data {
+	pg_data_t	gendata;
+#ifdef NOTYET
+	kern_vars_t	kern_vars;
+#endif
+#if defined(CONFIG_NUMA) && defined(CONFIG_NUMA_SCHED)
+	struct numa_schedule_data schedule_data;
+#endif
+} plat_pg_data_t;
+
+struct bootmem_data_t; /* stupid forward decl. */
+
+/*
+ * Following are macros that are specific to this numa platform.
+ */
+
+extern plat_pg_data_t *plat_node_data[];
+
+#ifdef CONFIG_ALPHA_WILDFIRE
+# define ALPHA_PA_TO_NID(pa)	((pa) >> 36)	/* 16 nodes max due to 43-bit kseg */
+#define NODE_MAX_MEM_SIZE	(64L * 1024L * 1024L * 1024L) /* 64 GB */
+#define MAX_NUMNODES		WILDFIRE_MAX_QBB
+#else
+# define ALPHA_PA_TO_NID(pa)	(0)
+#define NODE_MAX_MEM_SIZE	(~0UL)
+#define MAX_NUMNODES		1
+#endif
+
+#define PHYSADDR_TO_NID(pa)		ALPHA_PA_TO_NID(pa)
+#define PLAT_NODE_DATA(n)		(plat_node_data[(n)])
+#define PLAT_NODE_DATA_STARTNR(n)	\
+	(PLAT_NODE_DATA(n)->gendata.node_start_mapnr)
+#define PLAT_NODE_DATA_SIZE(n)		(PLAT_NODE_DATA(n)->gendata.node_size)
+
+#if 1
+#define PLAT_NODE_DATA_LOCALNR(p, n)	\
+	(((p) - PLAT_NODE_DATA(n)->gendata.node_start_paddr) >> PAGE_SHIFT)
+#else
+static inline unsigned long
+PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
+{
+	unsigned long temp;
+	temp = p - PLAT_NODE_DATA(n)->gendata.node_start_paddr;
+	return (temp >> PAGE_SHIFT);
+}
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+
+/*
+ * Following are macros that each numa implementation must define.
+ */
+
+/*
+ * Given a kernel address, find the home node of the underlying memory.
+ */
+#define KVADDR_TO_NID(kaddr)	PHYSADDR_TO_NID(__pa(kaddr))
+
+/*
+ * Return a pointer to the node data for node n.
+ */
+#define NODE_DATA(n)	(&((PLAT_NODE_DATA(n))->gendata))
+
+/*
+ * NODE_MEM_MAP gives the kaddr for the mem_map of the node.
+ */
+#define NODE_MEM_MAP(nid)	(NODE_DATA(nid)->node_mem_map)
+
+/*
+ * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory
+ * and returns the mem_map of that node.
+ */
+#define ADDR_TO_MAPBASE(kaddr) \
+			NODE_MEM_MAP(KVADDR_TO_NID((unsigned long)(kaddr)))
+
+/*
+ * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
+ * and returns the kaddr corresponding to the first physical page in the
+ * node's mem_map.
+ */
+#define LOCAL_BASE_ADDR(kaddr)	((unsigned long)__va(NODE_DATA(KVADDR_TO_NID(kaddr))->node_start_paddr))
+
+#define LOCAL_MAP_NR(kvaddr) \
+	(((unsigned long)(kvaddr)-LOCAL_BASE_ADDR(kvaddr)) >> PAGE_SHIFT)
+
+#define kern_addr_valid(kaddr)	test_bit(LOCAL_MAP_NR(kaddr), \
+					 NODE_DATA(KVADDR_TO_NID(kaddr))->valid_addr_bitmap)
+
+#define virt_to_page(kaddr)	(ADDR_TO_MAPBASE(kaddr) + LOCAL_MAP_NR(kaddr))
+#define VALID_PAGE(page)	(((page) - mem_map) < max_mapnr)
+
+#ifdef CONFIG_NUMA
+#ifdef CONFIG_NUMA_SCHED
+#define NODE_SCHEDULE_DATA(nid)	(&((PLAT_NODE_DATA(nid))->schedule_data))
+#endif
+
+#ifdef CONFIG_ALPHA_WILDFIRE
+/* With wildfire assume 4 CPUs per node */
+#define cputonode(cpu)	((cpu) >> 2)
+#else
+#define cputonode(cpu)	0
+#endif /* CONFIG_ALPHA_WILDFIRE */
+
+#define numa_node_id()	cputonode(smp_processor_id())
+#endif /* CONFIG_NUMA */
+
+#endif /* CONFIG_DISCONTIGMEM */
+
+#endif /* _ASM_MMZONE_H_ */
diff -urN 2.4.4/include/asm-alpha/page.h alpha-numa/include/asm-alpha/page.h
--- 2.4.4/include/asm-alpha/page.h	Thu Feb 22 03:45:10 2001
+++ alpha-numa/include/asm-alpha/page.h	Tue May  1 14:06:12 2001
@@ -1,6 +1,8 @@
 #ifndef _ALPHA_PAGE_H
 #define _ALPHA_PAGE_H
 
+#include <linux/config.h>
+
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	13
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)
@@ -33,6 +35,7 @@
 #define pgprot_val(x)	((x).pgprot)
 
 #define __pte(x)	((pte_t) { (x) } )
+#define __pmd(x)	((pmd_t) { (x) } )
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
@@ -56,7 +59,11 @@
 
 #endif /* STRICT_MM_TYPECHECKS */
 
-#define BUG()	__asm__ __volatile__("call_pal 129 # bugchk")
+#define BUG() \
+do { \
+	printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+	__asm__ __volatile__("call_pal %0 # bugchk" : : "i" (PAL_bugchk)); \
+} while (0)
 #define PAGE_BUG(page)	BUG()
 
 /* Pure 2^n version of get_order */
@@ -86,8 +93,10 @@
 #define __pa(x)		((unsigned long) (x) - PAGE_OFFSET)
 #define __va(x)		((void *)((unsigned long) (x) + PAGE_OFFSET))
+#ifndef CONFIG_DISCONTIGMEM
 #define virt_to_page(kaddr)	(mem_map + (__pa(kaddr) >> PAGE_SHIFT))
-#define VALID_PAGE(page)	((page - mem_map) < max_mapnr)
+#define VALID_PAGE(page)	(((page) - mem_map) < max_mapnr)
+#endif /* CONFIG_DISCONTIGMEM */
 
 #endif /* __KERNEL__ */
diff -urN 2.4.4/include/asm-alpha/pgalloc.h alpha-numa/include/asm-alpha/pgalloc.h
--- 2.4.4/include/asm-alpha/pgalloc.h	Sat Apr 28 05:24:44 2001
+++ alpha-numa/include/asm-alpha/pgalloc.h	Tue May  1 14:06:14 2001
@@ -229,6 +229,7 @@
 #ifndef CONFIG_SMP
 extern struct pgtable_cache_struct {
 	unsigned long *pgd_cache;
+	unsigned long *pmd_cache;
 	unsigned long *pte_cache;
 	unsigned long pgtable_cache_sz;
 } quicklists;
@@ -237,12 +238,12 @@
 #define quicklists cpu_data[smp_processor_id()]
 #endif
 #define pgd_quicklist (quicklists.pgd_cache)
-#define pmd_quicklist ((unsigned long *)0)
+#define pmd_quicklist (quicklists.pmd_cache)
 #define pte_quicklist (quicklists.pte_cache)
 #define pgtable_cache_size (quicklists.pgtable_cache_sz)
 
-#define pmd_populate(mm, pmd, pte)	pmd_set(pmd, pte)
 #define pgd_populate(mm, pgd, pmd)	pgd_set(pgd, pmd)
+#define pmd_populate(mm, pmd, pte)	pmd_set(pmd, pte)
 
 extern pgd_t *get_pgd_slow(void);
 
@@ -252,7 +253,7 @@
 
 	if ((ret = pgd_quicklist) != NULL) {
 		pgd_quicklist = (unsigned long *)(*ret);
-		ret[0] = ret[1];
+		ret[0] = 0;
 		pgtable_cache_size--;
 	} else
 		ret = (unsigned long *)get_pgd_slow();
@@ -271,68 +272,72 @@
 	free_page((unsigned long)pgd);
 }
 
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
-	if (ret)
-		clear_page(ret);
-	return ret;
+	pte_t *pte;
+
+	pte = (pte_t *) __get_free_page(GFP_KERNEL);
+	if (pte)
+		clear_page(pte);
+	return pte;
 }
 
-static inline pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long *ret;
 
 	if ((ret = (unsigned long *)pte_quicklist) != NULL) {
 		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = ret[1];
+		ret[0] = 0;
 		pgtable_cache_size--;
 	}
-	return (pmd_t *)ret;
+	return (pte_t *)ret;
 }
 
-static inline void pmd_free_fast(pmd_t *pmd)
+static inline void pte_free_fast(pte_t *pte)
 {
-	*(unsigned long *)pmd = (unsigned long) pte_quicklist;
-	pte_quicklist = (unsigned long *) pmd;
+	*(unsigned long *)pte = (unsigned long) pte_quicklist;
+	pte_quicklist = (unsigned long *) pte;
 	pgtable_cache_size++;
 }
 
-static inline void pmd_free_slow(pmd_t *pmd)
+static inline void pte_free_slow(pte_t *pte)
 {
-	free_page((unsigned long)pmd);
+	free_page((unsigned long)pte);
 }
 
-static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
-	if (pte)
-		clear_page(pte);
-	return pte;
+	pmd_t *pmd;
+
+	pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
+	if (pmd)
+		clear_page(pmd);
+	return pmd;
 }
 
-static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline pmd_t *pmd_alloc_one_fast(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long *ret;
 
-	if ((ret = (unsigned long *)pte_quicklist) != NULL) {
-		pte_quicklist = (unsigned long *)(*ret);
-		ret[0] = ret[1];
+	if ((ret = (unsigned long *)pmd_quicklist) != NULL) {
+		pmd_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
 		pgtable_cache_size--;
 	}
-	return (pte_t *)ret;
+	return (pmd_t *)ret;
 }
 
-static inline void pte_free_fast(pte_t *pte)
+static inline void pmd_free_fast(pmd_t *pmd)
 {
-	*(unsigned long *)pte = (unsigned long) pte_quicklist;
-	pte_quicklist = (unsigned long *) pte;
+	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
+	pmd_quicklist = (unsigned long *) pmd;
 	pgtable_cache_size++;
 }
 
-static inline void pte_free_slow(pte_t *pte)
+static inline void pmd_free_slow(pmd_t *pmd)
 {
-	free_page((unsigned long)pte);
+	free_page((unsigned long)pmd);
 }
 
 #define pte_free(pte)	pte_free_fast(pte)
diff -urN 2.4.4/include/asm-alpha/pgtable.h alpha-numa/include/asm-alpha/pgtable.h
--- 2.4.4/include/asm-alpha/pgtable.h	Sat Apr 28 05:24:44 2001
+++ alpha-numa/include/asm-alpha/pgtable.h	Tue May  1 14:06:12 2001
@@ -9,6 +9,7 @@
 * in <asm/page.h> (currently 8192).
 */
 #include <linux/config.h>
+#include <asm/mmzone.h>
 #include <asm/page.h>
 #include <asm/processor.h>	/* For TASK_SIZE */
 
@@ -189,6 +190,15 @@
 /*
 * Conversion functions:  convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */
+#ifndef CONFIG_DISCONTIGMEM
+#define PAGE_TO_PA(page)	((page - mem_map) << PAGE_SHIFT)
+#else
+#define PAGE_TO_PA(page)						\
+		((((page)-(page)->zone->zone_mem_map) << PAGE_SHIFT)	\
+		 + (page)->zone->zone_start_paddr)
+#endif
+
+#ifndef CONFIG_DISCONTIGMEM
 #define mk_pte(page, pgprot)						\
 ({									\
 	pte_t pte;							\
 									\
 	pte_val(pte) = ((unsigned long)(page - mem_map) << 32) |	\
 		       pgprot_val(pgprot);				\
 	pte;								\
 })
+#else
+#define mk_pte(page, pgprot)						\
+({									\
+	pte_t pte;							\
+	unsigned long pfn;						\
+									\
+	pfn = ((unsigned long)((page)-(page)->zone->zone_mem_map)) << 32; \
+	pfn += (page)->zone->zone_start_paddr << (32-PAGE_SHIFT);	\
+	pte_val(pte) = pfn | pgprot_val(pgprot);			\
+									\
+	pte;								\
+})
+#endif
 
 extern inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 { pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpage) << (32-PAGE_SHIFT)) | pgprot_val(pgprot); return pte; }
@@ -210,7 +233,20 @@
 extern inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
 { pgd_val(*pgdp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); }
 
+#ifndef CONFIG_DISCONTIGMEM
 #define pte_page(x)	(mem_map+(unsigned long)((pte_val(x) >> 32)))
+#else
+#define pte_page(x)							\
+({									\
+	unsigned long kvirt;						\
+	struct page * __xx;						\
+									\
+	kvirt = (unsigned long)__va(pte_val(x) >> (32-PAGE_SHIFT));	\
+	__xx = virt_to_page(kvirt);					\
+									\
+	__xx;								\
+})
+#endif
 
 extern inline unsigned long pmd_page(pmd_t pmd)
 { return PAGE_OFFSET + ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
@@ -303,7 +339,10 @@
 
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define PageSkip(page)		(0)
+
+#ifndef CONFIG_DISCONTIGMEM
 #define kern_addr_valid(addr)	(1)
+#endif
 
 #define io_remap_page_range(start, busaddr, size, prot)    \
     remap_page_range(start, virt_to_phys(__ioremap(busaddr)), size, prot)
diff -urN 2.4.4/include/asm-alpha/smp.h alpha-numa/include/asm-alpha/smp.h
--- 2.4.4/include/asm-alpha/smp.h	Sun Apr  1 01:17:31 2001
+++ alpha-numa/include/asm-alpha/smp.h	Tue May  1 14:06:12 2001
@@ -29,6 +29,7 @@
 	int need_new_asn;
 	int asn_lock;
 	unsigned long *pgd_cache;
+	unsigned long *pmd_cache;
 	unsigned long *pte_cache;
 	unsigned long pgtable_cache_sz;
 	unsigned long ipi_count;
diff -urN 2.4.4/include/asm-alpha/spinlock.h alpha-numa/include/asm-alpha/spinlock.h
--- 2.4.4/include/asm-alpha/spinlock.h	Tue Jan 23 23:29:24 2001
+++ alpha-numa/include/asm-alpha/spinlock.h	Tue May  1 14:06:12 2001
@@ -96,6 +96,8 @@
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
 
+#define rwlock_init(x)	do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
 #if DEBUG_RWLOCK
 extern void write_lock(rwlock_t * lock);
 extern void read_lock(rwlock_t * lock);
diff -urN 2.4.4/include/asm-alpha/system.h alpha-numa/include/asm-alpha/system.h
--- 2.4.4/include/asm-alpha/system.h	Mon Jan 22 03:33:50 2001
+++ alpha-numa/include/asm-alpha/system.h	Tue May  1 14:06:12 2001
@@ -35,6 +35,18 @@
 
 #define START_ADDR	(PAGE_OFFSET+KERNEL_START_PHYS+0x10000)
 
+/*
+ * This is setup by the secondary bootstrap loader.  Because
+ * the zero page is zeroed out as soon as the vm system is
+ * initialized, we need to copy things out into a more permanent
+ * place.
+ */
+#define PARAM			ZERO_PGE
+#define COMMAND_LINE		((char*)(PARAM + 0x0000))
+#define COMMAND_LINE_SIZE	256
+#define INITRD_START		(*(unsigned long *) (PARAM+0x100))
+#define INITRD_SIZE		(*(unsigned long *) (PARAM+0x108))
+
 #ifndef __ASSEMBLY__
 
 #include <linux/kernel.h>
diff -urN 2.4.4/include/asm-i386/bitops.h alpha-numa/include/asm-i386/bitops.h
--- 2.4.4/include/asm-i386/bitops.h	Tue May  1 06:06:15 2001
+++ alpha-numa/include/asm-i386/bitops.h	Tue May  1 14:06:12 2001
@@ -79,6 +79,23 @@
 #define smp_mb__after_clear_bit()	barrier()
 
 /**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+	__asm__ __volatile__(
+		"btcl %1,%0"
+		:"=m" (ADDR)
+		:"Ir" (nr));
+}
+
+/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
@@ -170,6 +187,18 @@
 		"btrl %2,%1\n\tsbbl %0,%0"
 		:"=r" (oldbit),"=m" (ADDR)
 		:"Ir" (nr));
+	return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+	int oldbit;
+
+	__asm__ __volatile__(
+		"btcl %2,%1\n\tsbbl %0,%0"
+		:"=r" (oldbit),"=m" (ADDR)
+		:"Ir" (nr) : "memory");
 	return oldbit;
 }
diff -urN 2.4.4/include/linux/mm.h alpha-numa/include/linux/mm.h
--- 2.4.4/include/linux/mm.h	Tue May  1 06:06:15 2001
+++ alpha-numa/include/linux/mm.h	Tue May  1 14:06:12 2001
@@ -480,7 +480,6 @@
 #else
 #define __GFP_HIGHMEM	0x0 /* noop */
 #endif
-#define __GFP_VM	0x20
 
 
 #define GFP_BUFFER	(__GFP_HIGH | __GFP_WAIT)
diff -urN 2.4.4/include/linux/mmzone.h alpha-numa/include/linux/mmzone.h
--- 2.4.4/include/linux/mmzone.h	Tue May  1 06:06:15 2001
+++ alpha-numa/include/linux/mmzone.h	Tue May  1 14:06:12 2001
@@ -16,7 +16,7 @@
 
 typedef struct free_area_struct {
 	struct list_head	free_list;
-	unsigned int		*map;
+	unsigned long		*map;
 } free_area_t;
 
 struct pglist_data;
@@ -34,7 +34,6 @@
 	 * Commonly accessed fields:
 	 */
 	spinlock_t		lock;
-	unsigned long		offset;
 	unsigned long		free_pages;
 	unsigned long		inactive_clean_pages;
 	unsigned long		inactive_dirty_pages;
@@ -47,17 +46,18 @@
 	free_area_t		free_area[MAX_ORDER];
 
 	/*
-	 * rarely used fields:
-	 */
-	char			*name;
-	unsigned long		size;
-	/*
 	 * Discontig memory support fields.
 	 */
 	struct pglist_data	*zone_pgdat;
+	struct page		*zone_mem_map;
 	unsigned long		zone_start_paddr;
 	unsigned long		zone_start_mapnr;
-	struct page		*zone_mem_map;
+
+	/*
+	 * rarely used fields:
+	 */
+	char			*name;
+	unsigned long		size;
 } zone_t;
 
 #define ZONE_DMA		0
@@ -81,7 +81,7 @@
 	int gfp_mask;
 } zonelist_t;
 
-#define NR_GFPINDEX		0x100
+#define NR_GFPINDEX		0x20
 
 /*
 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -112,8 +112,7 @@
 extern pg_data_t *pgdat_list;
 
 #define memclass(pgzone, tzone)	(((pgzone)->zone_pgdat == (tzone)->zone_pgdat) \
-			&& (((pgzone) - (pgzone)->zone_pgdat->node_zones) <= \
-			    ((tzone) - (pgzone)->zone_pgdat->node_zones)))
+			&& ((pgzone) <= (tzone)))
 
 /*
 * The following two are not meant for general usage.  They are here as
 * prototypes for the discontig implementations.
 */
diff -urN 2.4.4/include/linux/swap.h alpha-numa/include/linux/swap.h
--- 2.4.4/include/linux/swap.h	Tue May  1 06:06:22 2001
+++ alpha-numa/include/linux/swap.h	Tue May  1 14:06:12 2001
@@ -64,9 +64,9 @@
 };
 
 extern int nr_swap_pages;
-FASTCALL(unsigned int nr_free_pages(void));
-FASTCALL(unsigned int nr_inactive_clean_pages(void));
-FASTCALL(unsigned int nr_free_buffer_pages(void));
+extern unsigned int nr_free_pages(void);
+extern unsigned int nr_inactive_clean_pages(void);
+extern unsigned int nr_free_buffer_pages(void);
 extern int nr_active_pages;
 extern int nr_inactive_dirty_pages;
 extern atomic_t nr_async_pages;
diff -urN 2.4.4/kernel/ksyms.c alpha-numa/kernel/ksyms.c
--- 2.4.4/kernel/ksyms.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/kernel/ksyms.c	Tue May  1 14:06:12 2001
@@ -98,6 +98,8 @@
 EXPORT_SYMBOL(free_pages);
 #ifndef CONFIG_DISCONTIGMEM
 EXPORT_SYMBOL(contig_page_data);
+#else
+EXPORT_SYMBOL(alloc_pages);
 #endif
 EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(kmem_find_general_cachep);
diff -urN 2.4.4/mm/bootmem.c alpha-numa/mm/bootmem.c
--- 2.4.4/mm/bootmem.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/mm/bootmem.c	Tue May  1 14:06:12 2001
@@ -83,6 +83,14 @@
 
 	if (!size) BUG();
 
+	if (sidx < 0)
+		BUG();
+	if (eidx < 0)
+		BUG();
+	if (sidx >= eidx)
+		BUG();
+	if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn)
+		BUG();
 	if (end > bdata->node_low_pfn)
 		BUG();
 	for (i = sidx; i < eidx; i++)
@@ -142,6 +150,9 @@
 						PAGE_SHIFT);
 	if (!size) BUG();
 
+	if (align & (align-1))
+		BUG();
+
 	/*
 	 * We try to allocate bootmem pages above 'goal'
diff -urN 2.4.4/mm/mmap.c alpha-numa/mm/mmap.c
--- 2.4.4/mm/mmap.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/mm/mmap.c	Tue May  1 14:06:12 2001
@@ -54,7 +54,7 @@
 	 * of num_physpages for safety margin.
 	 */
 
-	long free;
+	unsigned long free;
 
 	/* Sometimes we want to use more memory than we have. */
 	if (sysctl_overcommit_memory)
diff -urN 2.4.4/mm/page_alloc.c alpha-numa/mm/page_alloc.c
--- 2.4.4/mm/page_alloc.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/mm/page_alloc.c	Tue May  1 14:06:12 2001
@@ -53,7 +53,7 @@
 /*
 * Temporary debugging check.
 */
-#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size))
+#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))
 
 /*
 * Buddy system. Hairy. You really aren't expected to understand this
@@ -94,7 +94,7 @@
 	zone = page->zone;
 
 	mask = (~0UL) << order;
-	base = mem_map + zone->offset;
+	base = zone->zone_mem_map;
 	page_idx = page - base;
 	if (page_idx & ~mask)
 		BUG();
@@ -111,7 +111,7 @@
 		if (area >= zone->free_area + MAX_ORDER)
 			BUG();
-		if (!test_and_change_bit(index, area->map))
+		if (!__test_and_change_bit(index, area->map))
 			/*
 			 * the buddy page is still allocated.
 			 */
@@ -146,7 +146,7 @@
 }
 
 #define MARK_USED(index, order, area) \
-	change_bit((index) >> (1+(order)), (area)->map)
+	__change_bit((index) >> (1+(order)), (area)->map)
 
 static inline struct page * expand (zone_t *zone, struct page *page,
	 unsigned long index, int low, int high, free_area_t * area)
@@ -190,8 +190,9 @@
 		if (BAD_RANGE(zone,page))
 			BUG();
 		memlist_del(curr);
-		index = (page - mem_map) - zone->offset;
-		MARK_USED(index, curr_order, area);
+		index = page - zone->zone_mem_map;
+		if (curr_order != MAX_ORDER-1)
+			MARK_USED(index, curr_order, area);
 		zone->free_pages -= 1 << order;
 
 		page = expand(zone, page, index, order, curr_order, area);
@@ -743,7 +744,10 @@
 	unsigned long i, j;
 	unsigned long map_size;
 	unsigned long totalpages, offset, realtotalpages;
-	unsigned int cumulative = 0;
+	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
+
+	if (zone_start_paddr & ~PAGE_MASK)
+		BUG();
 
 	totalpages = 0;
 	for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -812,8 +816,6 @@
 		if (!size)
 			continue;
 
-		zone->offset = offset;
-		cumulative += size;
 		mask = (realsize / zone_balance_ratio[j]);
 		if (mask < zone_balance_min[j])
 			mask = zone_balance_min[j];
@@ -840,28 +842,34 @@
 		zone->zone_start_mapnr = offset;
 		zone->zone_start_paddr = zone_start_paddr;
 
+		if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
+			printk("BUG: wrong zone alignment, it will crash\n");
+
 		for (i = 0; i < size; i++) {
 			struct page *page = mem_map + offset + i;
 			page->zone = zone;
-			if (j != ZONE_HIGHMEM) {
+			if (j != ZONE_HIGHMEM)
 				page->virtual = __va(zone_start_paddr);
-				zone_start_paddr += PAGE_SIZE;
-			}
+			zone_start_paddr += PAGE_SIZE;
 		}
 
 		offset += size;
 		mask = -1;
-		for (i = 0; i < MAX_ORDER; i++) {
+		for (i = 0; ; i++) {
 			unsigned long bitmap_size;
 
 			memlist_init(&zone->free_area[i].free_list);
+			if (i == MAX_ORDER-1) {
+				zone->free_area[i].map = NULL;
+				break;
+			}
 			mask += mask;
 			size = (size + ~mask) & mask;
-			bitmap_size = size >> i;
+			bitmap_size = size >> (i+1);
 			bitmap_size = (bitmap_size + 7) >> 3;
 			bitmap_size = LONG_ALIGN(bitmap_size);
 			zone->free_area[i].map = 
-			  (unsigned int *) alloc_bootmem_node(pgdat, bitmap_size);
+			  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
 		}
 	}
 	build_zonelists(pgdat);
diff -urN 2.4.4/mm/swap_state.c alpha-numa/mm/swap_state.c
--- 2.4.4/mm/swap_state.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/mm/swap_state.c	Tue May  1 14:06:12 2001
@@ -189,7 +189,6 @@
 struct page * read_swap_cache_async(swp_entry_t entry)
 {
 	struct page *found_page = 0, *new_page;
-	unsigned long new_page_addr;
 
 	/*
 	 * Make sure the swap entry is still in use.
@@ -203,10 +202,9 @@
 	if (found_page)
 		goto out_free_swap;
 
-	new_page_addr = __get_free_page(GFP_USER);
-	if (!new_page_addr)
+	new_page = alloc_page(GFP_USER);
+	if (!new_page)
 		goto out_free_swap;	/* Out of memory */
-	new_page = virt_to_page(new_page_addr);
 
 	/*
 	 * Check the swap cache again, in case we stalled above.
@@ -217,7 +215,8 @@
 	/*
 	 * Add it to the swap cache and read its contents.
 	 */
-	lock_page(new_page);
+	if (TryLockPage(new_page))
+		BUG();
 	add_to_swap_cache(new_page, entry);
 	rw_swap_page(READ, new_page);
 	return new_page;
diff -urN 2.4.4/mm/vmscan.c alpha-numa/mm/vmscan.c
--- 2.4.4/mm/vmscan.c	Sat Apr 28 05:24:48 2001
+++ alpha-numa/mm/vmscan.c	Tue May  1 14:06:12 2001
@@ -888,8 +888,7 @@
 {
 	struct task_struct *tsk = current;
 
-	tsk->session = 1;
-	tsk->pgrp = 1;
+	daemonize();
 	strcpy(tsk->comm, "kswapd");
 	sigfillset(&tsk->blocked);
 	kswapd_task = tsk;
@@ -997,8 +996,7 @@
 	struct task_struct *tsk = current;
 	pg_data_t *pgdat;
 
-	tsk->session = 1;
-	tsk->pgrp = 1;
+	daemonize();
 	strcpy(tsk->comm, "kreclaimd");
 	sigfillset(&tsk->blocked);
 	current->flags |= PF_MEMALLOC;
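
A note on the cpu_idle() change in arch/alpha/kernel/process.c above: the idle
thread atomically writes -1UL into its own need_resched with xchg() and, if
nothing was already pending, spins until the value goes non-negative again.
smp_send_reschedule() (or the local timer tick) writing 1 is what breaks the
poll, so the idle CPU only falls into schedule() when somebody actually asked
for a reschedule.  The following user-space model sketches that handshake with
C11 atomics; it is illustrative only, and the names (need_resched, idle_loop,
other_cpu) are made up for the example, not kernel code.

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static atomic_long need_resched = 0;	/* 0 idle, -1 polling, 1 resched pending */

static void *idle_loop(void *arg)
{
	/* Atomically advertise that we are polling (-1), like the xchg(). */
	long oldval = atomic_exchange(&need_resched, -1L);

	if (!oldval)	/* nothing was pending: poll until the sign flips */
		while (atomic_load(&need_resched) < 0)
			;
	printf("idle thread: would call schedule() now\n");
	return NULL;
}

static void *other_cpu(void *arg)
{
	/* smp_send_reschedule() analogue: make need_resched >= 0 again. */
	atomic_store(&need_resched, 1L);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, idle_loop, NULL);
	pthread_create(&b, NULL, other_cpu, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The point of the -1 marker is that a remote CPU can tell a polling idle CPU
apart from a busy one, so it can skip sending a wakeup IPI entirely.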
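On the mm/page_alloc.c changes: the free_area bitmaps keep one bit per *pair*
of order-i buddies (hence bitmap_size = size >> (i+1)), the bit is toggled on
every allocate/free of either buddy, and MAX_ORDER-1 needs no map at all since
a top-order block has no buddy to coalesce with.  The toggles can use the
non-atomic __change_bit/__test_and_change_bit because the zone spinlock is
held.  Here is a toy model of one coalescing step under those assumptions;
every name in it is illustrative, not taken from the kernel.

#include <stdio.h>
#include <string.h>

#define MAX_ORDER	10
#define ZONE_PAGES	(1UL << MAX_ORDER)
#define BITS_PER_LONG	(8 * sizeof(unsigned long))

/* one bit per buddy pair, per order */
static unsigned long map[MAX_ORDER][ZONE_PAGES / (2 * BITS_PER_LONG) + 1];

/* Non-atomic test-and-toggle, like __test_and_change_bit(); the zone
 * lock is what makes this safe in the kernel. */
static int test_and_change(unsigned long *m, unsigned long nr)
{
	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
	unsigned long *p = m + nr / BITS_PER_LONG;
	int old = (*p & mask) != 0;

	*p ^= mask;
	return old;
}

/* One step of __free_pages_ok(): toggle the pair bit; if it was already
 * set, the buddy had been freed first, so the caller can merge and move
 * up one order.  bit clear = both busy or both free. */
static int free_one(unsigned long page_idx, int order)
{
	if (order == MAX_ORDER - 1)
		return 0;	/* top order keeps no map at all */
	/* index >> (1+order) picks the bit shared by the buddy pair */
	return test_and_change(map[order], page_idx >> (1 + order));
}

int main(void)
{
	memset(map, 0, sizeof(map));
	printf("free idx 0, order 0 -> merge with buddy? %d\n", free_one(0, 0));
	printf("free idx 1, order 0 -> merge with buddy? %d\n", free_one(1, 0));
	return 0;
}

Halving the bitmap and dropping the top-order map is what lets the zone keep
working when its mem_map is per-node rather than a single global array.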
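Finally, the arithmetic behind the <asm-alpha/mmzone.h> macros: on wildfire
the QBB (node) number sits in physical address bits 36 and up, which is where
ALPHA_PA_TO_NID's "pa >> 36" and the 64 GB NODE_MAX_MEM_SIZE come from, and
PLAT_NODE_DATA_LOCALNR subtracts the node's start address before shifting by
PAGE_SHIFT to get a node-local page index.  A standalone sketch of the same
computation follows; the node start address used in main() is made up for the
example.

#include <stdio.h>

#define PAGE_SHIFT		13
#define NODE_MAX_MEM_SIZE	(64UL * 1024 * 1024 * 1024)	/* 64 GB = 2^36 */

/* On wildfire the node id lives in physical address bits 36+. */
static unsigned long pa_to_nid(unsigned long pa)
{
	return pa >> 36;
}

/* Offset of a physical address inside its node's mem_map. */
static unsigned long local_mapnr(unsigned long pa, unsigned long node_start)
{
	return (pa - node_start) >> PAGE_SHIFT;
}

int main(void)
{
	unsigned long node_start = 3 * NODE_MAX_MEM_SIZE;	/* assumed */
	unsigned long pa = node_start + (123UL << PAGE_SHIFT);

	printf("pa %#lx -> node %lu, local page %lu\n",
	       pa, pa_to_nid(pa), local_mapnr(pa, node_start));
	return 0;
}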