diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/config.in x/arch/i386/config.in --- x-ref/arch/i386/config.in 2003-12-04 21:21:48.000000000 +0100 +++ x/arch/i386/config.in 2003-12-04 21:21:53.000000000 +0100 @@ -255,6 +255,13 @@ else if [ "$CONFIG_X86_SUMMIT" = "y" ]; then define_bool CONFIG_X86_CLUSTERED_APIC y fi + if [ "$CONFIG_X86_NUMAQ" = "y" -o "$CONFIG_X86_SUMMIT" = "y" ]; then + bool ' Numa Memory Allocation Support' CONFIG_NUMA + if [ "$CONFIG_NUMA" = "y" ]; then + define_bool CONFIG_DISCONTIGMEM y + define_bool CONFIG_HAVE_ARCH_BOOTMEM_NODE y + fi + fi fi fi diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/kernel/Makefile x/arch/i386/kernel/Makefile --- x-ref/arch/i386/kernel/Makefile 2003-11-28 23:12:31.000000000 +0100 +++ x/arch/i386/kernel/Makefile 2003-12-04 21:21:53.000000000 +0100 @@ -30,6 +30,8 @@ obj-y += pci-pc.o pci-irq.o endif endif +obj-$(CONFIG_X86_NUMAQ) += numaq.o +obj-$(CONFIG_X86_SUMMIT) += srat.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_MTRR) += mtrr.o obj-$(CONFIG_X86_MSR) += msr.o diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/kernel/numaq.c x/arch/i386/kernel/numaq.c --- x-ref/arch/i386/kernel/numaq.c 1970-01-01 01:00:00.000000000 +0100 +++ x/arch/i386/kernel/numaq.c 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,143 @@ +/* + * Written by: Patricia Gaughen, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ + +#include +#include +#include +#include +#include + +unsigned long long nodes_mem_start[MAX_NUMNODES]; +unsigned long long nodes_mem_size[MAX_NUMNODES]; + +/* + * Function: smp_dump_qct() + * + * Description: gets memory layout from the quad config table. This + * function also increments numnodes with the number of nodes (quads) + * present. + */ +static void __init smp_dump_qct(void) +{ + int node; + struct eachquadmem *eq; + struct sys_cfg_data *scd = + (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); + +#define MB_TO_B(addr) ((addr) << 20) + numnodes = 0; + for(node = 0; node < MAX_NUMNODES; node++) { + if(scd->quads_present31_0 & (1 << node)) { + numnodes++; + eq = &scd->eq[node]; + /* Convert to bytes */ + nodes_mem_start[node] = MB_TO_B(((unsigned long long)eq->hi_shrd_mem_start) - + ((unsigned long long)eq->priv_mem_size)); + nodes_mem_size[node] = MB_TO_B(((unsigned long long)eq->hi_shrd_mem_size) + + ((unsigned long long)eq->priv_mem_size)); + } + } +} + +/* + * ----------------------------------------- + * + * functions related to physnode_map + * + * ----------------------------------------- + */ +/* + * physnode_map keeps track of the physical memory layout of the + * numaq nodes on a 256Mb break (each element of the array will + * represent 256Mb of memory and will be marked by the node id. 
so,
+ * if the first gig is on node 0, and the second gig is on node 1
+ * physnode_map will contain:
+ * physnode_map[0-3] = 0;
+ * physnode_map[4-7] = 1;
+ * physnode_map[8- ] = -1;
+ */
+int physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+
+/* Arguments are parenthesized so that expressions can be passed safely. */
+#define MB_TO_ELEMENT(x) ((x) >> ELEMENT_REPRESENTS)
+#define PA_TO_MB(pa) ((pa) >> 20) /* assumption: a physical address is in bytes */
+
+/*
+ * Map a physical address (in bytes) to the node id recorded in
+ * physnode_map.  An address that lies beyond the map, or in an element
+ * that no node has claimed (-1), is a fatal error (BUG()).
+ */
+int numaqpa_to_nid(unsigned long long pa)
+{
+	unsigned long long element = MB_TO_ELEMENT(PA_TO_MB(pa));
+	int nid;
+
+	/* never index past the end of physnode_map */
+	if (element >= MAX_ELEMENTS)
+		BUG();
+
+	nid = physnode_map[element];
+
+	/* the physical address passed in is not in the map for the system */
+	if (nid == -1)
+		BUG();
+
+	return nid;
+}
+
+/* Same lookup as numaqpa_to_nid(), starting from a page frame number. */
+int numaqpfn_to_nid(unsigned long pfn)
+{
+	return numaqpa_to_nid(((unsigned long long)pfn) << PAGE_SHIFT);
+}
+
+/*
+ * For each present quad, mark every physnode_map element that overlaps
+ * [hi_shrd_mem_start, TOPOFMEM) with the quad's node id, where
+ *	TOPOFMEM = hi_shrd_mem_start + hi_shrd_mem_size	(both in MB)
+ *
+ * TOPOFMEM is exclusive, so the last element marked is the one that
+ * holds megabyte TOPOFMEM - 1.  Example with 256MB elements:
+ *	start = 0, TOPOFMEM = 1024
+ *	first element = 0 >> 8 = 0, last element = 1023 >> 8 = 3
+ * so megabyte 1024 and up are left for the next node to claim.
+ */
+static void __init initialize_physnode_map(void)
+{
+	int nid;
+	unsigned int topofmem, first, last, elem;
+	struct eachquadmem *eq;
+	struct sys_cfg_data *scd =
+		(struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
+
+	for (nid = 0; nid < numnodes; nid++) {
+		if (!(scd->quads_present31_0 & (1 << nid)))
+			continue;
+
+		eq = &scd->eq[nid];
+		topofmem = eq->hi_shrd_mem_start + eq->hi_shrd_mem_size;
+		if (topofmem <= eq->hi_shrd_mem_start)
+			continue;	/* empty (or wrapped) range: nothing to mark */
+
+		/*
+		 * Walk whole map elements instead of stepping megabytes by
+		 * the shift count, and never write past the end of the map.
+		 */
+		first = MB_TO_ELEMENT(eq->hi_shrd_mem_start);
+		last = MB_TO_ELEMENT(topofmem - 1);
+		for (elem = first; elem <= last && elem < MAX_ELEMENTS; elem++)
+			physnode_map[elem] = nid;
+	}
+}
+
+/*
+ * Read the memory layout from the quad config table, then build
+ * physnode_map from it.
+ */
+void __init get_memcfg_numaq(void)
+{
+	smp_dump_qct();
+	initialize_physnode_map();
+}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/kernel/setup.c x/arch/i386/kernel/setup.c
--- x-ref/arch/i386/kernel/setup.c	2003-12-04 21:21:48.000000000 +0100
+++ x/arch/i386/kernel/setup.c	2003-12-04 
21:21:59.000000000 +0100 @@ -122,6 +122,7 @@ #include #include #include +#include /* * Machine setup.. */ @@ -885,33 +886,11 @@ nextchar: } } -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - -/* - * Reserved space for vmalloc and iomap - defined in asm/page.h - */ -#ifdef CONFIG_HIGHMEM_EMULATION -#define ORDER_DOWN(x) ((x >> (MAX_ORDER-1)) << (MAX_ORDER-1)) -#define MAXMEM_PFN \ -({ \ - int __max_pfn; \ - if (max_pfn > PFN_DOWN(MAXMEM)) \ - __max_pfn = PFN_DOWN(MAXMEM); \ - else \ - __max_pfn = ORDER_DOWN(max_pfn / 5); \ - __max_pfn; \ -}) -#else -#define MAXMEM_PFN PFN_DOWN(MAXMEM) -#endif -#define MAX_NONPAE_PFN (1 << 20) /* * Find the highest page frame number we have available */ -static void __init find_max_pfn(void) +void __init find_max_pfn(void) { int i; @@ -933,7 +912,7 @@ static void __init find_max_pfn(void) /* * Determine low and high memory ranges: */ -static unsigned long __init find_max_low_pfn(void) +unsigned long __init find_max_low_pfn(void) { unsigned long max_low_pfn; @@ -989,6 +968,7 @@ static unsigned long __init find_max_low return max_low_pfn; } +#ifndef CONFIG_DISCONTIGMEM /* * Register fully available low RAM pages with the bootmem allocator. */ @@ -1115,6 +1095,7 @@ static unsigned long __init setup_memory return max_low_pfn; } +#endif /* !CONFIG_DISCONTIGMEM */ /* * Request address space for all standard RAM and ROM resources diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/kernel/srat.c x/arch/i386/kernel/srat.c --- x-ref/arch/i386/kernel/srat.c 1970-01-01 01:00:00.000000000 +0100 +++ x/arch/i386/kernel/srat.c 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,457 @@ +/* + * This code is taken from 64bit discontig mem support. + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ + +/* + * ACPI 2.0 SRAT Table + * http://www.microsoft.com/HWDEV/design/SRAT.htm + * Processor and Memory affinity information + */ + +#include +#include +#include +#include +#include +#include "acpitable.h" + +#define SRAT_DEBUG + +#define NUM_KLUDGE_PAGES 4 /* Size of page descriptor kludge */ +#define PAGE_KLUDGE_START ((u32 *)empty_zero_page - NUM_KLUDGE_PAGES) + + +/* + * proximity macros and definitions + */ +#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */ +#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ +#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) +#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) +#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */ +/* bitmap length; _PXM is at most 255 */ +#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) +static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ + +struct node_memory_chunk_s node_memory_chunk[MAXCLUMPS]; +struct node_cpuid_s node_cpuid[NR_CPUS]; + +static int srat_num_cpus; /* number of cpus */ +static int num_memory_chunks; /* total number of memory chunks */ +static unsigned long zholes_size[MAX_NUMNODES]; + 
+unsigned long long nodes_mem_start[MAX_NUMNODES];
+unsigned long long nodes_mem_size[MAX_NUMNODES];
+
+extern unsigned char acpi_checksum(void *buffer, int length);
+extern struct acpi_table_rsdp *acpi_find_root_pointer(void);
+
+
+/*
+ * Identify which cnode a physical address resides on.
+ * Returns the node id of the first memory chunk whose
+ * [start_paddr, end_paddr] range contains paddr, or -1 if none does.
+ */
+int numapa_to_nid(u64 paddr)
+{
+	int i;
+	struct node_memory_chunk_s *nmcp;
+
+	/* We've got a sorted list. Binary search here? Do we care?? */
+	nmcp = node_memory_chunk;
+	for (i = num_memory_chunks; --i >= 0; nmcp++)
+		if (paddr >= nmcp->start_paddr && paddr <= nmcp->end_paddr)
+			return (int)nmcp->nid;
+
+	return -1;
+}
+
+/* Same lookup as numapa_to_nid(), starting from a page frame number. */
+int numapfn_to_nid(unsigned long pfn)
+{
+	return numapa_to_nid(((unsigned long long)pfn) << PAGE_SHIFT);
+}
+
+/*
+ * Identify CPU proximity domains.
+ *
+ * p points at one SRAT processor affinity entry.  Enabled entries mark
+ * their proximity domain in pxm_bitmap and are appended to node_cpuid[];
+ * entries beyond the NR_CPUS slots of node_cpuid[] are reported and dropped.
+ */
+static void __init parse_cpu_affinity_structure(char *p)
+{
+	struct acpi_table_processor_affinity *cpu_affinity =
+		(struct acpi_table_processor_affinity *) p;
+
+	if (!cpu_affinity->flags.enabled)
+		return;		/* empty entry */
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain);
+
+	/* node_cpuid[] only has NR_CPUS slots; do not write past them */
+	if (srat_num_cpus >= NR_CPUS) {
+		printk("Too many CPUs in SRAT. Ignoring CPU 0x%02X\n",
+		       cpu_affinity->apic_id);
+		return;
+	}
+
+	node_cpuid[srat_num_cpus].phys_id = cpu_affinity->apic_id;
+	/* nid should be overridden as logical node id later */
+	node_cpuid[srat_num_cpus].pxm = cpu_affinity->proximity_domain;
+	srat_num_cpus++;
+
+#ifdef SRAT_DEBUG
+	printk("CPU 0x%02X in proximity domain 0x%02X\n",
+	       cpu_affinity->apic_id, cpu_affinity->proximity_domain);
+#endif
+}
+
+/*
+ * Identify memory proximity domains and hot-remove capabilities.
+ * Fill node memory chunk list structure.
+ */
+
+static void __init parse_memory_affinity_structure (char *sratp)
+{
+	struct acpi_table_memory_affinity *memory_affinity =
+		(struct acpi_table_memory_affinity *) sratp;
+	u64 paddr, size;
+	u8 pxm;
+	struct node_memory_chunk_s *p, *q, *pend;
+
+	if (!memory_affinity->flags.enabled)
+		return;		/* empty entry */
+
+	/* mark this node as "seen" in node bitmap */
+	BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain);
+
+	/* calculate info for memory chunk structure */
+	paddr = memory_affinity->base_addr_hi;
+	paddr = (paddr << 32) | memory_affinity->base_addr_lo;
+	size = memory_affinity->length_hi;
+	size = (size << 32) | memory_affinity->length_lo;
+	pxm = memory_affinity->proximity_domain;
+
+	if (num_memory_chunks >= MAXCLUMPS) {
+		printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n",
+		       size/(1024*1024), paddr);
+		return;
+	}
+
+	/* Insertion sort based on base address */
+	pend = &node_memory_chunk[num_memory_chunks];
+	for (p = &node_memory_chunk[0]; p < pend; p++) {
+		if (paddr < p->start_paddr)
+			break;
+	}
+	/*
+	 * Open a gap at p by moving [p, pend) up one slot.  The highest
+	 * slot written is *pend, which the MAXCLUMPS check above
+	 * guarantees is still inside node_memory_chunk[].
+	 */
+	for (q = pend; q > p; q--)
+		*q = *(q - 1);
+
+	p->start_paddr = paddr;
+	p->size = size;
+	p->end_paddr = paddr + size - 1;
+	p->pxm = pxm;
+
+	num_memory_chunks++;
+
+
+#ifdef SRAT_DEBUG
+	printk("Memory range 0x%llX to 0x%llX (type 0x%X) in proximity domain 0x%02X %s\n",
+	       paddr, paddr + size - 1,
+	       memory_affinity->memory_type,
+	       memory_affinity->proximity_domain,
+	       (memory_affinity->flags.hot_pluggable ?
+		"enabled and removable" : "enabled" ) );
+#endif
+}
+
+
+/*
+ * Parse the ACPI Static Resource Affinity Table.
+ *
+ * Walks the affinity entries that follow the SRAT header, then assigns
+ * sequential logical node ids to the proximity domains that were seen
+ * and fills nodes_mem_start[], nodes_mem_size[] and zholes_size[] for
+ * each node.  Always returns 0; BUG()s if no proximity domain is found.
+ */
+static int __init acpi20_parse_srat(acpi_table_srat_t *sratp)
+{
+	u8 *start, *end, *p;
+	int i, j, nid;
+	u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
+
+	start = (u8 *)(&(sratp->reserved) + 1);	/* skip header */
+	p = start;
+	end = (u8 *)sratp + sratp->header.length;
+	printk("In acpi20_parse_srat: sratp=0x%p, start=0x%p, end=0x%p\n", sratp, start, end);
+
+	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
+	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
+	memset(node_cpuid, 0, sizeof(node_cpuid));
+	memset(zholes_size, 0, sizeof(zholes_size));
+
+	/* -1 in this map means not available */
+	memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+
+	num_memory_chunks = 0;
+	srat_num_cpus = 0;	/* node_cpuid[] was just cleared */
+
+	/*
+	 * Each entry starts with a type byte (p[0]) and a length byte
+	 * (p[1]).  Validate the length before using the entry so that we
+	 * neither loop forever on a zero length nor read past the table.
+	 */
+	while (p + 2 <= end) {
+		if (p[1] == 0) {
+			printk("acpi20_parse_srat: Entry length value is zero;"
+			       " can't parse any further!\n");
+			break;
+		}
+		if (p[1] > end - p) {
+			printk("acpi20_parse_srat: Entry of type 0x%02X overruns the table;"
+			       " can't parse any further!\n", p[0]);
+			break;
+		}
+		switch (*p) {
+		case ACPI_SRAT_PROCESSOR_AFFINITY:
+			parse_cpu_affinity_structure(p);
+			break;
+		case ACPI_SRAT_MEMORY_AFFINITY:
+			parse_memory_affinity_structure(p);
+			break;
+		default:
+			printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
+			break;
+		}
+		p += p[1];
+	}
+	printk("SRAT scan complete\n");
+
+	/* Calculate total number of nodes in system from PXM bitmap and create
+	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
+	 * to specify the range of _PXM values.)
+	 */
+	numnodes = 0;				/* init total nodes in system */
+	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+		if (!BMAP_TEST(pxm_bitmap, i))
+			continue;
+		/* the per-node arrays only have MAX_NUMNODES slots */
+		if (numnodes >= MAX_NUMNODES) {
+			printk("Too many proximity domains in SRAT. Ignoring domain 0x%02X\n", i);
+			continue;
+		}
+		pxm_to_nid_map[i] = numnodes;
+		++numnodes;
+	}
+	printk("numnodes=%d\n", numnodes);
+	if (numnodes == 0)
+		BUG();
+
+	/* set cnode id in memory chunk structure */
+	for (i = 0; i < num_memory_chunks; i++)
+		node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
+
+	/* set cnode id in cpu structure */
+	for (i = 0; i < srat_num_cpus; i++)
+		node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].pxm];
+
+	printk("pxm bitmap: ");
+	for (i = 0; i < sizeof(pxm_bitmap); i++) {
+		printk("%02X ", pxm_bitmap[i]);
+	}
+	printk("\n");
+	printk("Number of logical nodes in system = %d\n", numnodes);
+	printk("Number of memory chunks in system = %d\n", num_memory_chunks);
+
+	printk("Memory table:\n");
+
+	/* calculate start/size arrays */
+	for (nid = 0; nid < numnodes; nid++) {
+		u64 start_addr, size, prev_end;
+		int been_here_before;
+
+		start_addr = size = 0;
+		been_here_before = 0;
+		for (j = 0; j < num_memory_chunks; j++){
+			if (node_memory_chunk[j].nid != nid)
+				continue;
+			/*
+			 * TODO (from the original author): this should all
+			 * be in pfns, and the assignment of node_start_pfn
+			 * and node_end_pfn should move into this function.
+			 *
+			 * Chunks are sorted by start address.  The first
+			 * chunk of a node sets its start and size; each
+			 * later chunk extends the node to the chunk's end,
+			 * and any gap between the end seen so far and the
+			 * chunk's start is added to zholes_size[nid] (in
+			 * pages), so multiple holes accumulate correctly.
+			 */
+			start_addr = node_memory_chunk[j].start_paddr;
+			size = node_memory_chunk[j].size;
+
+			if (been_here_before == 0) {
+				printk("found chunk for nid %d\n", nid);
+
+				nodes_mem_start[nid] = start_addr;
+				nodes_mem_size[nid] = size;
+
+				been_here_before = 1;
+			} else {
+				printk("HOLE: chunk %d nid %d start_paddr %16llx end_paddr %16llx size %16llx\n",
+				       j, node_memory_chunk[j].nid,
+				       node_memory_chunk[j].start_paddr,
+				       node_memory_chunk[j].end_paddr,
+				       node_memory_chunk[j].size);
+
+				/* end of the node as known so far, holes included */
+				prev_end = nodes_mem_start[nid] + nodes_mem_size[nid];
+				if (start_addr > prev_end) {
+					printk("found a whole on nid %d, chunk %d\n", nid, j);
+					zholes_size[nid] = zholes_size[nid] +
+						((start_addr - prev_end) >> PAGE_SHIFT);
+				}
+				/* the node now spans up to the end of this chunk */
+				if (start_addr + size > prev_end)
+					nodes_mem_size[nid] = start_addr + size - nodes_mem_start[nid];
+			}
+			printk("%s (%d): start_addr = 0x%08llx size = %08llx\n",
+			       __FUNCTION__, nid, nodes_mem_start[nid], nodes_mem_size[nid]);
+			printk("%s (%d): start=0x%llX size=0x%llX\n",
+			       __FUNCTION__, nid, start_addr, size);
+		}
+		printk("%s (%d): start_addr = 0x%08llx size = %08llx\n",
+		       __FUNCTION__, nid, nodes_mem_start[nid], nodes_mem_size[nid]);
+	}
+	return 0;
+}
+
+
+#define kludge_to_virt(idx) (PAGE_SIZE * ((unsigned long)((u32 *)empty_zero_page - (u32 *)pg0) - NUM_KLUDGE_PAGES + (unsigned long)(idx)) )
+
+#define pde_kludge(idx, phys) (PAGE_KLUDGE_START[idx] = ((phys) & ~(PAGE_SIZE - 1)) | (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED))
+
+/*
+ * Temporarily use the virtual area starting from PAGE_KLUDGE_START,
+ * to map the target physical address. By using this area, we can
+ * map up to NUM_KLUDGE_PAGES pages temporarily, i.e. until the next
+ * page_kludge() call.
+ */
+/*
+ * Returns the virtual address that corresponds to phys, or NULL when
+ * covering size bytes would need more than NUM_KLUDGE_PAGES pages.
+ * Slot 0 always maps the page containing phys; following pages go into
+ * slots 1, 2, ... so that the returned range is virtually contiguous.
+ * The overwritten page table entries are NOT restored here; the caller
+ * is responsible for saving and restoring them.
+ */
+static __init void * page_kludge(unsigned long phys, unsigned long size)
+{
+	unsigned long base, offset, mapped_size;
+	int idx;
+
+	offset = phys & (PAGE_SIZE - 1);
+	mapped_size = PAGE_SIZE - offset;
+	pde_kludge(0, phys);
+	base = kludge_to_virt(0);
+	__flush_tlb_one(base);
+	wbinvd();
+
+	dprintk("page_kludge(0x%lx, 0x%lx): idx=%d mapped at %lx\n", phys, size,
+		0, base);
+
+	/*
+	 * Most cases can be covered by the below.  Start at slot 1: slot 0
+	 * already holds the first page and must not be overwritten.
+	 */
+	for (idx = 1; mapped_size < size; idx++) {
+		if (idx >= NUM_KLUDGE_PAGES)
+			return NULL;	/* cannot handle this */
+		phys += PAGE_SIZE;
+		pde_kludge(idx, phys);
+		__flush_tlb_one(kludge_to_virt(idx));
+		mapped_size += PAGE_SIZE;
+	}
+
+	return((void *)(base + offset));
+}
+
+
+/*
+ * Locate the ACPI SRAT through the RSDP and RSDT and hand it to
+ * acpi20_parse_srat().  The tables are reached through the temporary
+ * page_kludge() mappings; the page table entries used for that are
+ * saved before the first mapping and restored on every path that has
+ * touched them.
+ */
+void __init get_memcfg_from_srat(void)
+{
+	acpi_table_header *header = NULL;
+	struct acpi_table_rsdp *rsdp = NULL;
+	struct acpi_table_rsdt *rsdt = NULL;
+	struct acpi_table_rsdt saved_rsdt;
+	int tables = 0;
+	int i = 0;
+	u32 pde_save[NUM_KLUDGE_PAGES];
+
+	rsdp = (struct acpi_table_rsdp *) acpi_find_root_pointer();
+
+	if (!rsdp) {
+		printk("get_memcfg_from_srat: Didn't find ACPI root!\n");
+		return;
+	}
+
+	printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
+	       rsdp->oem_id);
+
+	if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) {
+		printk(KERN_WARNING "RSDP table signature incorrect\n");
+		return;
+	}
+
+	/*
+	 * Save the kludge page table entries before the first
+	 * page_kludge() call overwrites them; from here on every exit
+	 * goes through "out" so that they are always restored.
+	 */
+	memcpy(pde_save, PAGE_KLUDGE_START, sizeof(pde_save));
+
+	printk("get_memcfg_from_srat: calling page_kludge(0x%08X, %d)\n", rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
+	rsdt = (struct acpi_table_rsdt *)
+	    page_kludge(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt));
+
+	if (!rsdt) {
+		printk(KERN_WARNING "ACPI: Invalid root system description tables (RSDT)\n");
+		goto out;
+	}
+	printk("get_memcfg_from_srat: page_kludge returned 0x%08X\n", (ulong)rsdt);
+
+	header = & rsdt->header;
+
+	if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) {
+		printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+		goto out;
+	}
+
+	/* a length shorter than the header would make the count below wrap */
+	if (header->length < sizeof(acpi_table_header)) {
+		printk(KERN_WARNING "ACPI: Too small length in RSDT: %d\n", header->length);
+		goto out;
+	}
+
+	/*
+	 * The number of tables is computed by taking the
+	 * size of all entries (total size of RSDT minus
+	 * header size) divided by the size of each entry
+	 * (4-byte table pointers).
+	 */
+	tables = (header->length - sizeof(acpi_table_header)) / 4;
+	printk("tables = %d\n", tables);
+
+	memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
+
+	if (saved_rsdt.header.length > sizeof(saved_rsdt)) {
+		printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", saved_rsdt.header.length);
+		goto out;
+	}
+	printk("Begin table scan....\n");
+
+	for (i = 0; i < tables; i++) {
+		/* Map in the header first; it is enough to identify the table. */
+		header = (acpi_table_header *)
+			page_kludge(saved_rsdt.entry[i], sizeof(acpi_table_header));
+		if (!header)
+			break;
+
+		if (strncmp((char *) &header->signature, "SRAT", 4))
+			continue;
+
+		/*
+		 * Only the SRAT needs its full length mapped, so a large
+		 * unrelated table can no longer abort the scan.
+		 */
+		header = (acpi_table_header *)
+			page_kludge(saved_rsdt.entry[i], header->length);
+		if (!header)
+			break;
+
+		if (acpi_checksum(header, header->length)) {
+			printk(KERN_WARNING "ACPI %s has invalid checksum\n",
+			       "SRAT");
+			continue;
+		}
+
+		acpi20_parse_srat((acpi_table_srat_t *)header);
+		goto out;
+	}
+
+	printk("get_memcfg_from_srat: no SRAT found!\n");
+ out:
+	/* Undo page kludge. */
+	memcpy(PAGE_KLUDGE_START, pde_save, sizeof(pde_save));
+	__flush_tlb();
+	wbinvd();
+}
+
+/*
+ * Return the total hole size (in pages) recorded for node nid by
+ * acpi20_parse_srat().  An invalid nid is reported and yields 0 rather
+ * than indexing outside zholes_size[].
+ */
+unsigned long __init get_zholes_size(int nid)
+{
+	if (nid < 0 || nid >= numnodes || nid >= MAX_NUMNODES) {
+		printk("%s: nid = %d is invalid. numnodes = %d\n",
+		       __FUNCTION__, nid, numnodes);
+		return 0;
+	}
+	return zholes_size[nid];
+}
+
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/mm/discontig.c x/arch/i386/mm/discontig.c
--- x-ref/arch/i386/mm/discontig.c	1970-01-01 01:00:00.000000000 +0100
+++ x/arch/i386/mm/discontig.c	2003-12-04 21:21:53.000000000 +0100
@@ -0,0 +1,332 @@
+/*
+ * Written by: Patricia Gaughen, IBM Corporation
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ + +#include +#include +#include +#include +#include +#include +#include + +struct pfns { + unsigned long start_pfn; + unsigned long max_pfn; +}; + +plat_pg_data_t *plat_node_data[MAX_NUMNODES]; +bootmem_data_t plat_node_bdata; +struct pfns plat_node_bootpfns[MAX_NUMNODES]; + +extern unsigned long find_max_low_pfn(void); +extern void find_max_pfn(void); +extern void pagetable_init (void); +extern void kmap_init(void); +extern void one_highpage_init(struct page *, int, int); +extern inline int page_is_ram (unsigned long); + +extern unsigned long long nodes_mem_start[], nodes_mem_size[]; +extern struct e820map e820; +extern char _end; +extern unsigned long highend_pfn, highstart_pfn; +extern unsigned long max_low_pfn; +extern unsigned long totalram_pages; +extern unsigned long totalhigh_pages; + +static void __init find_max_pfn_node(int nid) +{ + unsigned long node_datasz; + unsigned long start, end; + + start = plat_node_bootpfns[nid].start_pfn = PFN_UP(nodes_mem_start[nid]); + end = PFN_DOWN(nodes_mem_start[nid]) + PFN_DOWN(nodes_mem_size[nid]); + + if (start >= end) { + BUG(); + } + if (end > max_pfn) { + end = max_pfn; + } + plat_node_bootpfns[nid].max_pfn = end; + + node_datasz = 
PFN_UP(sizeof(plat_pg_data_t)); + PLAT_NODE_DATA(nid) = (plat_pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + memset(PLAT_NODE_DATA(nid), 0, sizeof(plat_pg_data_t)); + min_low_pfn += node_datasz; +} + +static void __init register_bootmem_low_pages(unsigned long system_max_low_pfn) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn, last_pfn, size; + /* + * Reserve usable low memory + */ + if (e820.map[i].type != E820_RAM) + continue; + /* + * We are rounding up the start address of usable memory: + */ + curr_pfn = PFN_UP(e820.map[i].addr); + if (curr_pfn >= system_max_low_pfn) + continue; + /* + * ... and at the end of the usable range downwards: + */ + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + + if (last_pfn > system_max_low_pfn) + last_pfn = system_max_low_pfn; + + /* + * .. finally, did all the rounding and playing + * around just make the area go away? + */ + if (last_pfn <= curr_pfn) + continue; + + size = last_pfn - curr_pfn; + free_bootmem_node(NODE_DATA(0), PFN_PHYS(curr_pfn), PFN_PHYS(size)); + } +} + +unsigned long __init setup_memory(void) +{ + int nid; + unsigned long bootmap_size, system_start_pfn, system_max_low_pfn; + + get_memcfg_numa(); + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + system_start_pfn = min_low_pfn = PFN_UP(__pa(&_end)); + + find_max_pfn(); + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > system_max_low_pfn) { + highstart_pfn = system_max_low_pfn; + } + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(system_max_low_pfn)); + + for (nid = 0; nid < numnodes; nid++) + { + find_max_pfn_node(nid); + + } + + NODE_DATA(0)->bdata = &plat_node_bdata; + + /* + * Initialize the boot-time allocator (with low memory only): + */ + bootmap_size = 
init_bootmem_node(NODE_DATA(0), min_low_pfn, 0, system_max_low_pfn); + + register_bootmem_low_pages(system_max_low_pfn); + + /* + * Reserve the bootmem bitmap itself as well. We do this in two + * steps (first step was init_bootmem()) because this catches + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. + */ + reserve_bootmem_node(NODE_DATA(0), HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + + /* + * reserve physical page 0 - it's a special BIOS page on many boxes, + * enabling clean reboots, SMP operation, laptop functions. + */ + reserve_bootmem_node(NODE_DATA(0), 0, PAGE_SIZE); + + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + reserve_bootmem_node(NODE_DATA(0), PAGE_SIZE, PAGE_SIZE); + + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); + + /*insert other nodes into pgdat_list*/ + for (nid = 1; nid < numnodes; nid++){ + NODE_DATA(nid)->node_next = pgdat_list; + pgdat_list = NODE_DATA(nid); + } + +#ifdef CONFIG_BLK_DEV_INITRD + if (LOADER_TYPE && INITRD_START) { + if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << PAGE_SHIFT)) { + reserve_bootmem(INITRD_START, INITRD_SIZE); + initrd_start = + INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_end = initrd_start+INITRD_SIZE; + } + else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + INITRD_START + INITRD_SIZE, + system_max_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } + } +#endif + + return system_max_low_pfn; +} + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. 
+ */ +void __init paging_init(void) +{ + + int nid; + + pagetable_init(); + + __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); + +#if CONFIG_X86_PAE + /* + * We will bail out later - printk doesnt work right now so + * the user would just see a hanging kernel. + */ + if (cpu_has_pae) + set_in_cr4(X86_CR4_PAE); +#endif + + __flush_tlb_all(); + +#ifdef CONFIG_HIGHMEM + kmap_init(); +#endif + + for (nid = 0; nid < numnodes; nid++) { + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned int max_dma; + + unsigned long low = max_low_pfn; + unsigned long high = plat_node_bootpfns[nid].max_pfn; + unsigned long start = plat_node_bootpfns[nid].start_pfn; + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + if (start > low) { +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - start; +#endif + } else { + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - low; +#endif + } + } + free_area_init_node(nid, NODE_DATA(nid), 0, zones_size, start, get_zholes_size(nid)); + } + return; +} + + +int __init mem_init_free_pages(int bad_ppro) +{ + int reservedpages; + int nid; + unsigned long pfn; + + bad_ppro = ppro_with_ram_bug(); + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem_node(NODE_DATA(0)); + + reservedpages = 0; + for (pfn = 0; pfn < max_low_pfn; pfn++) + /* + * Only count reserved RAM pages + */ + if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) + reservedpages++; +#ifdef CONFIG_HIGHMEM + for (nid = 0; nid < numnodes; nid++) { + unsigned long node_pfn, node_high_size, zone_start_pfn; + struct page * zone_mem_map; + + node_high_size = NODE_DATA(nid)->node_zones[ZONE_HIGHMEM].size; + zone_mem_map = NODE_DATA(nid)->node_zones[ZONE_HIGHMEM].zone_mem_map; + zone_start_pfn = NODE_DATA(nid)->node_zones[ZONE_HIGHMEM].zone_start_pfn; + + 
printk("Initializing highpages for node %d\n", nid); + for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { + one_highpage_init((struct page *) (zone_mem_map + node_pfn), zone_start_pfn + node_pfn, bad_ppro); + } + } + totalram_pages += totalhigh_pages; +#endif + return reservedpages; +} + +void __init mem_init_set_max_mapnr(void) +{ + unsigned long lmax_mapnr; + int nid; + +#ifdef CONFIG_HIGHMEM + highmem_start_page = mem_map + NODE_DATA(0)->node_zones[ZONE_HIGHMEM].zone_start_mapnr; + num_physpages = highend_pfn; + num_mappedpages = max_low_pfn; + + for (nid = 0; nid < numnodes; nid++) { + lmax_mapnr = PLAT_NODE_DATA_STARTNR(nid) + PLAT_NODE_DATA_SIZE(nid); + if (lmax_mapnr > max_mapnr) { + max_mapnr = lmax_mapnr; + } + } + +#else + max_mapnr = num_mappedpages = num_physpages = max_low_pfn; +#endif +} diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/mm/init.c x/arch/i386/mm/init.c --- x-ref/arch/i386/mm/init.c 2003-12-04 21:21:48.000000000 +0100 +++ x/arch/i386/mm/init.c 2003-12-04 21:21:53.000000000 +0100 @@ -40,8 +40,8 @@ mmu_gather_t mmu_gathers[NR_CPUS]; unsigned long highstart_pfn, highend_pfn; -static unsigned long totalram_pages; -static unsigned long totalhigh_pages; +unsigned long totalram_pages; +unsigned long totalhigh_pages; int do_check_pgt_cache(int low, int high) { @@ -214,7 +214,7 @@ static void __init fixrange_init (unsign } } -static void __init pagetable_init (void) +void __init pagetable_init (void) { unsigned long vaddr, end; pgd_t *pgd, *pgd_base; @@ -354,6 +354,7 @@ static void __init zone_sizes_init(void) free_area_init(zones_size); } +#ifndef CONFIG_DISCONTIGMEM /* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. @@ -383,6 +384,7 @@ void __init paging_init(void) #endif zone_sizes_init(); } +#endif /* !CONFIG_DISCONTIGMEM */ /* * Test if the WP bit works in supervisor mode. 
It isn't supported on 386's @@ -430,28 +432,6 @@ void __init test_wp_bit(void) } } -static inline int page_is_ram (unsigned long pagenr) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long addr, end; - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - /* - * !!!FIXME!!! Some BIOSen report areas as RAM that - * are not. Notably the 640->1Mb area. We need a sanity - * check here. - */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - static inline int page_kills_ppro(unsigned long pagenr) { if(pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -484,6 +464,7 @@ static void __init set_max_mapnr_init(vo #endif } +#ifndef CONFIG_DISCONTIGMEM static int __init free_pages_init(void) { extern int ppro_with_ram_bug(void); @@ -509,6 +490,7 @@ static int __init free_pages_init(void) #endif return reservedpages; } +#endif /* !CONFIG_DISCONTIGMEM */ void __init mem_init(void) { diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/arch/i386/mm/Makefile x/arch/i386/mm/Makefile --- x-ref/arch/i386/mm/Makefile 2002-11-29 02:22:55.000000000 +0100 +++ x/arch/i386/mm/Makefile 2003-12-04 21:21:53.000000000 +0100 @@ -11,5 +11,6 @@ O_TARGET := mm.o obj-y := init.o fault.o ioremap.o extable.o pageattr.o export-objs := pageattr.o +obj-$(CONFIG_DISCONTIGMEM) += discontig.o include $(TOPDIR)/Rules.make diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/e820.h x/include/asm-i386/e820.h --- x-ref/include/asm-i386/e820.h 2000-08-18 18:30:51.000000000 +0200 +++ x/include/asm-i386/e820.h 2003-12-04 21:21:53.000000000 +0100 @@ -35,6 +35,29 @@ struct e820map { }; extern struct e820map e820; + +static inline int page_is_ram (unsigned long pagenr) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long addr, end; + + if 
(e820.map[i].type != E820_RAM) /* not usable memory */ + continue; + /* + * !!!FIXME!!! Some BIOSen report areas as RAM that + * are not. Notably the 640->1Mb area. We need a sanity + * check here. + */ + addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; + end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; + if ((pagenr >= addr) && (pagenr < end)) + return 1; + } + return 0; +} + #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/io.h x/include/asm-i386/io.h --- x-ref/include/asm-i386/io.h 2003-07-18 06:24:48.000000000 +0200 +++ x/include/asm-i386/io.h 2003-12-04 21:21:53.000000000 +0100 @@ -100,10 +100,22 @@ static inline void * phys_to_virt(unsign * Change "struct page" to physical address. */ #ifdef CONFIG_HIGHMEM64G + +#ifndef CONFIG_DISCONTIGMEM #define page_to_phys(page) ((u64)(page - mem_map) << PAGE_SHIFT) #else +#define page_to_phys(page) ((u64)(page - page_zone(page)->zone_mem_map + page_zone(page)->zone_start_pfn) << PAGE_SHIFT) +#endif /* !CONFIG_DISCONTIGMEM */ + +#else + +#ifndef CONFIG_DISCONTIGMEM #define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) -#endif +#else +#define page_to_phys(page) ((page - page_zone(page)->zone_mem_map + page_zone(page)->zone_start_pfn) << PAGE_SHIFT) +#endif /* !CONFIG_DISCONTIGMEM */ + +#endif /* CONFIG_HIGHMEM64G */ extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/max_numnodes.h x/include/asm-i386/max_numnodes.h --- x-ref/include/asm-i386/max_numnodes.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-i386/max_numnodes.h 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,12 @@ +#ifndef _ASM_MAX_NUMNODES_H +#define _ASM_MAX_NUMNODES_H + +#include + +#ifdef CONFIG_X86_NUMAQ +#include +#else +#define MAX_NUMNODES 1 +#endif /* CONFIG_X86_NUMAQ */ + +#endif /* 
_ASM_MAX_NUMNODES_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/mmzone.h x/include/asm-i386/mmzone.h --- x-ref/include/asm-i386/mmzone.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-i386/mmzone.h 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,106 @@ +/* + * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002 + * + */ + +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#ifdef CONFIG_DISCONTIGMEM + +#ifdef CONFIG_X86_NUMAQ +#include +#elif CONFIG_X86_SUMMIT +#include +#else +#define PHYSADDR_TO_NID(pa) (0) +#define PFN_TO_NID(pfn) (0) +#ifdef CONFIG_NUMA +#define _cpu_to_node(cpu) 0 +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_X86_NUMAQ */ + +#ifdef CONFIG_NUMA +#define numa_node_id() _cpu_to_node(smp_processor_id()) +#endif /* CONFIG_NUMA */ + +typedef struct plat_pglist_data { + pg_data_t gendata; +} plat_pg_data_t; + +extern plat_pg_data_t *plat_node_data[]; + +/* + * Following are macros that are specific to this numa platform. 
+ */ +#define reserve_bootmem(addr, size) \ + reserve_bootmem_node(NODE_DATA(0), (addr), (size)) +#define alloc_bootmem(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) +#define alloc_bootmem_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) +#define alloc_bootmem_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) + +#define PLAT_NODE_DATA(n) (plat_node_data[(n)]) +#define PLAT_NODE_DATA_STARTNR(n) \ + (PLAT_NODE_DATA(n)->gendata.node_start_mapnr) +#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) +/* #define PLAT_NODE_DATA_LOCALNR(p, n) \ */ +/* (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) */ +#define PLAT_NODE_DATA_LOCALNR(pfn, n) \ + ((pfn) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) + +/* + * Following are macros that each numa implementation must define. + */ + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define KVADDR_TO_NID(kaddr) PHYSADDR_TO_NID(__pa(kaddr)) + +/* + * Return a pointer to the node data for node n. + */ +#define NODE_DATA(n) (&((PLAT_NODE_DATA(n))->gendata)) + +/* + * NODE_MEM_MAP gives the kaddr for the mem_map of the node. + */ +#define NODE_MEM_MAP(nid) (NODE_DATA(nid)->node_mem_map) + +/* + * Given a kaddr, ADDR_TO_MAPBASE finds the owning node of the memory + * and returns the mem_map of that node.
+ */ +#define ADDR_TO_MAPBASE(kaddr) \ + NODE_MEM_MAP(KVADDR_TO_NID((unsigned long)(kaddr))) + +/* + * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory + * and returns the kaddr corresponding to first physical page in the + * node's mem_map. + */ +#define LOCAL_BASE_ADDR(kaddr) ((unsigned long)__va(NODE_DATA(KVADDR_TO_NID(kaddr))->node_start_pfn << PAGE_SHIFT)) + +#define LOCAL_MAP_NR(kvaddr) \ + (((unsigned long)(kvaddr)-LOCAL_BASE_ADDR(kvaddr)) >> PAGE_SHIFT) + +#define kern_addr_valid(kaddr) test_bit(LOCAL_MAP_NR(kaddr), \ + NODE_DATA(KVADDR_TO_NID(kaddr))->valid_addr_bitmap) + +#define virt_to_page(kaddr) (ADDR_TO_MAPBASE(kaddr) + LOCAL_MAP_NR(kaddr)) +/* This does not check the holes between lmem_maps */ +#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) + +#endif /* CONFIG_DISCONTIGMEM */ +#endif /* _ASM_MMZONE_H_ */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/numaq.h x/include/asm-i386/numaq.h --- x-ref/include/asm-i386/numaq.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-i386/numaq.h 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,179 @@ +/* + * Written by: Patricia Gaughen, IBM Corporation + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send feedback to + */ + +#ifndef NUMAQ_H +#define NUMAQ_H + +#ifdef CONFIG_X86_NUMAQ + +#include + +/* + * for now assume that 8Gb is max amount of RAM for whole system + * 8Gb * 1024Mb/Gb = 8192 Mb + * 8192 Mb / 256Mb = 32 + */ +#define MAX_ELEMENTS 32 +#define ELEMENT_REPRESENTS 8 /* 256 Mb */ + +#define PHYSADDR_TO_NID(pa) numaqpa_to_nid(pa) +#define PFN_TO_NID(pa) numaqpfn_to_nid(pa) +#define MAX_NUMNODES 8 +#ifdef CONFIG_NUMA +#define _cpu_to_node(cpu) (cpu_to_logical_apicid(cpu) >> 4) +#endif /* CONFIG_NUMA */ +extern int numaqpa_to_nid(unsigned long long); +extern int numaqpfn_to_nid(unsigned long); +extern void get_memcfg_numaq(void); +#define get_memcfg_numa() get_memcfg_numaq() + +/* + * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the + */ +#define SYS_CFG_DATA_PRIV_ADDR 0x0009d000 /* place for scd in private quad space */ + +/* + * Communication area for each processor on lynxer-processor tests. + * + * NOTE: If you change the size of this eachproc structure you need + * to change the definition for EACH_QUAD_SIZE. + */ +struct eachquadmem { + unsigned int priv_mem_start; /* Starting address of this */ + /* quad's private memory. */ + /* This is always 0. */ + /* In MB. */ + unsigned int priv_mem_size; /* Size of this quad's */ + /* private memory. */ + /* In MB. */ + unsigned int low_shrd_mem_strp_start;/* Starting address of this */ + /* quad's low shared block */ + /* (untranslated). */ + /* In MB. */ + unsigned int low_shrd_mem_start; /* Starting address of this */ + /* quad's low shared memory */ + /* (untranslated). */ + /* In MB. */ + unsigned int low_shrd_mem_size; /* Size of this quad's low */ + /* shared memory. */ + /* In MB. 
*/ + unsigned int lmmio_copb_start; /* Starting address of this */ + /* quad's local memory */ + /* mapped I/O in the */ + /* compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_copb_size; /* Size of this quad's local */ + /* memory mapped I/O in the */ + /* compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_nopb_start; /* Starting address of this */ + /* quad's local memory */ + /* mapped I/O in the */ + /* non-compatibility OPB. */ + /* In MB. */ + unsigned int lmmio_nopb_size; /* Size of this quad's local */ + /* memory mapped I/O in the */ + /* non-compatibility OPB. */ + /* In MB. */ + unsigned int io_apic_0_start; /* Starting address of I/O */ + /* APIC 0. */ + unsigned int io_apic_0_sz; /* Size I/O APIC 0. */ + unsigned int io_apic_1_start; /* Starting address of I/O */ + /* APIC 1. */ + unsigned int io_apic_1_sz; /* Size I/O APIC 1. */ + unsigned int hi_shrd_mem_start; /* Starting address of this */ + /* quad's high shared memory.*/ + /* In MB. */ + unsigned int hi_shrd_mem_size; /* Size of this quad's high */ + /* shared memory. */ + /* In MB. */ + unsigned int mps_table_addr; /* Address of this quad's */ + /* MPS tables from BIOS, */ + /* in system space.*/ + unsigned int lcl_MDC_pio_addr; /* Port-I/O address for */ + /* local access of MDC. */ + unsigned int rmt_MDC_mmpio_addr; /* MM-Port-I/O address for */ + /* remote access of MDC. */ + unsigned int mm_port_io_start; /* Starting address of this */ + /* quad's memory mapped Port */ + /* I/O space. */ + unsigned int mm_port_io_size; /* Size of this quad's memory*/ + /* mapped Port I/O space. */ + unsigned int mm_rmt_io_apic_start; /* Starting address of this */ + /* quad's memory mapped */ + /* remote I/O APIC space. */ + unsigned int mm_rmt_io_apic_size; /* Size of this quad's memory*/ + /* mapped remote I/O APIC */ + /* space. */ + unsigned int mm_isa_start; /* Starting address of this */ + /* quad's memory mapped ISA */ + /* space (contains MDC */ + /* memory space). 
*/ + unsigned int mm_isa_size; /* Size of this quad's memory*/ + /* mapped ISA space (contains*/ + /* MDC memory space). */ + unsigned int rmt_qmi_addr; /* Remote addr to access QMI.*/ + unsigned int lcl_qmi_addr; /* Local addr to access QMI. */ +}; + +/* + * Note: This structure must NOT be changed unless the multiproc and + * OS are changed to reflect the new structure. + */ +struct sys_cfg_data { + unsigned int quad_id; + unsigned int bsp_proc_id; /* Boot Strap Processor in this quad. */ + unsigned int scd_version; /* Version number of this table. */ + unsigned int first_quad_id; + unsigned int quads_present31_0; /* 1 bit for each quad */ + unsigned int quads_present63_32; /* 1 bit for each quad */ + unsigned int config_flags; + unsigned int boot_flags; + unsigned int csr_start_addr; /* Absolute value (not in MB) */ + unsigned int csr_size; /* Absolute value (not in MB) */ + unsigned int lcl_apic_start_addr; /* Absolute value (not in MB) */ + unsigned int lcl_apic_size; /* Absolute value (not in MB) */ + unsigned int low_shrd_mem_base; /* 0 or 512MB or 1GB */ + unsigned int low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */ + /* may not be totally populated */ + unsigned int split_mem_enbl; /* 0 for no low shared memory */ + unsigned int mmio_sz; /* Size of total system memory mapped I/O */ + /* (in MB). */ + unsigned int quad_spin_lock; /* Spare location used for quad */ + /* bringup. */ + unsigned int nonzero55; /* For checksumming. */ + unsigned int nonzeroaa; /* For checksumming.
*/ + unsigned int scd_magic_number; + unsigned int system_type; + unsigned int checksum; + /* + * memory configuration area for each quad + */ + struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ +}; + +#endif /* CONFIG_X86_NUMAQ */ +#endif /* NUMAQ_H */ + diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/page.h x/include/asm-i386/page.h --- x-ref/include/asm-i386/page.h 2003-12-04 21:21:48.000000000 +0100 +++ x/include/asm-i386/page.h 2003-12-04 21:21:53.000000000 +0100 @@ -134,8 +134,10 @@ static __inline__ int get_order(unsigned #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#ifndef CONFIG_DISCONTIGMEM #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +#endif /* !CONFIG_DISCONTIGMEM */ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/pgtable-2level.h x/include/asm-i386/pgtable-2level.h --- x-ref/include/asm-i386/pgtable-2level.h 2003-12-04 21:21:48.000000000 +0100 +++ x/include/asm-i386/pgtable-2level.h 2003-12-04 21:21:53.000000000 +0100 @@ -58,9 +58,16 @@ static inline pmd_t * pmd_offset(pgd_t * } #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) + +#ifndef CONFIG_DISCONTIGMEM #define pte_page(x) (mem_map+((unsigned long)(((x).pte_low >> PAGE_SHIFT)))) -#define pte_none(x) (!(x).pte_low) #define __pmd_page(x) (mem_map + ((x).pmd_low >> PAGE_SHIFT)) +#else +#define pte_page(x) (NODE_MEM_MAP(PHYSADDR_TO_NID((x).pte_low)) + PLAT_NODE_DATA_LOCALNR(((unsigned long)((x).pte_low >> PAGE_SHIFT)), PHYSADDR_TO_NID((x).pte_low))) +#define __pmd_page(x) 
(NODE_MEM_MAP(PHYSADDR_TO_NID((x).pmd_low)) + PLAT_NODE_DATA_LOCALNR((((x).pmd_low >> PAGE_SHIFT)), PHYSADDR_TO_NID((x).pmd_low))) +#endif /* !CONFIG_DISCONTIGMEM */ + +#define pte_none(x) (!(x).pte_low) #define pmd_none(x) (!(x).pmd_low) #define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) #define __mk_pmd(page_nr,pgprot) __pmd(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/pgtable-3level.h x/include/asm-i386/pgtable-3level.h --- x-ref/include/asm-i386/pgtable-3level.h 2003-12-04 21:21:48.000000000 +0100 +++ x/include/asm-i386/pgtable-3level.h 2003-12-04 21:21:53.000000000 +0100 @@ -94,9 +94,17 @@ static inline int pte_same(pte_t a, pte_ return a.pte_low == b.pte_low && a.pte_high == b.pte_high; } +#ifndef CONFIG_DISCONTIGMEM #define pte_page(x) (mem_map+(((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT)))) +#define __pmd_page(x) (mem_map+(((x).pmd_low >> PAGE_SHIFT) | ((x).pmd_high << (32 - PAGE_SHIFT)))) +#else +/* pte_page = lmem_map + nodelocal_pfn */ +#define pte_pfn(x) (((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT))) +#define pmd_pfn(x) (((x).pmd_low >> PAGE_SHIFT) | ((x).pmd_high << (32 - PAGE_SHIFT))) +#define pte_page(x) (NODE_MEM_MAP(PFN_TO_NID(pte_pfn(x))) + PLAT_NODE_DATA_LOCALNR(pte_pfn(x), PFN_TO_NID(pte_pfn(x)))) +#define __pmd_page(x) (NODE_MEM_MAP(PFN_TO_NID(pmd_pfn(x))) + PLAT_NODE_DATA_LOCALNR(pmd_pfn(x), PFN_TO_NID(pmd_pfn(x)))) +#endif /* !CONFIG_DISCONTIGMEM */ #define pte_none(x) (!(x).pte_low && !(x).pte_high) -#define __pmd_page(x) (mem_map + (((x).pmd_low >> PAGE_SHIFT) | ((x).pmd_high << (32-PAGE_SHIFT)))) #define pmd_none(x) (!(x).pmd_low && !(x).pmd_high) static inline pte_t __mk_pte(unsigned long page_nr, pgprot_t pgprot) diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/pgtable.h x/include/asm-i386/pgtable.h 
--- x-ref/include/asm-i386/pgtable.h 2003-12-04 21:21:48.000000000 +0100 +++ x/include/asm-i386/pgtable.h 2003-12-04 21:21:53.000000000 +0100 @@ -306,9 +306,13 @@ static inline void ptep_mkdirty(pte_t *p * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ - +#ifndef CONFIG_DISCONTIGMEM #define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) #define mk_pmd(page, pgprot) __mk_pmd((page) - mem_map, (pgprot)) +#else +#define mk_pte(page, pgprot) __mk_pte(((page) - page_zone(page)->zone_mem_map + page_zone(page)->zone_start_pfn), (pgprot)) +#define mk_pmd(page, pgprot) __mk_pmd(((page) - page_zone(page)->zone_mem_map + page_zone(page)->zone_start_pfn), (pgprot)) +#endif /* !CONFIG_DISCONTIGMEM */ /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) @@ -433,7 +437,10 @@ int change_page_attr(struct page *, int, /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define PageSkip(page) (0) + +#ifndef CONFIG_DISCONTIGMEM #define kern_addr_valid(addr) (1) +#endif /* !CONFIG_DISCONTIGMEM */ #define io_remap_page_range remap_page_range diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/setup.h x/include/asm-i386/setup.h --- x-ref/include/asm-i386/setup.h 1999-11-12 19:12:11.000000000 +0100 +++ x/include/asm-i386/setup.h 2003-12-04 21:21:53.000000000 +0100 @@ -1,10 +1,27 @@ -/* - * Just a place holder. 
We don't want to have to test x86 before - * we include stuff - */ - #ifndef _i386_SETUP_H #define _i386_SETUP_H +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +/* + * Reserved space for vmalloc and iomap - defined in asm/page.h + */ +#ifdef CONFIG_HIGHMEM_EMULATION +#define ORDER_DOWN(x) ((x >> (MAX_ORDER-1)) << (MAX_ORDER-1)) +#define MAXMEM_PFN \ +({ \ + int __max_pfn; \ + if (max_pfn > PFN_DOWN(MAXMEM)) \ + __max_pfn = PFN_DOWN(MAXMEM); \ + else \ + __max_pfn = ORDER_DOWN(max_pfn / 5); \ + __max_pfn; \ +}) +#else +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#endif +#define MAX_NONPAE_PFN (1 << 20) #endif /* _i386_SETUP_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/asm-i386/srat.h x/include/asm-i386/srat.h --- x-ref/include/asm-i386/srat.h 1970-01-01 01:00:00.000000000 +0100 +++ x/include/asm-i386/srat.h 2003-12-04 21:21:53.000000000 +0100 @@ -0,0 +1,69 @@ +/* + * Code taken from 64 bit discontigmem support. + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Send feedback to + */ + +#ifndef SRAT_DISCTG_H +#define SRAT_DISCTG_H + + +extern int numapa_to_nid(u64); +int numapfn_to_nid(unsigned long pfn); +#define PHYSADDR_TO_NID(pa) numapa_to_nid(pa) +#define PFN_TO_NID(pa) numapfn_to_nid(pa) + +extern void get_memcfg_from_srat(void); +#define get_memcfg_numa() get_memcfg_from_srat() +extern unsigned long get_zholes_size(int); + +#define MAX_NUMNODES 8 +#define MAX_CLUMPS_PER_NODE 4 +#define MAXCLUMPS (MAX_CLUMPS_PER_NODE * MAX_NUMNODES) + +/* + * cpu -> pxm_domain structure + */ +struct node_cpuid_s{ + u8 phys_id; /* phys apic ID (no EID for IA32) */ + u8 pxm; // proximity domain of cpu + u8 nid; +}; + +extern struct node_cpuid_s node_cpuid[]; + +#define _cpu_to_node(cpu) (node_cpuid[cpu].nid) + +/* + * memory -> pxm_domain structure + */ +struct node_memory_chunk_s { + u64 start_paddr; + u64 end_paddr; + u64 size; + u8 pxm; // proximity domain of node + u8 nid; // which cnode contains this chunk? + u8 bank; // which mem bank on this node +}; +extern struct node_memory_chunk_s node_memory_chunk[]; + +#endif /* SRAT_DISCTG_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/linux/bootmem.h x/include/linux/bootmem.h --- x-ref/include/linux/bootmem.h 2003-07-18 06:24:55.000000000 +0200 +++ x/include/linux/bootmem.h 2003-12-04 21:21:53.000000000 +0100 @@ -32,9 +32,10 @@ typedef struct bootmem_data { extern unsigned long __init bootmem_bootmap_pages (unsigned long); extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); -extern void __init reserve_bootmem (unsigned long addr, unsigned long size); extern void __init free_bootmem (unsigned long addr, unsigned long size); extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE +extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ __alloc_bootmem((x), 
SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ @@ -43,6 +44,7 @@ extern void * __init __alloc_bootmem (un __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem((x), PAGE_SIZE, 0) +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long __init free_all_bootmem (void); extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); @@ -50,11 +52,13 @@ extern void __init reserve_bootmem_node extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #endif /* _LINUX_BOOTMEM_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/mm/bootmem.c x/mm/bootmem.c --- x-ref/mm/bootmem.c 2003-12-04 21:21:45.000000000 +0100 +++ x/mm/bootmem.c 2003-12-04 21:21:53.000000000 +0100 @@ -311,10 +311,12 @@ unsigned long __init init_bootmem (unsig return(init_bootmem_core(&contig_page_data, start, 0, pages)); } +#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE void __init reserve_bootmem (unsigned long addr, unsigned long size) { reserve_bootmem_core(contig_page_data.bdata, addr, size); } +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ void __init free_bootmem (unsigned long addr, unsigned long size) {