From: Andi Kleen x86-64 merge. Only touches architecture specific code. Mainly an accumulation of various bugfixes and smaller improvements. Also makes everything compile again. There is one security fix in there. Please consider applying to your tree. Thanks. - Document boot options - Update defconfig - Make 32bit vsyscall SYSCALL support a variable (default to off) - Compile fixes, merge (local.h, elf binfmt etc.) - Emulate FIOQSIZE - Fix ipc32 return value - Improve MCE handler (cosmetic fixes, fix pci interface) - Add ifdefs to head.S - Shrink some arrays - Merge with 2.6.0test2/i386 - Readd ioport fix that got lost (William Chan) - NMI syncup (Mikael Pettersson) - Improve per cpu data slightly by putting the offset counter into the PDA. It's down to 4 instructions for a reference now. Also only copy data for the existing CPUs (no CPU hotplug here) - Sort exception table at bootup. This avoids problems with exception handlers from __init segments causing unordered exception tables. - Fix for_each_cpu on UP - Add beginnings of pcibus_to_cpumask (low level code still missing) - Fill in tgkill and utimes syscalls for 64bit Documentation/x86_64/boot-options.txt | 152 ++++++++++++++++++++++++++++++++++ arch/x86_64/defconfig | 15 ++- arch/x86_64/ia32/ia32_binfmt.c | 8 + arch/x86_64/ia32/ia32_ioctl.c | 1 arch/x86_64/ia32/ipc32.c | 4 arch/x86_64/kernel/apic.c | 4 arch/x86_64/kernel/bluesmoke.c | 17 ++- arch/x86_64/kernel/head.S | 7 - arch/x86_64/kernel/io_apic.c | 20 ++++ arch/x86_64/kernel/ioport.c | 32 ++++--- arch/x86_64/kernel/mpparse.c | 4 arch/x86_64/kernel/nmi.c | 8 - arch/x86_64/kernel/setup64.c | 41 +++++---- arch/x86_64/mm/extable.c | 27 ++++++ include/asm-x86_64/apic.h | 4 include/asm-x86_64/desc.h | 1 include/asm-x86_64/hw_irq.h | 2 include/asm-x86_64/io.h | 6 + include/asm-x86_64/local.h | 73 ++++++++++++++++ include/asm-x86_64/mpspec.h | 8 - include/asm-x86_64/nmi.h | 2 include/asm-x86_64/pda.h | 2 include/asm-x86_64/percpu.h | 55 +++++++++++- 
include/asm-x86_64/processor.h | 1 include/asm-x86_64/smp.h | 1 include/asm-x86_64/topology.h | 7 + include/asm-x86_64/unistd.h | 6 + 27 files changed, 430 insertions(+), 78 deletions(-) diff -puN arch/x86_64/defconfig~x86_64-merge arch/x86_64/defconfig --- 25/arch/x86_64/defconfig~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/defconfig 2003-07-29 21:07:47.000000000 -0700 @@ -24,8 +24,11 @@ CONFIG_SYSVIPC=y CONFIG_SYSCTL=y CONFIG_LOG_BUF_SHIFT=16 # CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y CONFIG_FUTEX=y CONFIG_EPOLL=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y # # Loadable module support @@ -135,6 +138,7 @@ CONFIG_BLK_DEV_FD=y # CONFIG_BLK_DEV_DAC960 is not set # CONFIG_BLK_DEV_UMEM is not set CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_NBD is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=4096 @@ -163,6 +167,7 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_BLK_DEV_IDESCSI is not set # CONFIG_IDE_TASK_IOCTL is not set +# CONFIG_IDE_TASKFILE_IO is not set # # IDE chipset support/bugfixes @@ -322,7 +327,7 @@ CONFIG_IPV6_PRIVACY=y # CONFIG_XFRM_USER is not set # -# SCTP Configuration (EXPERIMENTAL) +# SCTP Configuration (EXPERIMENTAL) # CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set @@ -379,7 +384,6 @@ CONFIG_NET_PCI=y CONFIG_AMD8111_ETH=y # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set -# CONFIG_TC35815 is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set # CONFIG_E100 is not set @@ -727,6 +731,9 @@ CONFIG_SOUND_ICH=y # CONFIG_SOUND_MSNDPIN is not set # CONFIG_SOUND_VIA82CXXX is not set # CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_ALI5455 is not set +# CONFIG_SOUND_FORTE is not set +# CONFIG_SOUND_AD1980 is not set # # USB support @@ -753,10 +760,8 @@ CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_INIT_DEBUG is not set -CONFIG_KALLSYMS=y # CONFIG_FRAME_POINTER is not set -CONFIG_IOMMU_DEBUG=y 
-CONFIG_IOMMU_LEAK=y +# CONFIG_IOMMU_DEBUG is not set CONFIG_MCE_DEBUG=y # diff -puN arch/x86_64/ia32/ia32_binfmt.c~x86_64-merge arch/x86_64/ia32/ia32_binfmt.c --- 25/arch/x86_64/ia32/ia32_binfmt.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/ia32/ia32_binfmt.c 2003-07-29 21:07:47.000000000 -0700 @@ -32,12 +32,14 @@ #define AT_SYSINFO 32 #define AT_SYSINFO_EHDR 33 -#if 0 /* disabled for now because the code has still problems */ +int sysctl_vsyscall32; + #define ARCH_DLINFO do { \ + if (sysctl_vsyscall32) { \ NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ + } \ } while(0) -#endif struct file; struct elf_phdr; @@ -202,7 +204,7 @@ static inline int elf_core_copy_task_reg } static inline int -elf_core_copy_task_fpregs(struct task_struct *tsk, elf_fpregset_t *fpu) +elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *xregs, elf_fpregset_t *fpu) { struct _fpstate_ia32 *fpstate = (void*)fpu; struct pt_regs *regs = (struct pt_regs *)(tsk->thread.rsp0); diff -puN arch/x86_64/ia32/ia32_ioctl.c~x86_64-merge arch/x86_64/ia32/ia32_ioctl.c --- 25/arch/x86_64/ia32/ia32_ioctl.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/ia32/ia32_ioctl.c 2003-07-29 21:07:47.000000000 -0700 @@ -726,6 +726,7 @@ COMPATIBLE_IOCTL(BNEPCONNADD) COMPATIBLE_IOCTL(BNEPCONNDEL) COMPATIBLE_IOCTL(BNEPGETCONNLIST) COMPATIBLE_IOCTL(BNEPGETCONNINFO) +COMPATIBLE_IOCTL(FIOQSIZE) /* And these ioctls need translation */ HANDLE_IOCTL(TIOCGDEV, tiocgdev) diff -puN arch/x86_64/ia32/ipc32.c~x86_64-merge arch/x86_64/ia32/ipc32.c --- 25/arch/x86_64/ia32/ipc32.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/ia32/ipc32.c 2003-07-29 21:07:47.000000000 -0700 @@ -687,9 +687,7 @@ sys32_ipc (u32 call, int first, int seco return sys_shmget(first, second, third); case SHMCTL: return shmctl32(first, second, (void *)AA(ptr)); - default: - return -EINVAL; } - return -EINVAL; + 
return -ENOSYS; } diff -puN arch/x86_64/kernel/apic.c~x86_64-merge arch/x86_64/kernel/apic.c --- 25/arch/x86_64/kernel/apic.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/apic.c 2003-07-29 21:07:47.000000000 -0700 @@ -441,9 +441,6 @@ void __init setup_local_APIC (void) #ifdef CONFIG_PM -#include -#include - static struct { /* 'active' is true if the local APIC was enabled by us and not the BIOS; this signifies that we are also responsible @@ -540,7 +537,6 @@ static struct sysdev_class lapic_sysclas .suspend = lapic_suspend, }; -/* not static, needed by child devices */ static struct sys_device device_lapic = { .id = 0, .cls = &lapic_sysclass, diff -puN arch/x86_64/kernel/bluesmoke.c~x86_64-merge arch/x86_64/kernel/bluesmoke.c --- 25/arch/x86_64/kernel/bluesmoke.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/bluesmoke.c 2003-07-29 21:07:47.000000000 -0700 @@ -127,9 +127,8 @@ static struct pci_dev *find_k8_nb(void) { struct pci_dev *dev = NULL; int cpu = smp_processor_id(); - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (dev->bus->number==0 && PCI_FUNC(dev->devfn)==3 && - PCI_SLOT(dev->devfn) == (24U+cpu)) + while ((dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, dev)) != NULL) { + if (dev->bus->number==0 && PCI_SLOT(dev->devfn) == (24U+cpu)) return dev; } return NULL; @@ -198,7 +197,7 @@ static char *highbits[32] = { [0] = "err cpu1", }; -static void check_k8_nb(void) +static void check_k8_nb(int header) { struct pci_dev *nb; nb = find_k8_nb(); @@ -210,6 +209,9 @@ static void check_k8_nb(void) pci_read_config_dword(nb, 0x4c, &statushigh); if (!(statushigh & (1<<31))) return; + if (header) + printk(KERN_ERR "CPU %d: Silent Northbridge MCE\n", smp_processor_id()); + printk(KERN_ERR "Northbridge status %08x%08x\n", statushigh,statuslow); @@ -271,9 +273,12 @@ static void k8_machine_check(struct pt_r rdmsrl(MSR_IA32_MCG_STATUS, status); if ((status & (1<<2)) == 0) { if 
(!regs) - check_k8_nb(); + check_k8_nb(1); return; } + + printk(KERN_EMERG "CPU %d: Machine Check Exception: %016Lx\n", smp_processor_id(), status); + if (status & 1) printk(KERN_EMERG "MCG_STATUS: unrecoverable\n"); @@ -291,7 +296,7 @@ static void k8_machine_check(struct pt_r if (nbstatus & (1UL<57)) printk(KERN_EMERG "Unrecoverable condition\n"); - check_k8_nb(); + check_k8_nb(0); if (nbstatus & (1UL<<58)) { u64 adr; diff -puN arch/x86_64/kernel/head.S~x86_64-merge arch/x86_64/kernel/head.S --- 25/arch/x86_64/kernel/head.S~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/head.S 2003-07-29 21:07:47.000000000 -0700 @@ -307,7 +307,8 @@ ENTRY(empty_bad_pmd_table) ENTRY(level3_physmem_pgt) .quad 0x0000000000105007 /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */ -.org 0xb000 + .org 0xb000 +#ifdef CONFIG_ACPI_SLEEP ENTRY(wakeup_level4_pgt) .quad 0x0000000000102007 /* -> level3_ident_pgt */ .fill 255,8,0 @@ -315,9 +316,9 @@ ENTRY(wakeup_level4_pgt) .fill 254,8,0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad 0x0000000000103007 /* -> level3_kernel_pgt */ +#endif -.org 0xc000 -.data + .data .align 16 .globl cpu_gdt_descr diff -puN arch/x86_64/kernel/io_apic.c~x86_64-merge arch/x86_64/kernel/io_apic.c --- 25/arch/x86_64/kernel/io_apic.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/io_apic.c 2003-07-29 21:07:47.000000000 -0700 @@ -63,7 +63,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; */ static struct irq_pin_list { - int apic, pin, next; + short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; /* @@ -1781,3 +1781,21 @@ int io_apic_set_pci_routing (int ioapic, } #endif /*CONFIG_ACPI_BOOT*/ + +#ifndef CONFIG_SMP +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + + /* + * Send the IPI. The write to APIC_ICR fires this off. 
+ */ + apic_write_around(APIC_ICR, cfg); +} +#endif diff -puN arch/x86_64/kernel/ioport.c~x86_64-merge arch/x86_64/kernel/ioport.c --- 25/arch/x86_64/kernel/ioport.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/ioport.c 2003-07-29 21:07:47.000000000 -0700 @@ -51,42 +51,50 @@ static void set_bitmap(unsigned long *bi } } + /* * this changes the io permissions bitmap in the current task. */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct * t = &current->thread; - struct tss_struct * tss; - int ret = 0; + int cpu = get_cpu(); if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32)) return -EINVAL; if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; + struct tss_struct * tss = init_tss + cpu; + + /* + * If it's the first ioperm() call in this thread's lifetime, set the + * IO bitmap up. ioperm() is much less timing critical than clone(), + * this is why we delay this operation until now: + */ if (!t->io_bitmap_ptr) { - t->io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL); + t->io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!t->io_bitmap_ptr) { - ret = -ENOMEM; - goto out; - } - memset(t->io_bitmap_ptr,0xff,(IO_BITMAP_SIZE+1)*4); - tss = init_tss + get_cpu(); - tss->io_map_base = IO_BITMAP_OFFSET; put_cpu(); + return -ENOMEM; + } + + memset(t->io_bitmap_ptr,0xff,IO_BITMAP_BYTES); } - tss = init_tss + get_cpu(); /* * do it in the per-thread copy and in the TSS ...
*/ set_bitmap((unsigned long *) t->io_bitmap_ptr, from, num, !turn_on); + if (tss->io_map_base != IO_BITMAP_OFFSET) { + memcpy(tss->io_bitmap, t->io_bitmap_ptr, sizeof(tss->io_bitmap)); + tss->io_map_base = IO_BITMAP_OFFSET; + } else { set_bitmap((unsigned long *) tss->io_bitmap, from, num, !turn_on); + } - out: put_cpu(); - return ret; + return 0; } /* diff -puN arch/x86_64/kernel/mpparse.c~x86_64-merge arch/x86_64/kernel/mpparse.c --- 25/arch/x86_64/kernel/mpparse.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/mpparse.c 2003-07-29 21:07:47.000000000 -0700 @@ -41,8 +41,10 @@ int acpi_found_madt; * MP-table. */ int apic_version [MAX_APICS]; -int mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +unsigned long mp_bus_to_cpumask [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1UL }; + int mp_current_pci_id = 0; /* I/O APIC entries */ struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; diff -puN arch/x86_64/kernel/nmi.c~x86_64-merge arch/x86_64/kernel/nmi.c --- 25/arch/x86_64/kernel/nmi.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/nmi.c 2003-07-29 21:07:47.000000000 -0700 @@ -141,14 +141,14 @@ void disable_lapic_nmi_watchdog(void) /* tell do_nmi() and others that we're not active any more */ nmi_watchdog = 0; } + void enable_lapic_nmi_watchdog(void) - { +{ if (nmi_active < 0) { nmi_watchdog = NMI_LOCAL_APIC; setup_apic_nmi_watchdog(); } - } - +} void disable_timer_nmi_watchdog(void) { @@ -173,8 +173,6 @@ void enable_timer_nmi_watchdog(void) #ifdef CONFIG_PM -#include - static int nmi_pm_active; /* nmi_active before suspend */ static int lapic_nmi_suspend(struct sys_device *dev, u32 state) diff -puN arch/x86_64/kernel/setup64.c~x86_64-merge arch/x86_64/kernel/setup64.c --- 25/arch/x86_64/kernel/setup64.c~x86_64-merge 
2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/setup64.c 2003-07-29 21:07:47.000000000 -0700 @@ -114,32 +114,37 @@ Valid options: __setup("noexec32=", nonx32_setup); -#ifndef __GENERIC_PER_CPU - -unsigned long __per_cpu_offset[NR_CPUS]; - +/* + * Great future plan: + * Declare PDA itself and support (irqstack,tss,pml4) as per cpu data. + * Always point %gs to its beginning + */ void __init setup_per_cpu_areas(void) { - unsigned long size, i; - unsigned char *ptr; + int i; + unsigned long size; /* Copy section for each CPU (we discard the original) */ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); - if (!size) - return; +#ifdef CONFIG_MODULES + if (size < PERCPU_ENOUGH_ROOM) + size = PERCPU_ENOUGH_ROOM; +#endif + + /* We don't support CPU hotplug, so only allocate as much as needed here */ - ptr = alloc_bootmem(size * NR_CPUS); + int maxi = max_t(unsigned, numnodes, num_online_cpus()); - for (i = 0; i < NR_CPUS; i++, ptr += size) { - /* hide this from the compiler to avoid problems */ - unsigned long offset; - asm("subq %[b],%0" : "=r" (offset) : "0" (ptr), [b] "r" (&__per_cpu_start)); - __per_cpu_offset[i] = offset; - cpu_pda[i].cpudata_offset = offset; - memcpy(ptr, __per_cpu_start, size); + for (i = 0; i < maxi; i++) { + /* If possible allocate on the node of the CPU. + In case it doesn't exist round-robin nodes. 
*/ + unsigned char *ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size); + if (!ptr) + panic("Cannot allocate cpu data for CPU %d\n", i); + cpu_pda[i].data_offset = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } } -#endif void pda_init(int cpu) { @@ -153,7 +158,7 @@ void pda_init(int cpu) pda->me = pda; pda->cpunumber = cpu; pda->irqcount = -1; - pda->cpudata_offset = 0; + pda->data_offset = 0; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; pda->active_mm = &init_mm; diff -puN arch/x86_64/mm/extable.c~x86_64-merge arch/x86_64/mm/extable.c --- 25/arch/x86_64/mm/extable.c~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/arch/x86_64/mm/extable.c 2003-07-29 21:07:47.000000000 -0700 @@ -5,6 +5,7 @@ #include #include #include +#include #include /* Simple binary search */ @@ -28,3 +29,29 @@ search_extable(const struct exception_ta } return NULL; } + +/* When an exception handler is in an non standard section (like __init) + the fixup table can end up unordered. Fix that here. */ +static __init int check_extable(void) +{ + extern struct exception_table_entry __start___ex_table[]; + extern struct exception_table_entry __stop___ex_table[]; + struct exception_table_entry *e; + int change; + + /* The input is near completely presorted, which makes bubble sort the + best (and simplest) sort algorithm. 
*/ + do { + change = 0; + for (e = __start___ex_table+1; e < __stop___ex_table; e++) { + if (e->insn < e[-1].insn) { + struct exception_table_entry tmp = e[-1]; + e[-1] = e[0]; + e[0] = tmp; + change = 1; + } + } + } while (change != 0); + return 0; +} +core_initcall(check_extable); diff -puN /dev/null Documentation/x86_64/boot-options.txt --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/Documentation/x86_64/boot-options.txt 2003-07-29 21:07:47.000000000 -0700 @@ -0,0 +1,152 @@ +AMD64 specific boot options + +There are many others (usually documented in driver documentation), but +only the AMD64 specific ones are listed here. + +Machine check + +(see the Opteron BIOS&Kernel manual for more details on the banks etc.) + + mce=off disable machine check + mce=nok8 disable k8 specific features + mce=disable disable bank NUMBER + mce=enable enable bank number + mce=device Enable more machine check options in Northbridge. + Can be useful for device driver debugging. + mce=NUMBER mcheck timer interval number seconds. + Can be also comma separated in a single mce= + + nomce (for compatibility with i386): same as mce=off + +APICs + + nolocalapic Don't use a local or IO-APIC. This should only + be needed if you have a buggy BIOS. The newer + kernels already turn it off by default if the + BIOS didn't enable the local APIC, so it will + be hopefully not needed. + Note this code path is not very well tested, you are on + your own. + + apic Use IO-APIC. Default + + noapic Don't use the IO-APIC. + Also only lightly tested. + + pirq=... See Documentation/i386/IO-APIC.txt + +Early Console + + syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + + The early console is useful when the kernel crashes before the + normal console is initialized. It is not enabled by + default because it has some cosmetic problems. + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. 
+ Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. + +Timing + + notsc + Don't use the CPU time stamp counter to read the wall time. + This can be used to work around timing problems on multiprocessor systems + with not properly synchronized CPUs. Only useful with a SMP kernel + + report_lost_ticks + Report when timer interrupts are lost because some code turned off + interrupts for too long. + + nmi_watchdog=NUMBER + NUMBER can be: + 0 don't use an NMI watchdog + 1 use the IO-APIC timer for the NMI watchdog + 2 use the local APIC for the NMI watchdog using a performance counter. Note + This will use one performance counter and the local APIC's performance + vector. + +Idle loop + + idle=poll + Don't do power saving in the idle loop using HLT, but poll for rescheduling + event. This will make the CPUs eat a lot more power, but may be useful + to get slightly better performance in multiprocessor benchmarks. It also + makes some profiling using performance counters more accurate. + +Rebooting + + reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old] + bios Use the CPU reboot vector for warm reset + warm Don't set the cold reboot flag + cold Set the cold reboot flag + triple Force a triple fault (init) + kbd Use the keyboard controller. cold reset (default) + + Using warm reset will be much faster especially on big memory + systems because the BIOS will not go through the memory check. + Disadvantage is that not all hardware will be completely reinitialized + on reboot so there may be boot problems on some systems. + +Non Executable Mappings + + noexec=on|off + + on Enable + off Disable + noforce (default) Don't enable by default for heap/stack/data, + but allow PROT_EXEC to be effective + + noexec32=opt{,opt} + + Control the no exec default for 32bit processes. + Requires noexec=on or noexec=noforce to be effective.
+ + Valid options: + all,on Heap,stack,data is non executable. + off (default) Heap,stack,data is executable + stack Stack is non executable, heap/data is executable. + force Don't imply PROT_EXEC for PROT_READ + compat (default) Imply PROT_EXEC for PROT_READ + +SMP + + nosmp Only use a single CPU + + maxcpus=NUMBER only use up to NUMBER CPUs + + cpumask=MASK only use cpus with bits set in mask + +NUMA + + numa=off Only set up a single NUMA node spanning all memory. + + +ACPI + + acpi=off Don't enable ACPI + +PCI + + pci=off Don't use PCI + pci=conf1 Use conf1 access. + pci=conf2 Use conf2 access. + pci=rom Assign ROMs. + pci=assign-busses Assign busses + pci=irqmask=MASK Set PCI interrupt mask to MASK + pci=lastbus=NUMBER Scan up to NUMBER busses, no matter what the mptable says. + +IOMMU + + iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]] + size set size of iommu (in bytes) + noagp don't initialize the AGP driver and use full aperture. + off don't use the IOMMU + leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) + memaper[=order] allocate an own aperture over RAM with size 32MB^order. + noforce don't force IOMMU usage. Default.
+ force Force IOMMU + + diff -puN include/asm-x86_64/apic.h~x86_64-merge include/asm-x86_64/apic.h --- 25/include/asm-x86_64/apic.h~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/include/asm-x86_64/apic.h 2003-07-29 21:07:47.000000000 -0700 @@ -84,10 +84,6 @@ extern int APIC_init_uniprocessor (void) extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); -#ifdef CONFIG_PM -extern struct sys_device device_lapic; -#endif - extern int check_nmi_watchdog (void); extern unsigned int nmi_watchdog; diff -puN include/asm-x86_64/desc.h~x86_64-merge include/asm-x86_64/desc.h --- 25/include/asm-x86_64/desc.h~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/include/asm-x86_64/desc.h 2003-07-29 21:07:47.000000000 -0700 @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include #include #include diff -puN include/asm-x86_64/hw_irq.h~x86_64-merge include/asm-x86_64/hw_irq.h --- 25/include/asm-x86_64/hw_irq.h~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/include/asm-x86_64/hw_irq.h 2003-07-29 21:07:47.000000000 -0700 @@ -164,7 +164,7 @@ static inline void x86_do_profile (struc atomic_inc((atomic_t *)&prof_buffer[rip]); } -#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */ +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { if (IO_APIC_IRQ(i)) send_IPI_self(IO_APIC_VECTOR(i)); diff -puN include/asm-x86_64/io.h~x86_64-merge include/asm-x86_64/io.h --- 25/include/asm-x86_64/io.h~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/include/asm-x86_64/io.h 2003-07-29 21:07:47.000000000 -0700 @@ -301,6 +301,12 @@ out: #define flush_write_buffers() +/* Disable vmerge for now. Need to fix the block layer code + to check for non iommu addresses first. + When the IOMMU is force it is safe to enable. */ +extern int force_iommu; +#define BIO_VERMGE_BOUNDARY (force_iommu ? 
4096 : 0) + #endif /* __KERNEL__ */ #endif diff -puN /dev/null include/asm-x86_64/local.h --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/include/asm-x86_64/local.h 2003-07-29 21:07:47.000000000 -0700 @@ -0,0 +1,73 @@ +#ifndef _ARCH_X8664_LOCAL_H +#define _ARCH_X8664_LOCAL_H + +#include + +typedef struct +{ + volatile unsigned int counter; +} local_t; + +#define LOCAL_INIT(i) { (i) } + +#define local_read(v) ((v)->counter) +#define local_set(v,i) (((v)->counter) = (i)) + +static __inline__ void local_inc(local_t *v) +{ + __asm__ __volatile__( + "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +static __inline__ void local_dec(local_t *v) +{ + __asm__ __volatile__( + "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +static __inline__ void local_add(unsigned long i, local_t *v) +{ + __asm__ __volatile__( + "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +static __inline__ void local_sub(unsigned long i, local_t *v) +{ + __asm__ __volatile__( + "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/* On x86, these are no better than the atomic variants. */ +#define __local_inc(l) local_inc(l) +#define __local_dec(l) local_dec(l) +#define __local_add(i,l) local_add((i),(l)) +#define __local_sub(i,l) local_sub((i),(l)) + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. 
+ */ +#define cpu_local_read(v) local_read(&__get_cpu_var(v)) +#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) +#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) +#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) +#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) +#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) + +#define __cpu_local_inc(v) cpu_local_inc(v) +#define __cpu_local_dec(v) cpu_local_dec(v) +#define __cpu_local_add(i, v) cpu_local_add((i), (v)) +#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) + +#endif /* _ARCH_X8664_LOCAL_H */ diff -puN include/asm-x86_64/mpspec.h~x86_64-merge include/asm-x86_64/mpspec.h --- 25/include/asm-x86_64/mpspec.h~x86_64-merge 2003-07-29 21:07:46.000000000 -0700 +++ 25-akpm/include/asm-x86_64/mpspec.h 2003-07-29 21:07:47.000000000 -0700 @@ -164,11 +164,9 @@ enum mp_bustype { MP_BUS_PCI, MP_BUS_MCA }; -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; -extern int mp_bus_id_to_node [MAX_MP_BUSSES]; -extern int mp_bus_id_to_local [MAX_MP_BUSSES]; -extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +extern unsigned char mp_bus_id_to_type [MAX_MP_BUSSES]; extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; +extern unsigned long mp_bus_to_cpumask [MAX_MP_BUSSES]; extern unsigned int boot_cpu_physical_apicid; extern cpumask_t phys_cpu_present_map; @@ -177,11 +175,9 @@ extern void find_smp_config (void); extern void get_smp_config (void); extern int nr_ioapics; extern int apic_version [MAX_APICS]; -extern int mp_bus_id_to_type [MAX_MP_BUSSES]; extern int mp_irq_entries; extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; extern int mpc_default_type; -extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; extern int mp_current_pci_id; extern unsigned long mp_lapic_addr; extern int pic_mode; diff -puN include/asm-x86_64/nmi.h~x86_64-merge include/asm-x86_64/nmi.h --- 25/include/asm-x86_64/nmi.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/nmi.h 2003-07-29
21:07:47.000000000 -0700 @@ -48,4 +48,6 @@ static inline void unset_nmi_pm_callback extern void default_do_nmi(struct pt_regs *); +extern void default_do_nmi(struct pt_regs *); + #endif /* ASM_NMI_H */ diff -puN include/asm-x86_64/pda.h~x86_64-merge include/asm-x86_64/pda.h --- 25/include/asm-x86_64/pda.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/pda.h 2003-07-29 21:07:47.000000000 -0700 @@ -9,7 +9,7 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { struct task_struct *pcurrent; /* Current process */ - unsigned long cpudata_offset; + unsigned long data_offset; /* Per cpu data offset from linker address */ struct x8664_pda *me; /* Pointer to itself */ unsigned long kernelstack; /* TOS for current process */ unsigned long oldrsp; /* user rsp for system call */ diff -puN include/asm-x86_64/percpu.h~x86_64-merge include/asm-x86_64/percpu.h --- 25/include/asm-x86_64/percpu.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/percpu.h 2003-07-29 21:07:47.000000000 -0700 @@ -1,6 +1,53 @@ -#ifndef __ARCH_I386_PERCPU__ -#define __ARCH_I386_PERCPU__ +#ifndef _ASM_X8664_PERCPU_H_ +#define _ASM_X8664_PERCPU_H_ -#include +#include -#endif /* __ARCH_I386_PERCPU__ */ +#ifdef CONFIG_SMP + +/* Same as the generic code except that we cache the per cpu offset + in the pda. This gives an 3 instruction reference for per cpu data */ + +#include +#include +#define __my_cpu_offset() read_pda(data_offset) +#define __per_cpu_offset(cpu) (cpu_pda[cpu].data_offset) + +/* Separate out the type, so (int[3], foo) works. 
*/ +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu + +/* var is in discarded region: offset to particular copy we want */ +#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset(cpu))) +#define __get_cpu_var(var) \ + (*RELOC_HIDE(&var##__per_cpu, __my_cpu_offset())) + +static inline void percpu_modcopy(void *pcpudst, const void *src, + unsigned long size) +{ + unsigned int i; + for (i = 0; i < NR_CPUS; i++) + if (cpu_possible(i)) + memcpy(pcpudst + __per_cpu_offset(i), src, size); +} + +extern void setup_per_cpu_areas(void); + +#else /* ! SMP */ + +#define DEFINE_PER_CPU(type, name) \ + __typeof__(type) name##__per_cpu + +#define per_cpu(var, cpu) ((void)cpu, var##__per_cpu) +#define __get_cpu_var(var) var##__per_cpu + +#endif /* SMP */ + +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu) + +DECLARE_PER_CPU(struct x8664_pda, per_cpu_pda); + +#endif diff -puN include/asm-x86_64/processor.h~x86_64-merge include/asm-x86_64/processor.h --- 25/include/asm-x86_64/processor.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/processor.h 2003-07-29 21:07:47.000000000 -0700 @@ -181,6 +181,7 @@ static inline void clear_in_cr4 (unsigne * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
*/ #define IO_BITMAP_SIZE 32 +#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 diff -puN include/asm-x86_64/smp.h~x86_64-merge include/asm-x86_64/smp.h --- 25/include/asm-x86_64/smp.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/smp.h 2003-07-29 21:08:35.000000000 -0700 @@ -100,6 +100,7 @@ extern inline int safe_smp_processor_id( #ifndef CONFIG_SMP #define stack_smp_processor_id() 0 #define safe_smp_processor_id() 0 +#define for_each_cpu(x,mask) (void)(mask), (x)=0; #define cpu_logical_map(x) (x) #else #include diff -puN include/asm-x86_64/topology.h~x86_64-merge include/asm-x86_64/topology.h --- 25/include/asm-x86_64/topology.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/topology.h 2003-07-29 21:07:47.000000000 -0700 @@ -5,6 +5,8 @@ #ifdef CONFIG_DISCONTIGMEM +#include + /* Map the K8 CPU local memory controllers to a simple 1:1 CPU:NODE topology */ extern int fake_node; @@ -17,6 +19,11 @@ extern cpumask_t cpu_online_map; #define node_to_cpu_mask(node) (fake_node ? 
cpu_online_map : cpumask_of_cpu(node)) #define node_to_memblk(node) (node) +static inline unsigned long pcibus_to_cpumask(int bus) +{ + return mp_bus_to_cpumask[bus] & cpu_online_map; +} + #define NODE_BALANCE_RATE 30 /* CHECKME */ #endif diff -puN include/asm-x86_64/unistd.h~x86_64-merge include/asm-x86_64/unistd.h --- 25/include/asm-x86_64/unistd.h~x86_64-merge 2003-07-29 21:07:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/unistd.h 2003-07-29 21:07:47.000000000 -0700 @@ -526,8 +526,12 @@ __SYSCALL(__NR_exit_group, sys_exit_grou __SYSCALL(__NR_epoll_wait, sys_epoll_wait) #define __NR_epoll_ctl 233 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_tgkill 234 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 235 +__SYSCALL(__NR_utimes, sys_utimes) -#define __NR_syscall_max __NR_epoll_ctl +#define __NR_syscall_max __NR_utimes #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ _