diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/config.in x/arch/x86_64/config.in
--- x-ref/arch/x86_64/config.in	2003-05-11 05:41:19.000000000 +0200
+++ x/arch/x86_64/config.in	2003-05-11 05:41:36.000000000 +0200
@@ -99,6 +99,8 @@ fi
 bool 'System V IPC' CONFIG_SYSVIPC
 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
 bool 'Sysctl support' CONFIG_SYSCTL
+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
 if [ "$CONFIG_PROC_FS" = "y" ]; then
    define_bool CONFIG_KCORE_ELF y
 fi
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/entry.S x/arch/x86_64/kernel/entry.S
--- x-ref/arch/x86_64/kernel/entry.S	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/entry.S	2003-05-11 05:41:36.000000000 +0200
@@ -158,12 +158,12 @@ sysret_signal_test:
 	cmpq $0,tsk_need_resched(%rcx)
 	je sysret_restore_args
 	sti
-	call schedule
+	call do_schedule
 	jmp sysret_signal_test
 
sysret_reschedule:
 	sti
-	call schedule
+	call do_schedule
 	jmp sysret_with_reschedule
 
tracesys:
@@ -213,7 +213,7 @@ intret_with_reschedule:
 
intret_reschedule:
 	sti
-	call schedule
+	call do_schedule
 	jmp intret_with_reschedule
 
intret_signal:
@@ -229,7 +229,7 @@ intret_signal_test:
 	cmpq $0,tsk_need_resched(%rcx)
 	je retint_restore_args_swapgs
 	sti
-	call schedule
+	call do_schedule
 	# RED-PEN: can we lose signals here?
 	jmp intret_signal_test
 
@@ -369,12 +369,12 @@ retint_signal_test:
 	cmpq $0,tsk_need_resched(%rcx)
 	je retint_restore_args_swapgs
 	sti
-	call schedule
+	call do_schedule
 	jmp retint_signal_test
 
retint_reschedule:
 	sti
-	call schedule
+	call do_schedule
 	cli
 	jmp retint_with_reschedule
 
@@ -485,7 +485,7 @@ error_reschedule:
 	cmpl $2,%r15d
 	je error_restore_swapgs
 	sti
-	call schedule
+	call do_schedule
 	jmp error_test
 
error_signal:
@@ -501,7 +501,7 @@ error_signal_test:
 	cmpq $0,tsk_need_resched(%rcx)
 	je error_restore_swapgs
 	sti
-	call schedule
+	call do_schedule
 	jmp error_signal_test
 
error_kernelspace:
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/init_task.c x/arch/x86_64/kernel/init_task.c
--- x-ref/arch/x86_64/kernel/init_task.c	2002-11-29 02:22:58.000000000 +0100
+++ x/arch/x86_64/kernel/init_task.c	2003-05-11 05:41:36.000000000 +0200
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/process.c x/arch/x86_64/kernel/process.c
--- x-ref/arch/x86_64/kernel/process.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/process.c	2003-05-11 05:41:36.000000000 +0200
@@ -131,9 +131,6 @@ static void poll_idle (void)
 void cpu_idle (void)
 {
 	/* endless idle loop with no priority at all */
-	init_idle();
-	current->nice = 20;
-	current->counter = -100;
 
 	while (1) {
 		void (*idle)(void) = pm_idle;
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/setup.c x/arch/x86_64/kernel/setup.c
--- x-ref/arch/x86_64/kernel/setup.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/setup.c	2003-05-11 05:41:36.000000000 +0200
@@ -52,6 +52,7 @@
 struct cpuinfo_x86 boot_cpu_data = {
 	cpuid_level: -1,
+	pte_quick: LIST_HEAD_INIT(boot_cpu_data.pte_quick)
 };
 
 unsigned long mmu_cr4_features;
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/setup64.c x/arch/x86_64/kernel/setup64.c
--- x-ref/arch/x86_64/kernel/setup64.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/setup64.c	2003-05-11 05:41:36.000000000 +0200
@@ -10,7 +10,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -56,15 +56,17 @@ void pda_init(int cpu)
 
 	if (cpu == 0) {
 		/* others are initialized in smpboot.c */
-		cpu_pda[cpu].pcurrent = init_tasks[cpu];
+		cpu_pda[cpu].pcurrent = &init_task;
 		cpu_pda[cpu].irqstackptr = boot_cpu_stack;
 		level4 = init_level4_pgt;
+		cpu_pda[cpu].cpudata = &boot_cpu_data;
 	} else {
 		cpu_pda[cpu].irqstackptr = (char *)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!cpu_pda[cpu].irqstackptr)
 			panic("cannot allocate irqstack for cpu %d\n", cpu);
 		level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
+		cpu_pda[cpu].cpudata = &cpu_data[cpu];
 	}
 	if (!level4)
 		panic("Cannot allocate top level page for cpu %d", cpu);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/smpboot.c x/arch/x86_64/kernel/smpboot.c
--- x-ref/arch/x86_64/kernel/smpboot.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/smpboot.c	2003-05-11 05:41:36.000000000 +0200
@@ -155,6 +155,10 @@ void __init smp_store_cpu_info(int id)
 	struct cpuinfo_x86 *c = cpu_data + id;
 
 	*c = boot_cpu_data;
+	INIT_LIST_HEAD(&c->pte_quick);
+	c->pmd_quick = 0;
+	c->pgd_quick = 0;
+	c->pgtable_cache_sz = 0;
 	identify_cpu(c);
 }
 
@@ -340,7 +344,7 @@ void __init smp_callin(void)
 	 * (This works even if the APIC is not enabled.)
 	 */
 	phys_id = GET_APIC_ID(apic_read(APIC_ID));
-	cpuid = current->processor;
+	cpuid = current->cpu;
 	if (test_and_set_bit(cpuid, &cpu_online_map)) {
 		printk("huh, phys CPU#%d, CPU#%d already present??\n",
 			phys_id, cpuid);
@@ -545,17 +549,15 @@ static int __init do_boot_cpu (int apici
 	if (!idle)
 		panic("No idle process for CPU %d", cpu);
 
-	idle->processor = cpu;
+	init_idle(idle, cpu);
+
 	x86_cpu_to_apicid[cpu] = apicid;
 	x86_apicid_to_cpu[apicid] = cpu;
-	idle->cpus_runnable = 1<<cpu;
-	idle->cpus_allowed = 1<<cpu;
 	idle->thread.rip = (unsigned long)start_secondary;
 	idle->thread.rsp = (unsigned long)idle + THREAD_SIZE - 8;
 
-	del_from_runqueue(idle);
 	unhash_process(idle);
-	cpu_pda[cpu].pcurrent = init_tasks[cpu] = idle;
+	cpu_pda[cpu].pcurrent = idle;
 
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
@@ -773,6 +775,7 @@ static int __init do_boot_cpu (int apici
 }
 
 cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
 
 static __init void smp_tune_scheduling (void)
 {
@@ -808,9 +811,13 @@ static __init void smp_tune_scheduling (
 		cacheflush_time *= 10; /* Add an NUMA factor */
 	}
 
+	cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
+
 	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
 		(long)cacheflush_time/(cpu_khz/1000),
 		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+	printk("task migration cache decay timeout: %ld msecs.\n",
+		(cache_decay_ticks + 1) * 1000 / HZ);
 }
 
@@ -855,8 +862,7 @@ void __init smp_boot_cpus(void)
 	x86_apicid_to_cpu[boot_cpu_id] = 0;
 	x86_cpu_to_apicid[0] = boot_cpu_id;
 	global_irq_holder = 0;
-	current->processor = 0;
-	init_idle();
+	current->cpu = 0;
 	smp_tune_scheduling();
 
 	/*
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/x8664_ksyms.c x/arch/x86_64/kernel/x8664_ksyms.c
--- x-ref/arch/x86_64/kernel/x8664_ksyms.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/kernel/x8664_ksyms.c	2003-05-11 05:41:36.000000000 +0200
@@ -194,6 +194,7 @@ EXPORT_SYMBOL(copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(copy_user_generic);
 
+#if 0
 /* Export kernel syscalls */
 EXPORT_SYMBOL(sys_wait4);
 EXPORT_SYMBOL(sys_exit);
@@ -206,7 +207,7 @@ EXPORT_SYMBOL(sys_delete_module);
 EXPORT_SYMBOL(sys_sync);
 EXPORT_SYMBOL(sys_pause);
 EXPORT_SYMBOL(sys_setsid);	/* Rather dubious */
-
+#endif
 EXPORT_SYMBOL(memcpy_fromio);
 EXPORT_SYMBOL(memcpy_toio);
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/mm/fault.c x/arch/x86_64/mm/fault.c
--- x-ref/arch/x86_64/mm/fault.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/mm/fault.c	2003-05-11 05:41:36.000000000 +0200
@@ -112,7 +112,7 @@ asmlinkage void do_page_fault(struct pt_
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct * vma, * prev_vma;
 	unsigned long address;
 	unsigned long fixup;
 	int write;
@@ -173,7 +173,8 @@ again:
 		if (address + 128 < regs->rsp)
 			goto bad_area;
 	}
-	if (expand_stack(vma, address))
+	find_vma_prev(mm, address, &prev_vma);
+	if (expand_stack(vma, address, prev_vma))
 		goto bad_area;
 	/*
 	 * Ok, we have a good vm_area for this memory access, so
@@ -197,6 +198,7 @@ good_area:
 			goto bad_area;
 	}
 
+survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/mm/init.c x/arch/x86_64/mm/init.c
--- x-ref/arch/x86_64/mm/init.c	2003-05-11 05:06:25.000000000 +0200
+++ x/arch/x86_64/mm/init.c	2003-05-11 05:41:36.000000000 +0200
@@ -44,21 +44,21 @@ static unsigned long totalram_pages;
 int do_check_pgt_cache(int low, int high)
 {
 	int freed = 0;
-	if(read_pda(pgtable_cache_sz) > high) {
+	if(pgtable_cache_size > high) {
 		do {
-			if (read_pda(pgd_quick)) {
-				pgd_free_slow(pgd_alloc_one_fast());
+			if (pgd_quicklist) {
+				free_pgd_slow(pgd_alloc(NULL));
 				freed++;
 			}
-			if (read_pda(pmd_quick)) {
+			if (pmd_quicklist) {
 				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
 				freed++;
 			}
-			if (read_pda(pte_quick)) {
-				pte_free_slow(pte_alloc_one_fast(NULL, 0));
+			if (!list_empty(&pte_quicklist)) {
+				pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0));
 				freed++;
 			}
-		} while(read_pda(pgtable_cache_sz) > low);
+		} while(pgtable_cache_size > low);
 	}
 	return freed;
 }
@@ -92,7 +92,7 @@ void show_mem(void)
 	printk("%d reserved pages\n",reserved);
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
-	printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
+	printk("%ld pages in page table cache\n", pgtable_cache_size);
 	show_buffers();
 }
 #endif
diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/tools/offset.c x/arch/x86_64/tools/offset.c
--- x-ref/arch/x86_64/tools/offset.c	2002-11-29 02:22:58.000000000 +0100
+++ x/arch/x86_64/tools/offset.c	2003-05-11 05:41:36.000000000 +0200
@@ -32,7 +32,6 @@ int main(void)
 	ENTRY(need_resched);
 	ENTRY(exec_domain);
 	ENTRY(ptrace);
-	ENTRY(processor);
 	ENTRY(need_resched);
 	ENTRY(thread);
 #undef ENTRY
@@ -42,13 +41,10 @@ int main(void)
 	ENTRY(pcurrent);
 	ENTRY(irqrsp);
 	ENTRY(irqcount);
-	ENTRY(pgd_quick);
-	ENTRY(pmd_quick);
-	ENTRY(pte_quick);
-	ENTRY(pgtable_cache_sz);
 	ENTRY(cpunumber);
 	ENTRY(irqstackptr);
 	ENTRY(level4_pgt);
+	ENTRY(cpudata);
 #undef ENTRY
 	output("#ifdef __ASSEMBLY__");
 	outconst("#define PT_TRACESYS %0", PT_TRACESYS);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/bitops.h x/include/asm-x86_64/bitops.h
--- x-ref/include/asm-x86_64/bitops.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/bitops.h	2003-05-11 05:41:36.000000000 +0200
@@ -78,6 +78,15 @@ static __inline__ void clear_bit(long nr
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
 
+static __inline__ void __clear_bit(long nr, volatile void * addr)
+{
+	__asm__ __volatile__(
+		"btrq %1,%0"
+		:"=m" (ADDR)
+		:"dIr" (nr));
+}
+
+
 /**
  * __change_bit - Toggle a bit in memory
  * @nr: the bit to set
@@ -315,6 +324,63 @@ static __inline__ int find_next_zero_bit
 	return (offset + set + res);
 }
 
+#include
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_bit(void * addr, unsigned size)
+{
+	int d0, d1;
+	int res;
+
+	/* This looks at memory. Mark it volatile to tell gcc not to move it around */
+	__asm__ __volatile__(
+		"xorl %%eax,%%eax\n\t"
+		"repe; scasl\n\t"
+		"jz 1f\n\t"
+		"leaq -4(%%rdi),%%rdi\n\t"
+		"bsfl (%%rdi),%%eax\n"
+		"1:\tsubq %%rbx,%%rdi\n\t"
+		"shlq $3,%%rdi\n\t"
+		"addq %%rdi,%%rax"
+		:"=a" (res), "=&c" (d0), "=&D" (d1)
+		:"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+	return res;
+}
+
+static __inline__ int find_next_bit (void * addr, int size, int offset)
+{
+	unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
+	int set = 0, bit = offset & 31, res;
+
+	if (bit) {
+		/*
+		 * Look for nonzero in the first 32 bits:
+		 */
+		__asm__("bsfl %1,%0\n\t"
+			"jne 1f\n\t"
+			"movl $32, %0\n"
+			"1:"
+			: "=r" (set)
+			: "r" (*p >> bit));
+		if (set < (32 - bit))
+			return set + offset;
+		set = 32 - bit;
+		p++;
+	}
+	/*
+	 * No set bit yet, search remaining full words for a bit
+	 */
+	res = find_first_bit (p, size - 32 * (p - (unsigned int *) addr));
+	return (offset + set + res);
+}
+
+
 /*
  * Find string of zero bits in a bitmap. -1 when not found.
 */
@@ -376,6 +442,35 @@ static __inline__ int ffs(int x)
 	return r+1;
 }
 
+static __inline__ unsigned long __ffs(unsigned long word)
+{
+	__asm__("bsfq %1,%0"
+		:"=r" (word)
+		:"rm" (word));
+	return word;
+}
+
+
+/*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 140
+ * bits is cleared.
+ */
+static inline int _sched_find_first_bit(unsigned long *b)
+{
+	if (unlikely(b[0]))
+		return __ffs(b[0]);
+	if (unlikely(b[1]))
+		return __ffs(b[1]) + 32;
+	if (unlikely(b[2]))
+		return __ffs(b[2]) + 64;
+	if (b[3])
+		return __ffs(b[3]) + 96;
+	return __ffs(b[4]) + 128;
+}
+
+
 /**
  * hweightN - returns the hamming weight of a N-bit word
  * @x: the word to weigh
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/fcntl.h x/include/asm-x86_64/fcntl.h
--- x-ref/include/asm-x86_64/fcntl.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/asm-x86_64/fcntl.h	2003-05-11 05:41:36.000000000 +0200
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/ioctls.h x/include/asm-x86_64/ioctls.h
--- x-ref/include/asm-x86_64/ioctls.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/asm-x86_64/ioctls.h	2003-05-11 05:41:36.000000000 +0200
@@ -67,6 +67,7 @@
 #define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
 #define TIOCGHAYESESP	0x545E	/* Get Hayes ESP configuration */
 #define TIOCSHAYESESP	0x545F	/* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460
 
 /* Used for packet mode */
 #define TIOCPKT_DATA		 0
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/mman.h x/include/asm-x86_64/mman.h
--- x-ref/include/asm-x86_64/mman.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/asm-x86_64/mman.h	2003-05-11 05:41:36.000000000 +0200
@@ -4,6 +4,7 @@
 #define PROT_READ	0x1		/* page can be read */
 #define PROT_WRITE	0x2		/* page can be written */
 #define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8
 #define PROT_NONE	0x0		/* page can not be accessed */
 
 #define MAP_SHARED	0x01		/* Share changes */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/pda.h x/include/asm-x86_64/pda.h
--- x-ref/include/asm-x86_64/pda.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/pda.h	2003-05-11 05:41:36.000000000 +0200
@@ -6,19 +6,15 @@
 /* Per processor datastructure. %gs points to it while the kernel runs */
 /* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
 struct x8664_pda {
+	struct task_struct *pcurrent;	/* Current process */
 	unsigned long kernelstack;	/* TOS for current process */
 	unsigned long oldrsp;		/* user rsp for system call */
 	unsigned long irqrsp;		/* Old rsp for interrupts. */
-	struct task_struct *pcurrent;	/* Current process */
 	int irqcount;			/* Irq nesting counter. Starts with -1 */
 	int cpunumber;			/* Logical CPU number */
-	/* XXX: could be a single list */
-	unsigned long *pgd_quick;
-	unsigned long *pmd_quick;
-	unsigned long *pte_quick;
-	unsigned long pgtable_cache_sz;
 	char *irqstackptr;		/* top of irqstack */
 	unsigned long volatile *level4_pgt;
+	struct cpuinfo_x86 *cpudata;
 } ____cacheline_aligned;
 
 #define PDA_STACKOFFSET (5*8)
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/pgalloc.h x/include/asm-x86_64/pgalloc.h
--- x-ref/include/asm-x86_64/pgalloc.h	2002-11-29 02:23:18.000000000 +0100
+++ x/include/asm-x86_64/pgalloc.h	2003-05-11 05:41:36.000000000 +0200
@@ -4,16 +4,16 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
+#include
 
-#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL)
-#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
 
-#define pmd_populate(mm, pmd, pte) \
-	set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+#define pmd_populate(mm, pmd, page) set_pmd(pmd, mk_pmd(page, __pgprot(_PAGE_TABLE)))
 
 #define pgd_populate(mm, pgd, pmd) \
 	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
@@ -22,41 +22,30 @@ extern __inline__ pmd_t *get_pmd_slow(vo
 	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
-extern __inline__ pmd_t *get_pmd_fast(void)
-{
-	unsigned long *ret;
-
-	if ((ret = read_pda(pmd_quick)) != NULL) {
-		write_pda(pmd_quick, (unsigned long *)(*ret));
-		ret[0] = 0;
-		dec_pgcache_size();
-	} else
-		ret = (unsigned long *)get_pmd_slow();
-	return (pmd_t *)ret;
-}
-
 extern __inline__ void pmd_free(pmd_t *pmd)
 {
-	*(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
-	write_pda(pmd_quick,(unsigned long *) pmd);
-	inc_pgcache_size();
+	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
+	pmd_quicklist = (unsigned long *) pmd;
+	pgtable_cache_size++;
 }
 
 extern __inline__ void pmd_free_slow(pmd_t *pmd)
 {
+#ifdef CONFIG_CHECKING
 	if ((unsigned long)pmd & (PAGE_SIZE-1))
-		out_of_line_bug();
+		BUG();
+#endif
 	free_page((unsigned long)pmd);
 }
 
 static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long *ret = (unsigned long *)read_pda(pmd_quick);
+	unsigned long *ret;
 
-	if (ret != NULL) {
-		write_pda(pmd_quick, (unsigned long *)(*ret));
+	if ((ret = (unsigned long *)pmd_quicklist) != NULL) { /* avoid likely/unlikely here */
+		pmd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
-		dec_pgcache_size();
+		pgtable_cache_size--;
 	}
 	return (pmd_t *)ret;
 }
@@ -66,78 +55,88 @@ static inline pmd_t *pmd_alloc_one (stru
 	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
-static inline pgd_t *pgd_alloc_one_fast (void)
+static inline pgd_t *get_pgd_slow(void)
 {
-	unsigned long *ret = read_pda(pgd_quick);
-
-	if (ret) {
-		write_pda(pgd_quick,(unsigned long *)(*ret));
-		ret[0] = 0;
-		dec_pgcache_size();
-	}
-	return (pgd_t *) ret;
+	return (pgd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
-static inline pgd_t *pgd_alloc (struct mm_struct *mm)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-	pgd_t *pgd = pgd_alloc_one_fast();
+	unsigned long *ret;
 
-	if (pgd == NULL)
-		pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-	return pgd;
+	if ((ret = pgd_quicklist) != NULL) {
+		pgd_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	} else
+		ret = (unsigned long *)get_pgd_slow();
+	return (pgd_t *)ret;
 }
 
 static inline void pgd_free (pgd_t *pgd)
 {
-	*(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
-	write_pda(pgd_quick,(unsigned long *) pgd);
-	inc_pgcache_size();
+	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
+	pgd_quicklist = (unsigned long *) pgd;
+	pgtable_cache_size++;
 }
 
-static inline void pgd_free_slow (pgd_t *pgd)
+static inline void free_pgd_slow (pgd_t *pgd)
 {
+#ifdef CONFIG_CHECKING
 	if ((unsigned long)pgd & (PAGE_SIZE-1))
-		out_of_line_bug();
+		BUG();
+#endif
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline struct page * pte_alloc_one_fast(struct mm_struct *mm,
+					       unsigned long address)
 {
-	return (pte_t *)get_zeroed_page(GFP_KERNEL);
+	struct list_head * entry = pte_quicklist.next; /* FIFO */
+	struct page * page = NULL;
+
+	if (entry != &pte_quicklist) { /* don't add a likely/unlikely here */
+		list_del(entry);
+		page = list_entry(entry, struct page, list);
+		pgtable_cache_size--;
+	}
+	return page;
 }
 
-extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm,
+						    unsigned long address)
 {
-	unsigned long *ret;
+	struct list_head * entry = pte_quicklist.prev; /* LIFO */
+	struct page * page = NULL;
 
-	if ((ret = read_pda(pte_quick)) != NULL) {
-		write_pda(pte_quick, (unsigned long *)(*ret));
-		ret[0] = ret[1];
-		dec_pgcache_size();
+	if (entry != &pte_quicklist) {
+		list_del(entry);
+		page = list_entry(entry, struct page, list);
+		pgtable_cache_size--;
 	}
-	return (pte_t *)ret;
+	return page;
 }
 
 /* Should really implement gc for free page table pages. This could be done with
    a reference count in struct page. */
 
-extern __inline__ void pte_free(pte_t *pte)
+static inline void pte_free(struct page * page)
 {
-	*(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
-	write_pda(pte_quick, (unsigned long *) pte);
-	inc_pgcache_size();
+	list_add(&page->list, &pte_quicklist);
+	pgtable_cache_size++;
 }
 
-extern __inline__ void pte_free_slow(pte_t *pte)
+static __inline__ void pte_free_slow(struct page * page)
 {
-	if ((unsigned long)pte & (PAGE_SIZE-1))
-		out_of_line_bug();
-	free_page((unsigned long)pte);
+	__free_page(page);
 }
 
+static inline void pte_free_via_pmd(pmd_t pmd)
+{
+	pte_free(virt_to_page(pte_offset(&pmd, 0)));
+}
 
 extern int do_check_pgt_cache(int, int);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/pgtable.h x/include/asm-x86_64/pgtable.h
--- x-ref/include/asm-x86_64/pgtable.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/pgtable.h	2003-05-11 05:41:36.000000000 +0200
@@ -355,6 +355,13 @@ static inline void ptep_mkdirty(pte_t *p
 	set_pte(&__pte, __pte(__val));		\
 	__pte;					\
 })
+#define mk_pmd(page,pgprot)			\
+({						\
+	pmd_t __pmd;				\
+						\
+	set_pmd(&__pmd, __pmd((((page)-mem_map) << PAGE_SHIFT) | pgprot_val(pgprot)));	\
+	__pmd;					\
+})
 
 /* This takes a physical page address that is used by the remapping functions */
 static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
@@ -399,6 +406,16 @@ extern inline pte_t pte_modify(pte_t pte
 #define pte_offset(dir, address) ((pte_t *) __pmd_page(*(dir)) + \
 			__pte_offset(address))
 
+#define pte_offset2(dir, address) pte_offset(dir, address)
+#define pte_offset_atomic(dir, address) pte_offset(dir, address)
+#define pte_offset_atomic2(dir, address) pte_offset(dir, address)
+#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address)
+#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address)
+#define pte_kunmap(ptep) do { } while(0)
+#define pte_kunmap2(ptep) do { } while(0)
+#define pte_kunmap_atomic2(ptep) do { } while(0)
+#define pte_alloc_atomic(mm, pmd, address) pte_alloc(mm, pmd, address)
+
 /* never use these in the common code */
 #define pml4_page(level4) ((unsigned long) __va(pml4_val(level4) & PHYSICAL_PAGE_MASK))
 #define pml4_index(address) (((address) >> PML4_SHIFT) & (PTRS_PER_PML4-1))
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/prefetch.h x/include/asm-x86_64/prefetch.h
--- x-ref/include/asm-x86_64/prefetch.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/asm-x86_64/prefetch.h	2003-05-11 05:42:09.000000000 +0200
@@ -0,0 +1,12 @@
+#ifndef __ASM_X86_64_PREFETCH_H
+#define __ASM_X86_64_PREFETCH_H
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+#define prefetch(x) __builtin_prefetch((x),0,1)
+#define prefetchw(x) __builtin_prefetch((x),1,1)
+#define spin_lock_prefetch(x) prefetchw(x)
+
+#endif /* __ASM_X86_64_PREFETCH_H */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/processor.h x/include/asm-x86_64/processor.h
--- x-ref/include/asm-x86_64/processor.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/processor.h	2003-05-11 05:41:55.000000000 +0200
@@ -14,9 +14,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -54,6 +56,11 @@ struct cpuinfo_x86 {
 	__u8	x86_virt_bits, x86_phys_bits;
 	__u32	x86_power;
 	unsigned long loops_per_jiffy;
+
+	unsigned long *pgd_quick;
+	unsigned long *pmd_quick;
+	struct list_head pte_quick;
+	unsigned long pgtable_cache_sz;
 } ____cacheline_aligned;
 
 #define X86_VENDOR_INTEL 0
@@ -71,7 +78,7 @@ extern struct tss_struct init_tss[NR_CPU
 
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+#define current_cpu_data (*read_pda(cpudata))
 #else
 #define cpu_data (&boot_cpu_data)
 #define current_cpu_data boot_cpu_data
@@ -407,13 +414,6 @@ extern inline void sync_core(void)
 
 #define cpu_has_fpu 1
 
-#define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#define prefetch(x) __builtin_prefetch((x),0,1)
-#define prefetchw(x) __builtin_prefetch((x),1,1)
-#define spin_lock_prefetch(x) prefetchw(x)
 
 #define cpu_relax()	rep_nop()
 
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/smp.h x/include/asm-x86_64/smp.h
--- x-ref/include/asm-x86_64/smp.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/smp.h	2003-05-11 05:41:36.000000000 +0200
@@ -79,7 +79,7 @@ extern void smp_store_cpu_info(int id);
 
 #define smp_processor_id() read_pda(cpunumber)
 
-#define stack_smp_processor_id() (stack_current()->processor)
+#define stack_smp_processor_id() (stack_current()->cpu)
 
 extern __inline int hard_smp_processor_id(void)
 {
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/system.h x/include/asm-x86_64/system.h
--- x-ref/include/asm-x86_64/system.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/system.h	2003-05-11 05:41:36.000000000 +0200
@@ -257,7 +257,7 @@ static inline unsigned long __cmpxchg(vo
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
 /* interrupt control.. */
-#define __save_flags(x)		do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define __save_flags(x)		do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %0":"=g" (x): /* no input */ :"memory"); } while (0)
 #define __restore_flags(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
 #define __cli() 		__asm__ __volatile__("cli": : :"memory")
 #define __sti()			__asm__ __volatile__("sti": : :"memory")
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-x86_64/unistd.h x/include/asm-x86_64/unistd.h
--- x-ref/include/asm-x86_64/unistd.h	2003-05-11 05:06:35.000000000 +0200
+++ x/include/asm-x86_64/unistd.h	2003-05-11 05:41:36.000000000 +0200
@@ -629,7 +629,7 @@ static inline pid_t setsid(void)
 	return sys_setsid();
 }
 
-extern ssize_t sys_write(unsigned int, char *, size_t);
+extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count);
 static inline ssize_t write(unsigned int fd, char * buf, size_t count)
 {
 	return sys_write(fd, buf, count);