diff -urNp ref/arch/x86_64/config.in 2.4.20pre5aa1/arch/x86_64/config.in
--- ref/arch/x86_64/config.in	Thu Aug 29 02:13:06 2002
+++ 2.4.20pre5aa1/arch/x86_64/config.in	Fri Aug 30 03:04:20 2002
@@ -11,8 +11,6 @@ define_bool CONFIG_ISA n
 define_bool CONFIG_SBUS n
 
 define_bool CONFIG_UID16 y
-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
 define_bool CONFIG_X86_CMPXCHG y
 define_bool CONFIG_EARLY_PRINTK y
diff -urNp ref/arch/x86_64/kernel/process.c 2.4.20pre5aa1/arch/x86_64/kernel/process.c
--- ref/arch/x86_64/kernel/process.c	Thu Aug 29 02:13:06 2002
+++ 2.4.20pre5aa1/arch/x86_64/kernel/process.c	Fri Aug 30 03:04:20 2002
@@ -133,7 +133,7 @@ void cpu_idle (void)
 	/* endless idle loop with no priority at all */
 	init_idle();
 	current->nice = 20;
-	current->counter = -100;
+	current->dyn_prio = -100;
 
 	while (1) {
 		void (*idle)(void) = pm_idle;
diff -urNp ref/arch/x86_64/kernel/setup.c 2.4.20pre5aa1/arch/x86_64/kernel/setup.c
--- ref/arch/x86_64/kernel/setup.c	Thu Aug 29 02:13:06 2002
+++ 2.4.20pre5aa1/arch/x86_64/kernel/setup.c	Fri Aug 30 03:04:20 2002
@@ -52,6 +52,7 @@ struct cpuinfo_x86 boot_cpu_data = {
 	cpuid_level: -1,
+	pte_quick: LIST_HEAD_INIT(boot_cpu_data.pte_quick)
 };
 
 unsigned long mmu_cr4_features;
diff -urNp ref/arch/x86_64/kernel/setup64.c 2.4.20pre5aa1/arch/x86_64/kernel/setup64.c
--- ref/arch/x86_64/kernel/setup64.c	Thu Aug 29 02:13:06 2002
+++ 2.4.20pre5aa1/arch/x86_64/kernel/setup64.c	Fri Aug 30 03:04:20 2002
@@ -10,7 +10,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -60,12 +60,14 @@ void pda_init(int cpu)
 		cpu_pda[cpu].pcurrent = init_tasks[cpu];
 		cpu_pda[cpu].irqstackptr = boot_cpu_stack;
 		level4 = init_level4_pgt;
+		cpu_pda[cpu].cpudata = &boot_cpu_data;
 	} else {
 		cpu_pda[cpu].irqstackptr = (char *)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!cpu_pda[cpu].irqstackptr)
 			panic("cannot allocate irqstack for cpu %d\n", cpu);
 		level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
+		cpu_pda[cpu].cpudata = &cpu_data[cpu];
 	}
 	if (!level4)
 		panic("Cannot allocate top level page for cpu %d", cpu);
diff -urNp ref/arch/x86_64/kernel/smpboot.c 2.4.20pre5aa1/arch/x86_64/kernel/smpboot.c
--- ref/arch/x86_64/kernel/smpboot.c	Thu Aug 29 02:13:06 2002
+++ 2.4.20pre5aa1/arch/x86_64/kernel/smpboot.c	Fri Aug 30 03:04:20 2002
@@ -151,6 +151,10 @@ void __init smp_store_cpu_info(int id)
 	struct cpuinfo_x86 *c = cpu_data + id;
 
 	*c = boot_cpu_data;
+	INIT_LIST_HEAD(&c->pte_quick);
+	c->pmd_quick = 0;
+	c->pgd_quick = 0;
+	c->pgtable_cache_sz = 0;
 	identify_cpu(c);
 	/*
 	 * Mask B, Pentium, but not Pentium MMX
diff -urNp ref/arch/x86_64/mm/fault.c 2.4.20pre5aa1/arch/x86_64/mm/fault.c
--- ref/arch/x86_64/mm/fault.c	Thu Aug 29 02:13:07 2002
+++ 2.4.20pre5aa1/arch/x86_64/mm/fault.c	Fri Aug 30 03:04:20 2002
@@ -100,7 +100,7 @@ asmlinkage void do_page_fault(struct pt_
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct * vma, * prev_vma;
 	unsigned long address;
 	unsigned long fixup;
 	int write;
@@ -158,7 +158,8 @@ again:
 		if (address + 128 < regs->rsp)
 			goto bad_area;
 	}
-	if (expand_stack(vma, address))
+	find_vma_prev(mm, address, &prev_vma);
+	if (expand_stack(vma, address, prev_vma))
 		goto bad_area;
 	/*
 	 * Ok, we have a good vm_area for this memory access, so
@@ -182,6 +183,7 @@ good_area:
 		goto bad_area;
 	}
 
+survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
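Note (illustration, not part of the patch): the do_page_fault() hunks above route stack growth through find_vma_prev() so that expand_stack(), which takes the previous vma as a third argument in this tree, can refuse to grow the stack into the mapping just below it. A minimal stand-alone C model of that guard, with invented field names and a hard-coded 4K page size:

#include <stdio.h>

struct vma { unsigned long start, end; };      /* toy stand-in for vm_area_struct */

/* Grow 'stack' down to 'address' unless that would run into 'prev'. */
static int expand_stack_model(struct vma *stack, unsigned long address,
                              const struct vma *prev)
{
        if (prev && address < prev->end)
                return -1;              /* would collide with the mapping below */
        if (address < stack->start)
                stack->start = address & ~4095UL;       /* page-align downwards */
        return 0;
}

int main(void)
{
        struct vma heap  = { 0x601000, 0x602000 };
        struct vma stack = { 0x7ff000, 0x800000 };

        printf("%d\n", expand_stack_model(&stack, 0x7fe123, &heap));    /* 0: grown */
        printf("%d\n", expand_stack_model(&stack, 0x601800, &heap));    /* -1: blocked */
        return 0;
}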
diff -urNp ref/arch/x86_64/mm/init.c 2.4.20pre5aa1/arch/x86_64/mm/init.c
--- ref/arch/x86_64/mm/init.c	Thu Aug 29 02:13:07 2002
+++ 2.4.20pre5aa1/arch/x86_64/mm/init.c	Fri Aug 30 03:04:20 2002
@@ -43,21 +43,21 @@ static unsigned long totalram_pages;
 int do_check_pgt_cache(int low, int high)
 {
 	int freed = 0;
-	if(read_pda(pgtable_cache_sz) > high) {
+	if(pgtable_cache_size > high) {
 		do {
-			if (read_pda(pgd_quick)) {
-				pgd_free_slow(pgd_alloc_one_fast());
+			if (pgd_quicklist) {
+				free_pgd_slow(pgd_alloc(NULL));
 				freed++;
 			}
-			if (read_pda(pmd_quick)) {
+			if (pmd_quicklist) {
 				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
 				freed++;
 			}
-			if (read_pda(pte_quick)) {
-				pte_free_slow(pte_alloc_one_fast(NULL, 0));
+			if (!list_empty(&pte_quicklist)) {
+				pte_free_slow(pte_alloc_one_fast_lifo(NULL, 0));
 				freed++;
 			}
-		} while(read_pda(pgtable_cache_sz) > low);
+		} while(pgtable_cache_size > low);
 	}
 	return freed;
 }
@@ -90,7 +90,7 @@ void show_mem(void)
 	printk("%d reserved pages\n",reserved);
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
-	printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
+	printk("%ld pages in page table cache\n", pgtable_cache_size);
 	show_buffers();
 }
diff -urNp ref/arch/x86_64/tools/offset.c 2.4.20pre5aa1/arch/x86_64/tools/offset.c
--- ref/arch/x86_64/tools/offset.c	Thu Aug 29 02:13:07 2002
+++ 2.4.20pre5aa1/arch/x86_64/tools/offset.c	Fri Aug 30 03:04:20 2002
@@ -42,13 +42,10 @@ int main(void)
 	ENTRY(pcurrent);
 	ENTRY(irqrsp);
 	ENTRY(irqcount);
-	ENTRY(pgd_quick);
-	ENTRY(pmd_quick);
-	ENTRY(pte_quick);
-	ENTRY(pgtable_cache_sz);
 	ENTRY(cpunumber);
 	ENTRY(irqstackptr);
 	ENTRY(level4_pgt);
+	ENTRY(cpudata);
 #undef ENTRY
 	output("#ifdef __ASSEMBLY__");
 	outconst("#define PT_TRACESYS %0", PT_TRACESYS);
diff -urNp ref/include/asm-x86_64/fcntl.h 2.4.20pre5aa1/include/asm-x86_64/fcntl.h
--- ref/include/asm-x86_64/fcntl.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/fcntl.h	Fri Aug 30 03:04:20 2002
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
diff -urNp ref/include/asm-x86_64/ioctls.h 2.4.20pre5aa1/include/asm-x86_64/ioctls.h
--- ref/include/asm-x86_64/ioctls.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/ioctls.h	Fri Aug 30 03:04:20 2002
@@ -67,6 +67,7 @@
 #define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
 #define TIOCGHAYESESP	0x545E	/* Get Hayes ESP configuration */
 #define TIOCSHAYESESP	0x545F	/* Set Hayes ESP configuration */
+#define FIOQSIZE	0x5460
 
 /* Used for packet mode */
 #define TIOCPKT_DATA		 0
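Note (illustration, not part of the patch): do_check_pgt_cache() above implements a two-watermark policy — once the per-CPU page-table cache grows past 'high', pages are released back to the page allocator until the cache drops to 'low'. A hypothetical stand-alone model of just that policy, with a single counter standing in for the three quicklists:

#include <stdio.h>

static unsigned long pgtable_cache_size = 12;   /* pretend 12 pages are cached */

static int do_check_pgt_cache_model(int low, int high)
{
        int freed = 0;

        if (pgtable_cache_size > (unsigned long) high) {
                do {
                        /* the real loop pops one page from each quicklist
                           per pass; here one counter models them all */
                        pgtable_cache_size--;
                        freed++;
                } while (pgtable_cache_size > (unsigned long) low);
        }
        return freed;
}

int main(void)
{
        printf("freed %d pages\n", do_check_pgt_cache_model(4, 8));     /* freed 8 pages */
        return 0;
}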
diff -urNp ref/include/asm-x86_64/pda.h 2.4.20pre5aa1/include/asm-x86_64/pda.h
--- ref/include/asm-x86_64/pda.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/pda.h	Fri Aug 30 03:04:20 2002
@@ -10,19 +10,15 @@
 /* Per processor datastructure. %gs points to it while the kernel runs */
 /* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
 struct x8664_pda {
+	struct task_struct *pcurrent;	/* Current process */
 	unsigned long kernelstack;	/* TOS for current process */
 	unsigned long oldrsp;		/* user rsp for system call */
 	unsigned long irqrsp;		/* Old rsp for interrupts. */
-	struct task_struct *pcurrent;	/* Current process */
 	int irqcount;			/* Irq nesting counter. Starts with -1 */
 	int cpunumber;			/* Logical CPU number */
-	/* XXX: could be a single list */
-	unsigned long *pgd_quick;
-	unsigned long *pmd_quick;
-	unsigned long *pte_quick;
-	unsigned long pgtable_cache_sz;
 	char *irqstackptr;		/* top of irqstack */
 	unsigned long volatile *level4_pgt;
+	struct cpuinfo_x86 *cpudata;
 } ____cacheline_aligned;
 
 #define PDA_STACKOFFSET (5*8)
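Note (illustration, not part of the patch): with the quicklists moved out of the PDA, the PDA keeps only a cpudata back-pointer, set up in pda_init() above, and current_cpu_data becomes one pointer dereference (see the processor.h hunk below). A hypothetical userspace model of that indirection, with plain arrays in place of the %gs-relative read_pda() access:

#include <stdio.h>

struct cpuinfo { unsigned long pgtable_cache_sz; };     /* toy cpuinfo_x86 */
struct pda { int cpunumber; struct cpuinfo *cpudata; }; /* toy x8664_pda */

static struct cpuinfo cpu_data[2];
static struct pda cpu_pda[2];

/* models current_cpu_data == (*read_pda(cpudata)); the real macro reads through %gs */
#define current_cpu_data(cpu)   (*cpu_pda[cpu].cpudata)

int main(void)
{
        for (int i = 0; i < 2; i++) {
                cpu_pda[i].cpunumber = i;
                cpu_pda[i].cpudata = &cpu_data[i];      /* as pda_init() does */
        }
        current_cpu_data(1).pgtable_cache_sz = 7;
        printf("%lu\n", cpu_data[1].pgtable_cache_sz);  /* 7 */
        return 0;
}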
diff -urNp ref/include/asm-x86_64/pgalloc.h 2.4.20pre5aa1/include/asm-x86_64/pgalloc.h
--- ref/include/asm-x86_64/pgalloc.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/pgalloc.h	Fri Aug 30 03:04:20 2002
@@ -4,16 +4,16 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
+#include
 
-#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL)
-#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
 
-#define pmd_populate(mm, pmd, pte) \
-		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+#define pmd_populate(mm, pmd, page) set_pmd(pmd, mk_pmd(page, __pgprot(_PAGE_TABLE)))
 
 #define pgd_populate(mm, pgd, pmd) \
 		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
@@ -25,41 +25,30 @@ extern __inline__ pmd_t *get_pmd_slow(vo
 	return ret;
 }
 
-extern __inline__ pmd_t *get_pmd_fast(void)
-{
-	unsigned long *ret;
-
-	if ((ret = read_pda(pmd_quick)) != NULL) {
-		write_pda(pmd_quick, (unsigned long *)(*ret));
-		ret[0] = 0;
-		dec_pgcache_size();
-	} else
-		ret = (unsigned long *)get_pmd_slow();
-	return (pmd_t *)ret;
-}
-
 extern __inline__ void pmd_free(pmd_t *pmd)
 {
-	*(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
-	write_pda(pmd_quick,(unsigned long *) pmd);
-	inc_pgcache_size();
+	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
+	pmd_quicklist = (unsigned long *) pmd;
+	pgtable_cache_size++;
 }
 
 extern __inline__ void pmd_free_slow(pmd_t *pmd)
 {
+#ifdef CONFIG_CHECKING
 	if ((unsigned long)pmd & (PAGE_SIZE-1))
-		out_of_line_bug();
+		BUG();
+#endif
 	free_page((unsigned long)pmd);
 }
 
 static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long *ret = (unsigned long *)read_pda(pmd_quick);
+	unsigned long *ret;
 
-	if (__builtin_expect(ret != NULL, 1)) {
-		write_pda(pmd_quick, (unsigned long *)(*ret));
+	if ((ret = (unsigned long *)pmd_quicklist) != NULL) { /* avoid likely/unlikely here */
+		pmd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
-		dec_pgcache_size();
+		pgtable_cache_size--;
 	}
 	return (pmd_t *)ret;
 }
@@ -73,88 +62,91 @@ static inline pmd_t *pmd_alloc_one (stru
 	return pmd;
 }
 
-
-static inline pgd_t *pgd_alloc_one_fast (void)
+static inline pgd_t *get_pgd_slow(void)
 {
-	unsigned long *ret = read_pda(pgd_quick);
-
-	if (__builtin_expect(ret != NULL, 1)) {
-		write_pda(pgd_quick,(unsigned long *)(*ret));
-		ret[0] = 0;
-		dec_pgcache_size();
-	} else
-		ret = NULL;
-	return (pgd_t *) ret;
+	pgd_t * pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+	if (__builtin_expect(pgd != NULL, 1))
+		clear_page(pgd);
+	return pgd;
 }
 
-static inline pgd_t *pgd_alloc (struct mm_struct *mm)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-	pgd_t *pgd = pgd_alloc_one_fast();
+	unsigned long *ret;
 
-	if (__builtin_expect(pgd == NULL, 0)) {
-		pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
-		if (__builtin_expect(pgd != NULL, 1))
-			clear_page(pgd);
-	}
-	return pgd;
+	if ((ret = pgd_quicklist) != NULL) {
+		pgd_quicklist = (unsigned long *)(*ret);
+		ret[0] = 0;
+		pgtable_cache_size--;
+	} else
+		ret = (unsigned long *)get_pgd_slow();
+	return (pgd_t *)ret;
 }
 
 static inline void pgd_free (pgd_t *pgd)
 {
-	*(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
-	write_pda(pgd_quick,(unsigned long *) pgd);
-	inc_pgcache_size();
+	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
+	pgd_quicklist = (unsigned long *) pgd;
+	pgtable_cache_size++;
 }
 
-static inline void pgd_free_slow (pgd_t *pgd)
+static inline void free_pgd_slow (pgd_t *pgd)
 {
+#ifdef CONFIG_CHECKING
 	if ((unsigned long)pgd & (PAGE_SIZE-1))
-		out_of_line_bug();
+		BUG();
+#endif
 	free_page((unsigned long)pgd);
 }
 
-static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline struct page * pte_alloc_one_fast(struct mm_struct *mm,
+					       unsigned long address)
 {
-	pte_t *pte;
+	struct list_head * entry = pte_quicklist.next; /* FIFO */
+	struct page * page = NULL;
 
-	pte = (pte_t *) __get_free_page(GFP_KERNEL);
-	if (pte)
-		clear_page(pte);
-	return pte;
+	if (entry != &pte_quicklist) { /* don't add a likely/unlikely here */
+		list_del(entry);
+		page = list_entry(entry, struct page, list);
+		pgtable_cache_size--;
+	}
+	return page;
 }
 
-extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline struct page * pte_alloc_one_fast_lifo(struct mm_struct *mm,
+						    unsigned long address)
 {
-	unsigned long *ret;
+	struct list_head * entry = pte_quicklist.prev; /* LIFO */
+	struct page * page = NULL;
 
-	if(__builtin_expect((ret = read_pda(pte_quick)) != NULL, !0)) {
-		write_pda(pte_quick, (unsigned long *)(*ret));
-		ret[0] = ret[1];
-		dec_pgcache_size();
+	if (entry != &pte_quicklist) {
+		list_del(entry);
+		page = list_entry(entry, struct page, list);
+		pgtable_cache_size--;
 	}
-	return (pte_t *)ret;
+	return page;
 }
 
 /* Should really implement gc for free page table pages. This could be
    done with a reference count in struct page. */
 
-extern __inline__ void pte_free(pte_t *pte)
+static inline void pte_free(struct page * page)
 {
-	*(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
-	write_pda(pte_quick, (unsigned long *) pte);
-	inc_pgcache_size();
+	list_add(&page->list, &pte_quicklist);
+	pgtable_cache_size++;
}
 
-extern __inline__ void pte_free_slow(pte_t *pte)
+static __inline__ void pte_free_slow(struct page * page)
 {
-	if ((unsigned long)pte & (PAGE_SIZE-1))
-		out_of_line_bug();
-	free_page((unsigned long)pte);
+	__free_page(page);
 }
 
+static inline void pte_free_via_pmd(pmd_t pmd)
+{
+	pte_free(virt_to_page(pte_offset(&pmd, 0)));
+}
 
 extern int do_check_pgt_cache(int, int);
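Note (illustration, not part of the patch): pte pages are now cached on a doubly-linked list threaded through page->list; pte_alloc_one_fast() takes from one end of the list and pte_alloc_one_fast_lifo() — used by the do_check_pgt_cache() reaper — from the other, so reaping consumes the oldest entries while the fast path gets recently-freed, cache-warm pages. A self-contained model using the same list discipline (the list helpers here are simplified rewrites, not the kernel's):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define list_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_del(struct list_head *e)
{
        e->prev->next = e->next;
        e->next->prev = e->prev;
}

static void list_add(struct list_head *e, struct list_head *head)
{
        e->next = head->next;
        e->prev = head;
        head->next->prev = e;
        head->next = e;
}

struct page { struct list_head list; int id; };         /* toy struct page */

static struct list_head pte_quicklist = LIST_HEAD_INIT(pte_quicklist);

static struct page *pte_alloc_fast_model(int from_tail)
{
        struct list_head *e = from_tail ? pte_quicklist.prev
                                        : pte_quicklist.next;

        if (e == &pte_quicklist)
                return NULL;    /* cache empty: caller falls back to the slow path */
        list_del(e);
        return list_entry(e, struct page, list);
}

int main(void)
{
        static struct page p1 = { .id = 1 }, p2 = { .id = 2 };

        list_add(&p1.list, &pte_quicklist);     /* pte_free(page 1) */
        list_add(&p2.list, &pte_quicklist);     /* pte_free(page 2) */

        int head_end = pte_alloc_fast_model(0)->id;     /* 2: most recently freed */
        int tail_end = pte_alloc_fast_model(1)->id;     /* 1: oldest entry */
        printf("head end pops %d, tail end pops %d\n", head_end, tail_end);
        return 0;
}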
diff -urNp ref/include/asm-x86_64/pgtable.h 2.4.20pre5aa1/include/asm-x86_64/pgtable.h
--- ref/include/asm-x86_64/pgtable.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/pgtable.h	Fri Aug 30 03:04:20 2002
@@ -337,6 +337,13 @@ static inline void ptep_mkdirty(pte_t *p
 	set_pte(&__pte, __pte(__val)); \
 	__pte; \
 })
+#define mk_pmd(page,pgprot) \
+({ \
+	pmd_t __pmd; \
+ \
+	set_pmd(&__pmd, __pmd((((page)-mem_map) << PAGE_SHIFT) | pgprot_val(pgprot))); \
+	__pmd; \
+})
 
 /* This takes a physical page address that is used by the remapping functions */
 static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
@@ -379,6 +386,16 @@ extern inline pte_t pte_modify(pte_t pte
 #define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
 			__pte_offset(address))
 
+#define pte_offset2(dir, address) pte_offset(dir, address)
+#define pte_offset_atomic(dir, address) pte_offset(dir, address)
+#define pte_offset_atomic2(dir, address) pte_offset(dir, address)
+#define pte_offset_under_lock(dir, address, mm) pte_offset(dir, address)
+#define pte_offset2_under_lock(dir, address, mm) pte_offset(dir, address)
+#define pte_kunmap(ptep) do { } while(0)
+#define pte_kunmap2(ptep) do { } while(0)
+#define pte_kunmap_atomic2(ptep) do { } while(0)
+#define pte_alloc_atomic(mm, pmd, address) pte_alloc(mm, pmd, address)
+
 /* never use these in the common code */
 #define pml4_page(level4) ((unsigned long) __va(pml4_val(level4) & PAGE_MASK))
 #define pml4_index(address) (((address) >> PML4_SHIFT) & (PTRS_PER_PML4-1))
diff -urNp ref/include/asm-x86_64/processor.h 2.4.20pre5aa1/include/asm-x86_64/processor.h
--- ref/include/asm-x86_64/processor.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/processor.h	Fri Aug 30 03:04:20 2002
@@ -14,9 +14,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -52,6 +54,11 @@ struct cpuinfo_x86 {
 	int	x86_clflush_size;
 	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
 	unsigned long loops_per_jiffy;
+
+	unsigned long *pgd_quick;
+	unsigned long *pmd_quick;
+	struct list_head pte_quick;
+	unsigned long pgtable_cache_sz;
 } ____cacheline_aligned;
 
 #define X86_VENDOR_INTEL 0
@@ -69,7 +76,7 @@ extern struct tss_struct init_tss[NR_CPU
 
 #ifdef CONFIG_SMP
 extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+#define current_cpu_data (*read_pda(cpudata))
 #else
 #define cpu_data (&boot_cpu_data)
 #define current_cpu_data boot_cpu_data
@@ -395,13 +402,6 @@ extern inline void rep_nop(void)
 
 #define cpu_has_fpu 1
 
-#define ARCH_HAS_PREFETCH
-#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#define prefetch(x) __builtin_prefetch((x),0)
-#define prefetchw(x) __builtin_prefetch((x),1)
-#define spin_lock_prefetch(x) prefetchw(x)
 
 #define cpu_relax()	rep_nop()
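Note (illustration, not part of the patch): the new mk_pmd() computes the physical address of a pte page from its struct page's index in mem_map ((page - mem_map) << PAGE_SHIFT) and ORs in the protection bits; this is what lets pmd_populate() take a struct page instead of a kernel virtual address. The arithmetic, modelled stand-alone with a toy mem_map (0x067 is the x86 _PAGE_TABLE bit pattern: present|rw|user|accessed|dirty):

#include <stdio.h>

#define PAGE_SHIFT      12
#define _PAGE_TABLE     0x067UL

struct page { int pad; };               /* toy struct page */
static struct page mem_map[16];         /* frames 0..15 */

static unsigned long mk_pmd_val_model(struct page *page, unsigned long prot)
{
        /* the page's array index in mem_map is its physical frame number */
        return ((unsigned long)(page - mem_map) << PAGE_SHIFT) | prot;
}

int main(void)
{
        /* frame 3 -> physical 0x3000, plus the page-table bits */
        printf("%#lx\n", mk_pmd_val_model(&mem_map[3], _PAGE_TABLE));   /* 0x3067 */
        return 0;
}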
diff -urNp ref/include/asm-x86_64/system.h 2.4.20pre5aa1/include/asm-x86_64/system.h
--- ref/include/asm-x86_64/system.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/system.h	Fri Aug 30 03:04:20 2002
@@ -239,7 +239,7 @@ static inline unsigned long __cmpxchg(vo
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
 /* interrupt control.. */
-#define __save_flags(x)		do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define __save_flags(x)		do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %0":"=g" (x): /* no input */ :"memory"); } while (0)
 #define __restore_flags(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
 #define __cli() 		__asm__ __volatile__("cli": : :"memory")
 #define __sti()			__asm__ __volatile__("sti": : :"memory")
diff -urNp ref/include/asm-x86_64/unistd.h 2.4.20pre5aa1/include/asm-x86_64/unistd.h
--- ref/include/asm-x86_64/unistd.h	Thu Aug 29 02:13:20 2002
+++ 2.4.20pre5aa1/include/asm-x86_64/unistd.h	Fri Aug 30 03:04:20 2002
@@ -592,7 +592,7 @@ static inline pid_t setsid(void)
 	return sys_setsid();
 }
 
-extern ssize_t sys_write(unsigned int, char *, size_t);
+extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count);
 static inline ssize_t write(unsigned int fd, char * buf, size_t count)
 {
 	return sys_write(fd, buf, count);