Return-Path: X-Sieve: cmu-sieve 2.0 Return-path: Envelope-to: mbligh@localhost Delivery-date: Wed, 17 Mar 2004 14:24:41 -0800 Received: from w-mbligh.beaverton.ibm.com ([127.0.0.1] helo=mail.aracnet.com ident=mbligh) by w-mbligh.beaverton.ibm.com with esmtp (Exim 3.35 #1 (Debian)) id 1B3jSq-0002xN-00 for ; Wed, 17 Mar 2004 14:24:40 -0800 Received: from psmtp.com (exprod5mx105.postini.com [12.158.34.61]) by citrine.spiritone.com (8.12.10/8.12.8) with SMTP id i2HMP9HT029115 for ; Wed, 17 Mar 2004 14:25:09 -0800 Delivered-To: Received: from source ([32.97.182.106]) by exprod5mx105.postini.com ([12.158.34.245]) with SMTP; Wed, 17 Mar 2004 17:21:22 EST Received: from northrelay02.pok.ibm.com (northrelay02.pok.ibm.com [9.56.224.150]) by e6.ny.us.ibm.com (8.12.10/8.12.2) with ESMTP id i2HMLH4i534814 for ; Wed, 17 Mar 2004 17:21:17 -0500 Received: from DYN317989BLD.beaverton.ibm.com (d01av02.pok.ibm.com [9.56.224.216]) by northrelay02.pok.ibm.com (8.12.10/NCO/VER6.6) with ESMTP id i2HMLFEW128918 for ; Wed, 17 Mar 2004 17:21:16 -0500 Subject: 2.6.4-mjb1 : 780-hugetlb_dyn_as From: Adam Litke To: Martin Bligh Content-Type: text/plain Organization: IBM Message-Id: <1079561743.5231.4.camel@agtpad> Mime-Version: 1.0 X-Mailer: Ximian Evolution 1.4.5 Date: Wed, 17 Mar 2004 14:15:43 -0800 Content-Transfer-Encoding: 7bit X-Accept: 2.6 or must-fix diff -upN reference/arch/ppc64/kernel/setup.c current/arch/ppc64/kernel/setup.c --- reference/arch/ppc64/kernel/setup.c 2004-03-30 10:13:21.000000000 -0800 +++ current/arch/ppc64/kernel/setup.c 2004-03-31 12:32:13.000000000 -0800 @@ -602,6 +602,10 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; +#ifdef CONFIG_HUGETLBFS + init_mm.context.hugetlb_end = TASK_HPAGE_END_32; + init_mm.context.hugetlb_base = TASK_HPAGE_END_32; +#endif /* Save unparsed command line copy for /proc/cmdline */ strlcpy(saved_command_line, cmd_line, sizeof(saved_command_line)); diff -upN reference/arch/ppc64/mm/hugetlbpage.c current/arch/ppc64/mm/hugetlbpage.c --- reference/arch/ppc64/mm/hugetlbpage.c 2004-03-31 12:32:13.000000000 -0800 +++ current/arch/ppc64/mm/hugetlbpage.c 2004-03-31 12:32:13.000000000 -0800 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,23 @@ static void do_slbia(void *unused) asm volatile ("isync; slbia; isync":::"memory"); } +/* Returns the correct ending address for a process' hugetlb region */ +static inline unsigned long +get_hugetlb_area_end(struct task_struct *task) +{ + u32 stack_end, default_end = TASK_HPAGE_END_32; + + /* + * We use rlim_cur so that unprivileged applications can signal + * our code using ulimit. + */ + stack_end = 0xffffffff - task->rlim[RLIMIT_STACK].rlim_cur; + stack_end = min(default_end, stack_end); + + /* Boundary must be segment aligned */ + return _ALIGN_DOWN(stack_end, MM_SEGMENT_SIZE); +} + /* Activate the low hpage region for 32bit processes. mmap_sem must * be held*/ static int open_32bit_htlbpage_range(struct mm_struct *mm) @@ -246,15 +264,19 @@ static int open_32bit_htlbpage_range(str if (mm->context.low_hpages) return 0; /* The window is already open */ + /* Set up the area boundaries */ + mm->context.hugetlb_end = get_hugetlb_area_end(current); + mm->context.hugetlb_base = mm->context.hugetlb_end; + /* Check no VMAs are in the region */ - vma = find_vma(mm, TASK_HPAGE_BASE_32); + vma = find_vma(mm, mm->context.hugetlb_base); - if (vma && (vma->vm_start < TASK_HPAGE_END_32)) + if (vma && (vma->vm_start < mm->context.hugetlb_end)) return -EBUSY; /* Clean up any leftover PTE pages in the region */ spin_lock(&mm->page_table_lock); - for (addr = TASK_HPAGE_BASE_32; addr < TASK_HPAGE_END_32; + for (addr = mm->context.hugetlb_base; addr < mm->context.hugetlb_end; addr += PMD_SIZE) { pgd_t *pgd = pgd_offset(mm, addr); pmd_t *pmd = pmd_offset(pgd, addr); @@ -586,8 +608,8 @@ full_search: } if (!vma || addr + len <= vma->vm_start) { if (is_hugepage_only_range(addr, len)) { - if (addr < TASK_HPAGE_END_32) - addr = TASK_HPAGE_END_32; + if (addr < mm->context.hugetlb_end) + addr = mm->context.hugetlb_end; else addr = TASK_HPAGE_END; @@ -603,6 +625,32 @@ full_search: } } +unsigned long grow_hugetlb_region(unsigned long hpage_base, unsigned long len) +{ + struct vm_area_struct *vma = NULL; + unsigned long new_base, vma_start = hpage_base; + + vma = find_vma(current->mm, vma_start); + vma_start = (vma && vma->vm_start < current->mm->context.hugetlb_end) ? + vma->vm_start : current->mm->context.hugetlb_end; + printk("First vma in hugetlb region starts at: %lx\n", vma_start); + + new_base = _ALIGN_DOWN(vma_start - len, MM_SEGMENT_SIZE); + if (new_base < TASK_HPAGE_BASE_32) + return -ENOMEM; + + printk("Try to move hugetlb_base down to: %lx\n", new_base); + vma = find_vma(current->mm, new_base); + if (vma && vma->vm_start < hpage_base) { + printk("Found vma at %lx aborting\n", vma->vm_start); + return -ENOMEM; + } + + current->mm->context.hugetlb_base = new_base; + printk("Area clean returning an area at: %lx\n", vma_start-len); + return vma_start - len; +} + unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -623,8 +671,8 @@ unsigned long hugetlb_get_unmapped_area( if (err) return err; /* Should this just be EINVAL? */ - base = TASK_HPAGE_BASE_32; - end = TASK_HPAGE_END_32; + base = current->mm->context.hugetlb_base; + end = current->mm->context.hugetlb_end; } else { base = TASK_HPAGE_BASE; end = TASK_HPAGE_END; @@ -637,6 +685,10 @@ unsigned long hugetlb_get_unmapped_area( for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (addr + len > end) { + /* Try to get the space by expanding the hugetlb region */ + addr = grow_hugetlb_region(base, len); + if (addr > 0) + return addr; if (test_thread_flag(TIF_32BIT)) close_32bit_htlbpage_range(current->mm); return -ENOMEM; diff -upN reference/include/asm-ppc64/mmu.h current/include/asm-ppc64/mmu.h --- reference/include/asm-ppc64/mmu.h 2004-03-11 14:35:23.000000000 -0800 +++ current/include/asm-ppc64/mmu.h 2004-03-31 12:32:13.000000000 -0800 @@ -24,6 +24,8 @@ typedef struct { mm_context_id_t id; #ifdef CONFIG_HUGETLB_PAGE int low_hpages; + unsigned long hugetlb_base; + unsigned long hugetlb_end; #endif } mm_context_t; @@ -191,6 +193,8 @@ void create_valid_hpte( unsigned long sl #define LARGE_PAGE_SHIFT 24 +#define MM_SEGMENT_SIZE (1UL << 28) + static inline unsigned long hpt_hash(unsigned long vpn, int large) { unsigned long vsid; diff -upN reference/include/asm-ppc64/page.h current/include/asm-ppc64/page.h --- reference/include/asm-ppc64/page.h 2004-03-11 14:35:23.000000000 -0800 +++ current/include/asm-ppc64/page.h 2004-03-31 12:32:13.000000000 -0800 @@ -33,15 +33,21 @@ #define TASK_HPAGE_BASE (0x0000010000000000UL) #define TASK_HPAGE_END (0x0000018000000000UL) -/* For 32-bit processes the hugepage range is 2-3G */ -#define TASK_HPAGE_BASE_32 (0x80000000UL) -#define TASK_HPAGE_END_32 (0xc0000000UL) +/* + * We have much greater contention for segments in a + * 32-bit address space. Therefore, the region reserved + * for huge pages is dynamically resized. These values + * define the maximum range allowed for huge pages. + */ +#define TASK_HPAGE_BASE_32 (0x40000000UL) +#define TASK_HPAGE_END_32 (0xf0000000UL) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define is_hugepage_only_range(addr, len) \ ( ((addr > (TASK_HPAGE_BASE-len)) && (addr < TASK_HPAGE_END)) || \ (current->mm->context.low_hpages && \ - (addr > (TASK_HPAGE_BASE_32-len)) && (addr < TASK_HPAGE_END_32)) ) + (addr > (current->mm->context.hugetlb_base-len)) && \ + (addr < current->mm->context.hugetlb_end)) ) #define hugetlb_free_pgtables free_pgtables #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA @@ -49,7 +55,7 @@ ((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \ ((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ ((context).low_hpages && \ - (((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32))))) + (((addr) >= context.hugetlb_base) && ((addr) < context.hugetlb_end))))) #else /* !CONFIG_HUGETLB_PAGE */ diff -upN reference/include/linux/sched.h current/include/linux/sched.h --- reference/include/linux/sched.h 2004-03-31 12:32:13.000000000 -0800 +++ current/include/linux/sched.h 2004-03-31 12:32:13.000000000 -0800 @@ -500,6 +500,7 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ + unsigned long fault_count; }; static inline pid_t process_group(struct task_struct *tsk)