author:    Paul Gortmaker <paul.gortmaker@windriver.com>  2018-02-23 12:21:27 -0500
committer: Paul Gortmaker <paul.gortmaker@windriver.com>  2018-02-23 12:21:27 -0500
commit:    a26b7c40d42a51871392ca1d957d403a565e6259
tree:      d6bcd8ec0be5af76a527ab1378ed5b93ac70f258
parent:    e95ca72a966ab15bedc87aca7bcef9b22a5eef78
download:  longterm-queue-4.8-a26b7c40d42a51871392ca1d957d403a565e6259.tar.gz
4.9 backports to help enable pending CVE fixes
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
30 files changed, 4584 insertions, 0 deletions
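Several of the queued patches below interlock: CONFIG_VMAP_STACK allocates kernel stacks from vmalloc space with guard pages, THREAD_INFO_IN_TASK moves thread_info off the stack into task_struct, and the try_get_task_stack()/put_task_stack() pair reference-counts the stack so it can be freed as soon as the task is dead, rather than when the task_struct itself is finally released. What follows is a minimal userspace C sketch of that refcounting pattern only, as a reading aid: the function names mirror the kernel's, but the malloc()-backed "stack", the stdatomic counter, and the main() driver are illustrative assumptions, not the kernel implementation.

/*
 * Userspace analogue of the stack-pinning scheme in the queued
 * sched/core patches: a live task holds one reference to its stack,
 * code that walks another task's stack takes a temporary reference
 * with an inc-not-zero, and the memory is released on the last drop.
 * All names here are local to this sketch, not kernel API.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
    void *stack;
    atomic_int stack_refcount;   /* mirrors tsk->stack_refcount */
};

static void release_task_stack(struct task *t)
{
    free(t->stack);              /* kernel: vfree()/__free_pages() */
    t->stack = NULL;
}

/* Pin the stack of a task that might be exiting concurrently. */
static void *try_get_task_stack(struct task *t)
{
    int old = atomic_load(&t->stack_refcount);

    /* inc-not-zero: refuse once the last reference is already gone */
    do {
        if (old == 0)
            return NULL;
    } while (!atomic_compare_exchange_weak(&t->stack_refcount,
                                           &old, old + 1));
    return t->stack;
}

static void put_task_stack(struct task *t)
{
    if (atomic_fetch_sub(&t->stack_refcount, 1) == 1)
        release_task_stack(t);   /* last reference: free immediately */
}

int main(void)
{
    struct task t = { .stack = malloc(16384) };
    atomic_init(&t.stack_refcount, 1);   /* the live task's own reference */

    void *s = try_get_task_stack(&t);    /* a "stack walker" pins it */
    put_task_stack(&t);                  /* task dies: drops its reference */
    printf("stack still pinned: %p\n", s);
    put_task_stack(&t);                  /* walker done: stack is freed */
    printf("late pin fails: %p\n", (void *)try_get_task_stack(&t));
    return 0;
}

In the actual patches, the same inc-not-zero test is what allows finish_task_switch() to call put_task_stack() and free a dead task's stack immediately, while a concurrent user of task_stack_page() can still pin it safely via try_get_task_stack().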
diff --git a/queue/EXPORT_SYMBOL-for-asm.patch b/queue/EXPORT_SYMBOL-for-asm.patch new file mode 100644 index 0000000..03a85f2 --- /dev/null +++ b/queue/EXPORT_SYMBOL-for-asm.patch @@ -0,0 +1,161 @@ +From 22823ab419d8ed884195cfa75483fd3a99bb1462 Mon Sep 17 00:00:00 2001 +From: Al Viro <viro@zeniv.linux.org.uk> +Date: Mon, 11 Jan 2016 10:54:54 -0500 +Subject: [PATCH] EXPORT_SYMBOL() for asm + +commit 22823ab419d8ed884195cfa75483fd3a99bb1462 upstream. + +Add asm-usable variants of EXPORT_SYMBOL/EXPORT_SYMBOL_GPL. This +commit just adds the default implementation; most of the architectures +can simply add export.h to asm/Kbuild and start using <asm/export.h> +from assembler. The rest needs to have their <asm/export.h> define +everal macros and then explicitly include <asm-generic/export.h> + +One area where the things might diverge from default is the alignment; +normally it's 8 bytes on 64bit targets and 4 on 32bit ones, both for +unsigned long and for struct kernel_symbol. Unfortunately, amd64 and +m68k are unusual - m68k aligns to 2 bytes (for both) and amd64 aligns +struct kernel_symbol to 16 bytes. For those we'll need asm/export.h to +override the constants used by generic version - KSYM_ALIGN and KCRC_ALIGN +for kernel_symbol and unsigned long resp. And no, __alignof__ would not +do the trick - on amd64 __alignof__ of struct kernel_symbol is 8, not 16. + +More serious source of unpleasantness is treatment of function +descriptors on architectures that have those. Things like ppc64, +parisc, ia64, etc. need more than the address of the first insn to +call an arbitrary function. As the result, their representation of +pointers to functions is not the typical "address of the entry point" - +it's an address of a small static structure containing all the required +information (including the entry point, of course). Sadly, the asm-side +conventions differ in what the function name refers to - entry point or +the function descriptor. On ppc64 we do the latter; + bar: .quad foo +is what void (*bar)(void) = foo; turns into and the rare places where +we need to explicitly work with the label of entry point are dealt with +as DOTSYM(foo). For our purposes it's ideal - generic macros are usable. +However, parisc would have foo and P%foo used for label of entry point +and address of the function descriptor and + bar: .long P%foo +woudl be used instead. ia64 goes similar to parisc in that respect, +except that there it's @fptr(foo) rather than P%foo. Such architectures +need to define KSYM_FUNC that would turn a function name into whatever +is needed to refer to function descriptor. + +What's more, on such architectures we need to know whether we are exporting +a function or an object - in assembler we have to tell that explicitly, to +decide whether we want EXPORT_SYMBOL(foo) produce e.g. + __ksymtab_foo: .quad foo +or + __ksymtab_foo: .quad @fptr(foo) + +For that reason we introduce EXPORT_DATA_SYMBOL{,_GPL}(), to be used for +exports of data objects. On normal architectures it's the same thing +as EXPORT_SYMBOL{,_GPL}(), but on parisc-like ones they differ and the +right one needs to be used. Most of the exports are functions, so we +keep EXPORT_SYMBOL for those... 
+ +Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> + +diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h +new file mode 100644 +index 000000000000..43199a049da5 +--- /dev/null ++++ b/include/asm-generic/export.h +@@ -0,0 +1,94 @@ ++#ifndef __ASM_GENERIC_EXPORT_H ++#define __ASM_GENERIC_EXPORT_H ++ ++#ifndef KSYM_FUNC ++#define KSYM_FUNC(x) x ++#endif ++#ifdef CONFIG_64BIT ++#define __put .quad ++#ifndef KSYM_ALIGN ++#define KSYM_ALIGN 8 ++#endif ++#ifndef KCRC_ALIGN ++#define KCRC_ALIGN 8 ++#endif ++#else ++#define __put .long ++#ifndef KSYM_ALIGN ++#define KSYM_ALIGN 4 ++#endif ++#ifndef KCRC_ALIGN ++#define KCRC_ALIGN 4 ++#endif ++#endif ++ ++#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX ++#define KSYM(name) _##name ++#else ++#define KSYM(name) name ++#endif ++ ++/* ++ * note on .section use: @progbits vs %progbits nastiness doesn't matter, ++ * since we immediately emit into those sections anyway. ++ */ ++.macro ___EXPORT_SYMBOL name,val,sec ++#ifdef CONFIG_MODULES ++ .globl KSYM(__ksymtab_\name) ++ .section ___ksymtab\sec+\name,"a" ++ .balign KSYM_ALIGN ++KSYM(__ksymtab_\name): ++ __put \val, KSYM(__kstrtab_\name) ++ .previous ++ .section __ksymtab_strings,"a" ++KSYM(__kstrtab_\name): ++#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX ++ .asciz "_\name" ++#else ++ .asciz "\name" ++#endif ++ .previous ++#ifdef CONFIG_MODVERSIONS ++ .section ___kcrctab\sec+\name,"a" ++ .balign KCRC_ALIGN ++KSYM(__kcrctab_\name): ++ __put KSYM(__crc_\name) ++ .weak KSYM(__crc_\name) ++ .previous ++#endif ++#endif ++.endm ++#undef __put ++ ++#if defined(__KSYM_DEPS__) ++ ++#define __EXPORT_SYMBOL(sym, val, sec) === __KSYM_##sym === ++ ++#elif defined(CONFIG_TRIM_UNUSED_KSYMS) ++ ++#include <linux/kconfig.h> ++#include <generated/autoksyms.h> ++ ++#define __EXPORT_SYMBOL(sym, val, sec) \ ++ __cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym)) ++#define __cond_export_sym(sym, val, sec, conf) \ ++ ___cond_export_sym(sym, val, sec, conf) ++#define ___cond_export_sym(sym, val, sec, enabled) \ ++ __cond_export_sym_##enabled(sym, val, sec) ++#define __cond_export_sym_1(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec ++#define __cond_export_sym_0(sym, val, sec) /* nothing */ ++ ++#else ++#define __EXPORT_SYMBOL(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec ++#endif ++ ++#define EXPORT_SYMBOL(name) \ ++ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),) ++#define EXPORT_SYMBOL_GPL(name) \ ++ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl) ++#define EXPORT_DATA_SYMBOL(name) \ ++ __EXPORT_SYMBOL(name, KSYM(name),) ++#define EXPORT_DATA_SYMBOL_GPL(name) \ ++ __EXPORT_SYMBOL(name, KSYM(name),_gpl) ++ ++#endif +-- +2.15.0 + diff --git a/queue/fork-Add-generic-vmalloced-stack-support.patch b/queue/fork-Add-generic-vmalloced-stack-support.patch new file mode 100644 index 0000000..e36cebb --- /dev/null +++ b/queue/fork-Add-generic-vmalloced-stack-support.patch @@ -0,0 +1,292 @@ +From ba14a194a434ccc8f733e263ad2ce941e35e5787 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 11 Aug 2016 02:35:21 -0700 +Subject: [PATCH] fork: Add generic vmalloced stack support + +commit ba14a194a434ccc8f733e263ad2ce941e35e5787 upstream. + +If CONFIG_VMAP_STACK=y is selected, kernel stacks are allocated with +__vmalloc_node_range(). + +Grsecurity has had a similar feature (called GRKERNSEC_KSTACKOVERFLOW=y) +for a long time. 
+ +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Acked-by: Michal Hocko <mhocko@suse.com> +Cc: Alexander Potapenko <glider@google.com> +Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: Dmitry Vyukov <dvyukov@google.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Oleg Nesterov <oleg@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/14c07d4fd173a5b117f51e8b939f9f4323e39899.1470907718.git.luto@kernel.org +[ Minor edits. ] +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/Kconfig b/arch/Kconfig +index e9c9334507dd..9ecf9f6f9e15 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -707,4 +707,38 @@ config ARCH_NO_COHERENT_DMA_MMAP + config CPU_NO_EFFICIENT_FFS + def_bool n + ++config HAVE_ARCH_VMAP_STACK ++ def_bool n ++ help ++ An arch should select this symbol if it can support kernel stacks ++ in vmalloc space. This means: ++ ++ - vmalloc space must be large enough to hold many kernel stacks. ++ This may rule out many 32-bit architectures. ++ ++ - Stacks in vmalloc space need to work reliably. For example, if ++ vmap page tables are created on demand, either this mechanism ++ needs to work while the stack points to a virtual address with ++ unpopulated page tables or arch code (switch_to() and switch_mm(), ++ most likely) needs to ensure that the stack's page table entries ++ are populated before running on a possibly unpopulated stack. ++ ++ - If the stack overflows into a guard page, something reasonable ++ should happen. The definition of "reasonable" is flexible, but ++ instantly rebooting without logging anything would be unfriendly. ++ ++config VMAP_STACK ++ default y ++ bool "Use a virtually-mapped stack" ++ depends on HAVE_ARCH_VMAP_STACK && !KASAN ++ ---help--- ++ Enable this if you want the use virtually-mapped kernel stacks ++ with guard pages. This causes kernel stack overflows to be ++ caught immediately rather than causing difficult-to-diagnose ++ corruption. ++ ++ This is presently incompatible with KASAN because KASAN expects ++ the stack to map directly to the KASAN shadow map using a formula ++ that is incorrect if the stack is in vmalloc space. 
++ + source "kernel/gcov/Kconfig" +diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h +index 29bd59790d6c..c7026429816b 100644 +--- a/arch/ia64/include/asm/thread_info.h ++++ b/arch/ia64/include/asm/thread_info.h +@@ -56,7 +56,7 @@ struct thread_info { + #define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) + #define task_thread_info(tsk) ((struct thread_info *) 0) + #endif +-#define free_thread_stack(ti) /* nothing */ ++#define free_thread_stack(tsk) /* nothing */ + #define task_stack_page(tsk) ((void *)(tsk)) + + #define __HAVE_THREAD_FUNCTIONS +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 62c68e513e39..20f9f47bcfd0 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1923,6 +1923,9 @@ struct task_struct { + #ifdef CONFIG_MMU + struct task_struct *oom_reaper_list; + #endif ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *stack_vm_area; ++#endif + /* CPU-specific state of this task */ + struct thread_struct thread; + /* +@@ -1939,6 +1942,18 @@ extern int arch_task_struct_size __read_mostly; + # define arch_task_struct_size (sizeof(struct task_struct)) + #endif + ++#ifdef CONFIG_VMAP_STACK ++static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) ++{ ++ return t->stack_vm_area; ++} ++#else ++static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) ++{ ++ return NULL; ++} ++#endif ++ + /* Future-safe accessor for struct task_struct's cpus_allowed. */ + #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) + +diff --git a/kernel/fork.c b/kernel/fork.c +index 52e725d4a866..9b85f6b2cdcd 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -158,19 +158,39 @@ void __weak arch_release_thread_stack(unsigned long *stack) + * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a + * kmemcache based allocator. + */ +-# if THREAD_SIZE >= PAGE_SIZE +-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, +- int node) ++# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) ++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + { ++#ifdef CONFIG_VMAP_STACK ++ void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, ++ VMALLOC_START, VMALLOC_END, ++ THREADINFO_GFP | __GFP_HIGHMEM, ++ PAGE_KERNEL, ++ 0, node, ++ __builtin_return_address(0)); ++ ++ /* ++ * We can't call find_vm_area() in interrupt context, and ++ * free_thread_stack() can be called in interrupt context, ++ * so cache the vm_struct. ++ */ ++ if (stack) ++ tsk->stack_vm_area = find_vm_area(stack); ++ return stack; ++#else + struct page *page = alloc_pages_node(node, THREADINFO_GFP, + THREAD_SIZE_ORDER); + + return page ? 
page_address(page) : NULL; ++#endif + } + +-static inline void free_thread_stack(unsigned long *stack) ++static inline void free_thread_stack(struct task_struct *tsk) + { +- __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER); ++ if (task_stack_vm_area(tsk)) ++ vfree(tsk->stack); ++ else ++ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); + } + # else + static struct kmem_cache *thread_stack_cache; +@@ -181,9 +201,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); + } + +-static void free_thread_stack(unsigned long *stack) ++static void free_thread_stack(struct task_struct *tsk) + { +- kmem_cache_free(thread_stack_cache, stack); ++ kmem_cache_free(thread_stack_cache, tsk->stack); + } + + void thread_stack_cache_init(void) +@@ -213,24 +233,47 @@ struct kmem_cache *vm_area_cachep; + /* SLAB cache for mm_struct structures (tsk->mm) */ + static struct kmem_cache *mm_cachep; + +-static void account_kernel_stack(unsigned long *stack, int account) ++static void account_kernel_stack(struct task_struct *tsk, int account) + { +- /* All stack pages are in the same zone and belong to the same memcg. */ +- struct page *first_page = virt_to_page(stack); ++ void *stack = task_stack_page(tsk); ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ ++ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); ++ ++ if (vm) { ++ int i; ++ ++ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ mod_zone_page_state(page_zone(vm->pages[i]), ++ NR_KERNEL_STACK_KB, ++ PAGE_SIZE / 1024 * account); ++ } + +- mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, +- THREAD_SIZE / 1024 * account); ++ /* All stack pages belong to the same memcg. */ ++ memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB, ++ account * (THREAD_SIZE / 1024)); ++ } else { ++ /* ++ * All stack pages are in the same zone and belong to the ++ * same memcg. ++ */ ++ struct page *first_page = virt_to_page(stack); ++ ++ mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, ++ THREAD_SIZE / 1024 * account); + +- memcg_kmem_update_page_stat( +- first_page, MEMCG_KERNEL_STACK_KB, +- account * (THREAD_SIZE / 1024)); ++ memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB, ++ account * (THREAD_SIZE / 1024)); ++ } + } + + void free_task(struct task_struct *tsk) + { +- account_kernel_stack(tsk->stack, -1); ++ account_kernel_stack(tsk, -1); + arch_release_thread_stack(tsk->stack); +- free_thread_stack(tsk->stack); ++ free_thread_stack(tsk); + rt_mutex_debug_task_free(tsk); + ftrace_graph_exit_task(tsk); + put_seccomp_filter(tsk); +@@ -342,6 +385,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + { + struct task_struct *tsk; + unsigned long *stack; ++ struct vm_struct *stack_vm_area; + int err; + + if (node == NUMA_NO_NODE) +@@ -354,11 +398,23 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + if (!stack) + goto free_tsk; + ++ stack_vm_area = task_stack_vm_area(tsk); ++ + err = arch_dup_task_struct(tsk, orig); ++ ++ /* ++ * arch_dup_task_struct() clobbers the stack-related fields. Make ++ * sure they're properly initialized before using any stack-related ++ * functions again. 
++ */ ++ tsk->stack = stack; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = stack_vm_area; ++#endif ++ + if (err) + goto free_stack; + +- tsk->stack = stack; + #ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under +@@ -390,14 +446,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + tsk->task_frag.page = NULL; + tsk->wake_q.next = NULL; + +- account_kernel_stack(stack, 1); ++ account_kernel_stack(tsk, 1); + + kcov_task_init(tsk); + + return tsk; + + free_stack: +- free_thread_stack(stack); ++ free_thread_stack(tsk); + free_tsk: + free_task_struct(tsk); + return NULL; +-- +2.15.0 + diff --git a/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch b/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch new file mode 100644 index 0000000..5ccc7e0 --- /dev/null +++ b/queue/fork-Optimize-task-creation-by-caching-two-thread-st.patch @@ -0,0 +1,121 @@ +From ac496bf48d97f2503eaa353996a4dd5e4383eaf0 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 15 Sep 2016 22:45:49 -0700 +Subject: [PATCH] fork: Optimize task creation by caching two thread stacks per + CPU if CONFIG_VMAP_STACK=y +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit ac496bf48d97f2503eaa353996a4dd5e4383eaf0 upstream. + +vmalloc() is a bit slow, and pounding vmalloc()/vfree() will eventually +force a global TLB flush. + +To reduce pressure on them, if CONFIG_VMAP_STACK=y, cache two thread +stacks per CPU. This will let us quickly allocate a hopefully +cache-hot, TLB-hot stack under heavy forking workloads (shell script style). + +On my silly pthread_create() benchmark, it saves about 2 µs per +pthread_create()+join() with CONFIG_VMAP_STACK=y. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/94811d8e3994b2e962f88866290017d498eb069c.1474003868.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/kernel/fork.c b/kernel/fork.c +index 5dd0a516626d..c060c7e7c247 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -159,15 +159,41 @@ void __weak arch_release_thread_stack(unsigned long *stack) + * kmemcache based allocator. + */ + # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) ++ ++#ifdef CONFIG_VMAP_STACK ++/* ++ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB ++ * flush. Try to minimize the number of calls by caching stacks. 
++ */ ++#define NR_CACHED_STACKS 2 ++static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); ++#endif ++ + static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + { + #ifdef CONFIG_VMAP_STACK +- void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, +- VMALLOC_START, VMALLOC_END, +- THREADINFO_GFP | __GFP_HIGHMEM, +- PAGE_KERNEL, +- 0, node, +- __builtin_return_address(0)); ++ void *stack; ++ int i; ++ ++ local_irq_disable(); ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ struct vm_struct *s = this_cpu_read(cached_stacks[i]); ++ ++ if (!s) ++ continue; ++ this_cpu_write(cached_stacks[i], NULL); ++ ++ tsk->stack_vm_area = s; ++ local_irq_enable(); ++ return s->addr; ++ } ++ local_irq_enable(); ++ ++ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, ++ VMALLOC_START, VMALLOC_END, ++ THREADINFO_GFP | __GFP_HIGHMEM, ++ PAGE_KERNEL, ++ 0, node, __builtin_return_address(0)); + + /* + * We can't call find_vm_area() in interrupt context, and +@@ -187,10 +213,28 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) + + static inline void free_thread_stack(struct task_struct *tsk) + { +- if (task_stack_vm_area(tsk)) ++#ifdef CONFIG_VMAP_STACK ++ if (task_stack_vm_area(tsk)) { ++ unsigned long flags; ++ int i; ++ ++ local_irq_save(flags); ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ if (this_cpu_read(cached_stacks[i])) ++ continue; ++ ++ this_cpu_write(cached_stacks[i], tsk->stack_vm_area); ++ local_irq_restore(flags); ++ return; ++ } ++ local_irq_restore(flags); ++ + vfree(tsk->stack); +- else +- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); ++ return; ++ } ++#endif ++ ++ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); + } + # else + static struct kmem_cache *thread_stack_cache; +-- +2.15.0 + diff --git a/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch b/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch new file mode 100644 index 0000000..9539d43 --- /dev/null +++ b/queue/locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch @@ -0,0 +1,42 @@ +From b8fb03785d4de097507d0cf45873525e0ac4d2b2 Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Thu, 1 Sep 2016 11:39:33 -0700 +Subject: [PATCH] locking/static_keys: Provide DECLARE and well as DEFINE + macros + +commit b8fb03785d4de097507d0cf45873525e0ac4d2b2 upstream. + +We will need to provide declarations of static keys in header +files. Provide DECLARE_STATIC_KEY_{TRUE,FALSE} macros. 
+ +Signed-off-by: Tony Luck <tony.luck@intel.com> +Acked-by: Borislav Petkov <bp@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/816881cf85bd3cf13385d212882618f38a3b5d33.1472754711.git.tony.luck@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h +index 661af564fae8..595fb46213fc 100644 +--- a/include/linux/jump_label.h ++++ b/include/linux/jump_label.h +@@ -267,9 +267,15 @@ struct static_key_false { + #define DEFINE_STATIC_KEY_TRUE(name) \ + struct static_key_true name = STATIC_KEY_TRUE_INIT + ++#define DECLARE_STATIC_KEY_TRUE(name) \ ++ extern struct static_key_true name ++ + #define DEFINE_STATIC_KEY_FALSE(name) \ + struct static_key_false name = STATIC_KEY_FALSE_INIT + ++#define DECLARE_STATIC_KEY_FALSE(name) \ ++ extern struct static_key_false name ++ + extern bool ____wrong_branch_error(void); + + #define static_key_enabled(x) \ +-- +2.15.0 + diff --git a/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch b/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch new file mode 100644 index 0000000..513a9ea --- /dev/null +++ b/queue/sched-core-Add-try_get_task_stack-and-put_task_stack.patch @@ -0,0 +1,81 @@ +From c6c314a613cd7d03fb97713e0d642b493de42e69 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 15 Sep 2016 22:45:43 -0700 +Subject: [PATCH] sched/core: Add try_get_task_stack() and put_task_stack() + +commit c6c314a613cd7d03fb97713e0d642b493de42e69 upstream. + +There are a few places in the kernel that access stack memory +belonging to a different task. Before we can start freeing task +stacks before the task_struct is freed, we need a way for those code +paths to pin the stack. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/17a434f50ad3d77000104f21666575e10a9c1fbd.1474003868.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index a287e8b13549..a95867267e9f 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -3094,11 +3094,19 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) + { + return &task->thread_info; + } ++ ++/* ++ * When accessing the stack of a non-current task that might exit, use ++ * try_get_task_stack() instead. task_stack_page will return a pointer ++ * that could get freed out from under you. 
++ */ + static inline void *task_stack_page(const struct task_struct *task) + { + return task->stack; + } ++ + #define setup_thread_stack(new,old) do { } while(0) ++ + static inline unsigned long *end_of_stack(const struct task_struct *task) + { + return task->stack; +@@ -3134,6 +3142,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p) + } + + #endif ++ ++static inline void *try_get_task_stack(struct task_struct *tsk) ++{ ++ return task_stack_page(tsk); ++} ++ ++static inline void put_task_stack(struct task_struct *tsk) {} ++ + #define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +diff --git a/init/Kconfig b/init/Kconfig +index ec8d43894b02..3b9a47fe843b 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -33,6 +33,9 @@ config THREAD_INFO_IN_TASK + make this work, an arch will need to remove all thread_info fields + except flags and fix any runtime bugs. + ++ One subtle change that will be needed is to use try_get_task_stack() ++ and put_task_stack() in save_thread_stack_tsk() and get_wchan(). ++ + menu "General setup" + + config BROKEN +-- +2.15.0 + diff --git a/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch b/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch new file mode 100644 index 0000000..c584e03 --- /dev/null +++ b/queue/sched-core-Allow-putting-thread_info-into-task_struc.patch @@ -0,0 +1,219 @@ +From c65eacbe290b8141554c71b2c94489e73ade8c8d Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Tue, 13 Sep 2016 14:29:24 -0700 +Subject: [PATCH] sched/core: Allow putting thread_info into task_struct + +commit c65eacbe290b8141554c71b2c94489e73ade8c8d upstream. + +If an arch opts in by setting CONFIG_THREAD_INFO_IN_TASK_STRUCT, +then thread_info is defined as a single 'u32 flags' and is the first +entry of task_struct. thread_info::task is removed (it serves no +purpose if thread_info is embedded in task_struct), and +thread_info::cpu gets its own slot in task_struct. + +This is heavily based on a patch written by Linus. + +Originally-from: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/a0898196f0476195ca02713691a5037a14f2aac5.1473801993.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/include/linux/init_task.h b/include/linux/init_task.h +index f8834f820ec2..9c04d44eeb3c 100644 +--- a/include/linux/init_task.h ++++ b/include/linux/init_task.h +@@ -15,6 +15,8 @@ + #include <net/net_namespace.h> + #include <linux/sched/rt.h> + ++#include <asm/thread_info.h> ++ + #ifdef CONFIG_SMP + # define INIT_PUSHABLE_TASKS(tsk) \ + .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), +@@ -183,12 +185,19 @@ extern struct task_group root_task_group; + # define INIT_KASAN(tsk) + #endif + ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), ++#else ++# define INIT_TASK_TI(tsk) ++#endif ++ + /* + * INIT_TASK is used to set up the first task table, touch at + * your own risk!. 
Base=0, limit=0x1fffff (=2MB) + */ + #define INIT_TASK(tsk) \ + { \ ++ INIT_TASK_TI(tsk) \ + .state = 0, \ + .stack = init_stack, \ + .usage = ATOMIC_INIT(2), \ +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 20f9f47bcfd0..a287e8b13549 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1458,6 +1458,13 @@ struct tlbflush_unmap_batch { + }; + + struct task_struct { ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * For reasons of header soup (see current_thread_info()), this ++ * must be the first element of task_struct. ++ */ ++ struct thread_info thread_info; ++#endif + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ + void *stack; + atomic_t usage; +@@ -1467,6 +1474,9 @@ struct task_struct { + #ifdef CONFIG_SMP + struct llist_node wake_entry; + int on_cpu; ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ unsigned int cpu; /* current CPU */ ++#endif + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; +@@ -2588,7 +2598,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); + void yield(void); + + union thread_union { ++#ifndef CONFIG_THREAD_INFO_IN_TASK + struct thread_info thread_info; ++#endif + unsigned long stack[THREAD_SIZE/sizeof(long)]; + }; + +@@ -3076,10 +3088,26 @@ static inline void threadgroup_change_end(struct task_struct *tsk) + cgroup_threadgroup_change_end(tsk); + } + +-#ifndef __HAVE_THREAD_FUNCTIONS ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ ++static inline struct thread_info *task_thread_info(struct task_struct *task) ++{ ++ return &task->thread_info; ++} ++static inline void *task_stack_page(const struct task_struct *task) ++{ ++ return task->stack; ++} ++#define setup_thread_stack(new,old) do { } while(0) ++static inline unsigned long *end_of_stack(const struct task_struct *task) ++{ ++ return task->stack; ++} ++ ++#elif !defined(__HAVE_THREAD_FUNCTIONS) + + #define task_thread_info(task) ((struct thread_info *)(task)->stack) +-#define task_stack_page(task) ((task)->stack) ++#define task_stack_page(task) ((void *)(task)->stack) + + static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) + { +@@ -3379,7 +3407,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) + + static inline unsigned int task_cpu(const struct task_struct *p) + { ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ return p->cpu; ++#else + return task_thread_info(p)->cpu; ++#endif + } + + static inline int task_node(const struct task_struct *p) +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 2b5b10eed74f..e2d0fd81b1ba 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -13,6 +13,21 @@ + struct timespec; + struct compat_timespec; + ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++struct thread_info { ++ u32 flags; /* low level flags */ ++}; ++ ++#define INIT_THREAD_INFO(tsk) \ ++{ \ ++ .flags = 0, \ ++} ++#endif ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++#define current_thread_info() ((struct thread_info *)current) ++#endif ++ + /* + * System call restart block. + */ +diff --git a/init/Kconfig b/init/Kconfig +index cac3f096050d..ec8d43894b02 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -26,6 +26,13 @@ config IRQ_WORK + config BUILDTIME_EXTABLE_SORT + bool + ++config THREAD_INFO_IN_TASK ++ bool ++ help ++ Select this to move thread_info off the stack into task_struct. To ++ make this work, an arch will need to remove all thread_info fields ++ except flags and fix any runtime bugs. 
++ + menu "General setup" + + config BROKEN +diff --git a/init/init_task.c b/init/init_task.c +index ba0a7f362d9e..11f83be1fa79 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task); + * Initial thread structure. Alignment of this is handled by a special + * linker map entry. + */ +-union thread_union init_thread_union __init_task_data = +- { INIT_THREAD_INFO(init_task) }; ++union thread_union init_thread_union __init_task_data = { ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ INIT_THREAD_INFO(init_task) ++#endif ++}; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index c64fc5114004..3655c9625e5b 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1000,7 +1000,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + * per-task data have been completed by this moment. + */ + smp_wmb(); ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ p->cpu = cpu; ++#else + task_thread_info(p)->cpu = cpu; ++#endif + p->wake_cpu = cpu; + #endif + } +-- +2.15.0 + diff --git a/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch b/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch new file mode 100644 index 0000000..780badc --- /dev/null +++ b/queue/sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch @@ -0,0 +1,168 @@ +From 68f24b08ee892d47bdef925d676e1ae1ccc316f8 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 15 Sep 2016 22:45:48 -0700 +Subject: [PATCH] sched/core: Free the stack early if + CONFIG_THREAD_INFO_IN_TASK + +commit 68f24b08ee892d47bdef925d676e1ae1ccc316f8 upstream. + +We currently keep every task's stack around until the task_struct +itself is freed. This means that we keep the stack allocation alive +for longer than necessary and that, under load, we free stacks in +big batches whenever RCU drops the last task reference. Neither of +these is good for reuse of cache-hot memory, and freeing in batches +prevents us from usefully caching small numbers of vmalloced stacks. + +On architectures that have thread_info on the stack, we can't easily +change this, but on architectures that set THREAD_INFO_IN_TASK, we +can free it as soon as the task is dead. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Oleg Nesterov <oleg@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/include/linux/init_task.h b/include/linux/init_task.h +index 9c04d44eeb3c..325f649d77ff 100644 +--- a/include/linux/init_task.h ++++ b/include/linux/init_task.h +@@ -186,7 +186,9 @@ extern struct task_group root_task_group; + #endif + + #ifdef CONFIG_THREAD_INFO_IN_TASK +-# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), ++# define INIT_TASK_TI(tsk) \ ++ .thread_info = INIT_THREAD_INFO(tsk), \ ++ .stack_refcount = ATOMIC_INIT(1), + #else + # define INIT_TASK_TI(tsk) + #endif +diff --git a/include/linux/sched.h b/include/linux/sched.h +index a95867267e9f..abb795afc823 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1936,6 +1936,10 @@ struct task_struct { + #ifdef CONFIG_VMAP_STACK + struct vm_struct *stack_vm_area; + #endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* A live task holds one reference. */ ++ atomic_t stack_refcount; ++#endif + /* CPU-specific state of this task */ + struct thread_struct thread; + /* +@@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p) + + #endif + ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++static inline void *try_get_task_stack(struct task_struct *tsk) ++{ ++ return atomic_inc_not_zero(&tsk->stack_refcount) ? ++ task_stack_page(tsk) : NULL; ++} ++ ++extern void put_task_stack(struct task_struct *tsk); ++#else + static inline void *try_get_task_stack(struct task_struct *tsk) + { + return task_stack_page(tsk); + } + + static inline void put_task_stack(struct task_struct *tsk) {} ++#endif + + #define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) +diff --git a/kernel/fork.c b/kernel/fork.c +index 0c240fd5beba..5dd0a516626d 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account) + } + } + +-void free_task(struct task_struct *tsk) ++static void release_task_stack(struct task_struct *tsk) + { + account_kernel_stack(tsk, -1); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk); ++ tsk->stack = NULL; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = NULL; ++#endif ++} ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++void put_task_stack(struct task_struct *tsk) ++{ ++ if (atomic_dec_and_test(&tsk->stack_refcount)) ++ release_task_stack(tsk); ++} ++#endif ++ ++void free_task(struct task_struct *tsk) ++{ ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * The task is finally done with both the stack and thread_info, ++ * so free both. ++ */ ++ release_task_stack(tsk); ++#else ++ /* ++ * If the task had a separate stack allocation, it should be gone ++ * by now. 
++ */ ++ WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); ++#endif + rt_mutex_debug_task_free(tsk); + ftrace_graph_exit_task(tsk); + put_seccomp_filter(tsk); +@@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) + #ifdef CONFIG_VMAP_STACK + tsk->stack_vm_area = stack_vm_area; + #endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ atomic_set(&tsk->stack_refcount, 1); ++#endif + + if (err) + goto free_stack; +@@ -1771,6 +1803,7 @@ bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); + exit_creds(p); + bad_fork_free: ++ put_task_stack(p); + free_task(p); + fork_out: + return ERR_PTR(retval); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 0b6238f18da2..23c6037e2d89 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -2772,6 +2772,10 @@ static struct rq *finish_task_switch(struct task_struct *prev) + * task and put them back on the free list. + */ + kprobe_flush_task(prev); ++ ++ /* Task is done with its stack. */ ++ put_task_stack(prev); ++ + put_task_struct(prev); + } + +-- +2.15.0 + diff --git a/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch b/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch new file mode 100644 index 0000000..140dced --- /dev/null +++ b/queue/sched-core-x86-Make-struct-thread_info-arch-specific.patch @@ -0,0 +1,91 @@ +From c8061485a0d7569a865a3cc3c63347b0f42b3765 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens <heiko.carstens@de.ibm.com> +Date: Wed, 19 Oct 2016 19:28:11 +0100 +Subject: [PATCH] sched/core, x86: Make struct thread_info arch specific again + +commit c8061485a0d7569a865a3cc3c63347b0f42b3765 upstream. + +The following commit: + + c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") + +... made 'struct thread_info' a generic struct with only a +single ::flags member, if CONFIG_THREAD_INFO_IN_TASK_STRUCT=y is +selected. + +This change however seems to be quite x86 centric, since at least the +generic preemption code (asm-generic/preempt.h) assumes that struct +thread_info also has a preempt_count member, which apparently was not +true for x86. + +We could add a bit more #ifdefs to solve this problem too, but it seems +to be much simpler to make struct thread_info arch specific +again. This also makes the conversion to THREAD_INFO_IN_TASK_STRUCT a +bit easier for architectures that have a couple of arch specific stuff +in their thread_info definition. + +The arch specific stuff _could_ be moved to thread_struct. However +keeping them in thread_info makes it easier: accessing thread_info +members is simple, since it is at the beginning of the task_struct, +while the thread_struct is at the end. At least on s390 the offsets +needed to access members of the thread_struct (with task_struct as +base) are too large for various asm instructions. This is not a +problem when keeping these members within thread_info. 
+ +Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> +Signed-off-by: Mark Rutland <mark.rutland@arm.com> +Acked-by: Thomas Gleixner <tglx@linutronix.de> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: keescook@chromium.org +Cc: linux-arch@vger.kernel.org +Link: http://lkml.kernel.org/r/1476901693-8492-2-git-send-email-mark.rutland@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 2aaca53c0974..ad6f5eb07a95 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -52,6 +52,15 @@ struct task_struct; + #include <asm/cpufeature.h> + #include <linux/atomic.h> + ++struct thread_info { ++ unsigned long flags; /* low level flags */ ++}; ++ ++#define INIT_THREAD_INFO(tsk) \ ++{ \ ++ .flags = 0, \ ++} ++ + #define init_stack (init_thread_union.stack) + + #else /* !__ASSEMBLY__ */ +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 45f004e9cc59..2873baf5372a 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -13,17 +13,6 @@ + struct timespec; + struct compat_timespec; + +-#ifdef CONFIG_THREAD_INFO_IN_TASK +-struct thread_info { +- unsigned long flags; /* low level flags */ +-}; +- +-#define INIT_THREAD_INFO(tsk) \ +-{ \ +- .flags = 0, \ +-} +-#endif +- + #ifdef CONFIG_THREAD_INFO_IN_TASK + #define current_thread_info() ((struct thread_info *)current) + #endif +-- +2.15.0 + diff --git a/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch b/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch new file mode 100644 index 0000000..ff851c0 --- /dev/null +++ b/queue/sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch @@ -0,0 +1,56 @@ +From 4e047aa7f267c3449b6d323510d35864829aca70 Mon Sep 17 00:00:00 2001 +From: Brian Gerst <brgerst@gmail.com> +Date: Sat, 13 Aug 2016 12:38:16 -0400 +Subject: [PATCH] sched/x86/32, kgdb: Don't use thread.ip in + sleeping_thread_to_gdb_regs() + +commit 4e047aa7f267c3449b6d323510d35864829aca70 upstream. + +Match 64-bit and set gdb_regs[GDB_PC] to zero. thread.ip is always the +same point in the scheduler (except for newly forked processes), and will +be removed in a future patch. + +Signed-off-by: Brian Gerst <brgerst@gmail.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jason Wessel <jason.wessel@windriver.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/1471106302-10159-2-git-send-email-brgerst@gmail.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c +index 04cde527d728..fe649a5f509f 100644 +--- a/arch/x86/kernel/kgdb.c ++++ b/arch/x86/kernel/kgdb.c +@@ -172,7 +172,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) + gdb_regs[GDB_ES] = __KERNEL_DS; + gdb_regs[GDB_PS] = 0; + gdb_regs[GDB_CS] = __KERNEL_CS; +- gdb_regs[GDB_PC] = p->thread.ip; + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_FS] = 0xFFFF; + gdb_regs[GDB_GS] = 0xFFFF; +@@ -180,7 +179,6 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) + gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_CS] = __KERNEL_CS; + gdb_regs32[GDB_SS] = __KERNEL_DS; +- gdb_regs[GDB_PC] = 0; + gdb_regs[GDB_R8] = 0; + gdb_regs[GDB_R9] = 0; + gdb_regs[GDB_R10] = 0; +@@ -190,6 +188,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) + gdb_regs[GDB_R14] = 0; + gdb_regs[GDB_R15] = 0; + #endif ++ gdb_regs[GDB_PC] = 0; + gdb_regs[GDB_SP] = p->thread.sp; + } + +-- +2.15.0 + diff --git a/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch b/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch new file mode 100644 index 0000000..2291b89 --- /dev/null +++ b/queue/sched-x86-Add-struct-inactive_task_frame-to-better-d.patch @@ -0,0 +1,107 @@ +From 7b32aeadbc95d4a41402c1c0da6aa3ab51af4c10 Mon Sep 17 00:00:00 2001 +From: Brian Gerst <brgerst@gmail.com> +Date: Sat, 13 Aug 2016 12:38:18 -0400 +Subject: [PATCH] sched/x86: Add 'struct inactive_task_frame' to better + document the sleeping task stack frame + +commit 7b32aeadbc95d4a41402c1c0da6aa3ab51af4c10 upstream. + +Add 'struct inactive_task_frame', which defines the layout of the stack for +a sleeping process. For now, the only defined field is the BP register +(frame pointer). + +Signed-off-by: Brian Gerst <brgerst@gmail.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/1471106302-10159-4-git-send-email-brgerst@gmail.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h +index 0944218af9e2..7646fb2772f8 100644 +--- a/arch/x86/include/asm/stacktrace.h ++++ b/arch/x86/include/asm/stacktrace.h +@@ -8,6 +8,7 @@ + + #include <linux/uaccess.h> + #include <linux/ptrace.h> ++#include <asm/switch_to.h> + + extern int kstack_depth_to_print; + +@@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs) + return bp; + } + +- /* bp is the last reg pushed by switch_to */ +- return *(unsigned long *)task->thread.sp; ++ return ((struct inactive_task_frame *)task->thread.sp)->bp; + } + #else + static inline unsigned long +diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h +index 14e4b20f0aaf..ec689c62c01f 100644 +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -30,6 +30,11 @@ static inline void prepare_switch_to(struct task_struct *prev, + #endif + } + ++/* data that is pointed to by thread.sp */ ++struct inactive_task_frame { ++ unsigned long bp; ++}; ++ + #ifdef CONFIG_X86_32 + + #ifdef CONFIG_CC_STACKPROTECTOR +diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c +index 5e3f294ce264..8e36f249646e 100644 +--- a/arch/x86/kernel/kgdb.c ++++ b/arch/x86/kernel/kgdb.c +@@ -50,6 +50,7 @@ + #include <asm/apicdef.h> + #include <asm/apic.h> + #include <asm/nmi.h> ++#include <asm/switch_to.h> + + struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = + { +@@ -166,7 +167,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) + gdb_regs[GDB_DX] = 0; + gdb_regs[GDB_SI] = 0; + gdb_regs[GDB_DI] = 0; +- gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; ++ gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp; + #ifdef CONFIG_X86_32 + gdb_regs[GDB_DS] = __KERNEL_DS; + gdb_regs[GDB_ES] = __KERNEL_DS; +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 62c0b0ea2ce4..0115a4a4db96 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -32,6 +32,7 @@ + #include <asm/tlbflush.h> + #include <asm/mce.h> + #include <asm/vm86.h> ++#include <asm/switch_to.h> + + /* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, +@@ -556,7 +557,7 @@ unsigned long get_wchan(struct task_struct *p) + if (sp < bottom || sp > top) + return 0; + +- fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); ++ fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); + do { + if (fp < bottom || fp > top) + return 0; +-- +2.15.0 + diff --git a/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch b/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch new file mode 100644 index 0000000..4b4cff7 --- /dev/null +++ b/queue/sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch @@ -0,0 +1,237 @@ +From 616d24835eeafa8ef3466479db028abfdfc77531 Mon Sep 17 00:00:00 2001 +From: Brian Gerst <brgerst@gmail.com> +Date: Sat, 13 Aug 2016 12:38:20 -0400 +Subject: [PATCH] sched/x86: Pass kernel thread parameters in 'struct + fork_frame' + +commit 616d24835eeafa8ef3466479db028abfdfc77531 upstream. 
+ +Instead of setting up a fake pt_regs context, put the kernel thread +function pointer and arg into the unused callee-restored registers +of 'struct fork_frame'. + +Signed-off-by: Brian Gerst <brgerst@gmail.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index bf8f221f9c94..b75a8bcd2d23 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -240,35 +240,34 @@ END(__switch_to_asm) + * A newly forked process directly context switches into this address. + * + * eax: prev task we switched from ++ * ebx: kernel thread func (NULL for user thread) ++ * edi: kernel thread arg + */ + ENTRY(ret_from_fork) + pushl %eax + call schedule_tail + popl %eax + ++ testl %ebx, %ebx ++ jnz 1f /* kernel threads are uncommon */ ++ ++2: + /* When we fork, we trace the syscall return in the child, too. */ + movl %esp, %eax + call syscall_return_slowpath + jmp restore_all +-END(ret_from_fork) +- +-ENTRY(ret_from_kernel_thread) +- pushl %eax +- call schedule_tail +- popl %eax +- movl PT_EBP(%esp), %eax +- call *PT_EBX(%esp) +- movl $0, PT_EAX(%esp) + ++ /* kernel thread */ ++1: movl %edi, %eax ++ call *%ebx + /* +- * Kernel threads return to userspace as if returning from a syscall. +- * We should check whether anything actually uses this path and, if so, +- * consider switching it over to ret_from_fork. ++ * A kernel thread is allowed to return here after successfully ++ * calling do_execve(). Exit to userspace to complete the execve() ++ * syscall. + */ +- movl %esp, %eax +- call syscall_return_slowpath +- jmp restore_all +-ENDPROC(ret_from_kernel_thread) ++ movl $0, PT_EAX(%esp) ++ jmp 2b ++END(ret_from_fork) + + /* + * Return to user mode is not as complex as all this looks, +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index c1af8acd366b..c0373d667674 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -407,37 +407,34 @@ END(__switch_to_asm) + * A newly forked process directly context switches into this address. + * + * rax: prev task we switched from ++ * rbx: kernel thread func (NULL for user thread) ++ * r12: kernel thread arg + */ + ENTRY(ret_from_fork) + movq %rax, %rdi + call schedule_tail /* rdi: 'prev' task parameter */ + +- testb $3, CS(%rsp) /* from kernel_thread? */ +- jnz 1f +- +- /* +- * We came from kernel_thread. This code path is quite twisted, and +- * someone should clean it up. +- * +- * copy_thread_tls stashes the function pointer in RBX and the +- * parameter to be passed in RBP. The called function is permitted +- * to call do_execve and thereby jump to user mode. +- */ +- movq RBP(%rsp), %rdi +- call *RBX(%rsp) +- movl $0, RAX(%rsp) +- +- /* +- * Fall through as though we're exiting a syscall. This makes a +- * twisted sort of sense if we just called do_execve. +- */ ++ testq %rbx, %rbx /* from kernel_thread? 
*/ ++ jnz 1f /* kernel threads are uncommon */ + +-1: ++2: + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ + TRACE_IRQS_ON /* user mode is traced as IRQS on */ + SWAPGS + jmp restore_regs_and_iret ++ ++1: ++ /* kernel thread */ ++ movq %r12, %rdi ++ call *%rbx ++ /* ++ * A kernel thread is allowed to return here after successfully ++ * calling do_execve(). Exit to userspace to complete the execve() ++ * syscall. ++ */ ++ movq $0, RAX(%rsp) ++ jmp 2b + END(ret_from_fork) + + /* +diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h +index 886d5ea09dba..5cb436acd463 100644 +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev, + #endif + } + ++asmlinkage void ret_from_fork(void); ++ + /* data that is pointed to by thread.sp */ + struct inactive_task_frame { + #ifdef CONFIG_X86_64 +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 4bedbc08e53c..18714a191b2d 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -55,9 +55,6 @@ + #include <asm/switch_to.h> + #include <asm/vm86.h> + +-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); +- + /* + * Return saved PC of a blocked thread. + */ +@@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + int err; + + frame->bp = 0; ++ frame->ret_addr = (unsigned long) ret_from_fork; + p->thread.sp = (unsigned long) fork_frame; + p->thread.sp0 = (unsigned long) (childregs+1); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); +@@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); +- frame->ret_addr = (unsigned long) ret_from_kernel_thread; +- task_user_gs(p) = __KERNEL_STACK_CANARY; +- childregs->ds = __USER_DS; +- childregs->es = __USER_DS; +- childregs->fs = __KERNEL_PERCPU; +- childregs->bx = sp; /* function */ +- childregs->bp = arg; +- childregs->orig_ax = -1; +- childregs->cs = __KERNEL_CS | get_kernel_rpl(); +- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; ++ frame->bx = sp; /* function */ ++ frame->di = arg; + p->thread.io_bitmap_ptr = NULL; + return 0; + } ++ frame->bx = 0; + *childregs = *current_pt_regs(); + childregs->ax = 0; + if (sp) + childregs->sp = sp; + +- frame->ret_addr = (unsigned long) ret_from_fork; + task_user_gs(p) = get_user_gs(current_pt_regs()); + + p->thread.io_bitmap_ptr = NULL; +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index 827eeed03e16..b812cd0d7889 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -50,8 +50,6 @@ + #include <asm/switch_to.h> + #include <asm/xen/hypervisor.h> + +-asmlinkage extern void ret_from_fork(void); +- + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); + + /* Prints also some state that isn't saved in the pt_regs */ +@@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); +- childregs->sp = (unsigned long)childregs; +- childregs->ss = __KERNEL_DS; +- childregs->bx = sp; /* function */ +- childregs->bp = arg; +- childregs->orig_ax = -1; +- childregs->cs = __KERNEL_CS | get_kernel_rpl(); +- 
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; ++ frame->bx = sp; /* function */ ++ frame->r12 = arg; + return 0; + } ++ frame->bx = 0; + *childregs = *current_pt_regs(); + + childregs->ax = 0; +-- +2.15.0 + diff --git a/queue/sched-x86-Rewrite-the-switch_to-code.patch b/queue/sched-x86-Rewrite-the-switch_to-code.patch new file mode 100644 index 0000000..19c054a --- /dev/null +++ b/queue/sched-x86-Rewrite-the-switch_to-code.patch @@ -0,0 +1,455 @@ +From 0100301bfdf56a2a370c7157b5ab0fbf9313e1cd Mon Sep 17 00:00:00 2001 +From: Brian Gerst <brgerst@gmail.com> +Date: Sat, 13 Aug 2016 12:38:19 -0400 +Subject: [PATCH] sched/x86: Rewrite the switch_to() code + +commit 0100301bfdf56a2a370c7157b5ab0fbf9313e1cd upstream. + +Move the low-level context switch code to an out-of-line asm stub instead of +using complex inline asm. This allows constructing a new stack frame for the +child process to make it seamlessly flow to ret_from_fork without an extra +test and branch in __switch_to(). It also improves code generation for +__schedule() by using the C calling convention instead of clobbering all +registers. + +Signed-off-by: Brian Gerst <brgerst@gmail.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/1471106302-10159-5-git-send-email-brgerst@gmail.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index 0b56666e6039..bf8f221f9c94 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -204,6 +204,43 @@ + POP_GS_EX + .endm + ++/* ++ * %eax: prev task ++ * %edx: next task ++ */ ++ENTRY(__switch_to_asm) ++ /* ++ * Save callee-saved registers ++ * This must match the order in struct inactive_task_frame ++ */ ++ pushl %ebp ++ pushl %ebx ++ pushl %edi ++ pushl %esi ++ ++ /* switch stack */ ++ movl %esp, TASK_threadsp(%eax) ++ movl TASK_threadsp(%edx), %esp ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ movl TASK_stack_canary(%edx), %ebx ++ movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset ++#endif ++ ++ /* restore callee-saved registers */ ++ popl %esi ++ popl %edi ++ popl %ebx ++ popl %ebp ++ ++ jmp __switch_to ++END(__switch_to_asm) ++ ++/* ++ * A newly forked process directly context switches into this address. 
++ * ++ * eax: prev task we switched from ++ */ + ENTRY(ret_from_fork) + pushl %eax + call schedule_tail +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index f6b40e5c88f1..c1af8acd366b 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -367,14 +367,49 @@ END(ptregs_\func) + #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) + #include <asm/syscalls_64.h> + ++/* ++ * %rdi: prev task ++ * %rsi: next task ++ */ ++ENTRY(__switch_to_asm) ++ /* ++ * Save callee-saved registers ++ * This must match the order in inactive_task_frame ++ */ ++ pushq %rbp ++ pushq %rbx ++ pushq %r12 ++ pushq %r13 ++ pushq %r14 ++ pushq %r15 ++ ++ /* switch stack */ ++ movq %rsp, TASK_threadsp(%rdi) ++ movq TASK_threadsp(%rsi), %rsp ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ movq TASK_stack_canary(%rsi), %rbx ++ movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset ++#endif ++ ++ /* restore callee-saved registers */ ++ popq %r15 ++ popq %r14 ++ popq %r13 ++ popq %r12 ++ popq %rbx ++ popq %rbp ++ ++ jmp __switch_to ++END(__switch_to_asm) ++ + /* + * A newly forked process directly context switches into this address. + * +- * rdi: prev task we switched from ++ * rax: prev task we switched from + */ + ENTRY(ret_from_fork) +- LOCK ; btr $TIF_FORK, TI_flags(%r8) +- ++ movq %rax, %rdi + call schedule_tail /* rdi: 'prev' task parameter */ + + testb $3, CS(%rsp) /* from kernel_thread? */ +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 63def9537a2d..6fee8635340b 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -389,9 +389,6 @@ struct thread_struct { + unsigned short fsindex; + unsigned short gsindex; + #endif +-#ifdef CONFIG_X86_32 +- unsigned long ip; +-#endif + #ifdef CONFIG_X86_64 + unsigned long fsbase; + unsigned long gsbase; +diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h +index ec689c62c01f..886d5ea09dba 100644 +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -2,8 +2,12 @@ + #define _ASM_X86_SWITCH_TO_H + + struct task_struct; /* one of the stranger aspects of C forward declarations */ ++ ++struct task_struct *__switch_to_asm(struct task_struct *prev, ++ struct task_struct *next); ++ + __visible struct task_struct *__switch_to(struct task_struct *prev, +- struct task_struct *next); ++ struct task_struct *next); + struct tss_struct; + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss); +@@ -32,131 +36,30 @@ static inline void prepare_switch_to(struct task_struct *prev, + + /* data that is pointed to by thread.sp */ + struct inactive_task_frame { ++#ifdef CONFIG_X86_64 ++ unsigned long r15; ++ unsigned long r14; ++ unsigned long r13; ++ unsigned long r12; ++#else ++ unsigned long si; ++ unsigned long di; ++#endif ++ unsigned long bx; + unsigned long bp; ++ unsigned long ret_addr; + }; + +-#ifdef CONFIG_X86_32 +- +-#ifdef CONFIG_CC_STACKPROTECTOR +-#define __switch_canary \ +- "movl %P[task_canary](%[next]), %%ebx\n\t" \ +- "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" +-#define __switch_canary_oparam \ +- , [stack_canary] "=m" (stack_canary.canary) +-#define __switch_canary_iparam \ +- , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +-#else /* CC_STACKPROTECTOR */ +-#define __switch_canary +-#define __switch_canary_oparam +-#define __switch_canary_iparam +-#endif /* CC_STACKPROTECTOR */ ++struct fork_frame { ++ struct inactive_task_frame 
frame; ++ struct pt_regs regs; ++}; + +-/* +- * Saving eflags is important. It switches not only IOPL between tasks, +- * it also protects other tasks from NT leaking through sysenter etc. +- */ + #define switch_to(prev, next, last) \ + do { \ +- /* \ +- * Context-switching clobbers all registers, so we clobber \ +- * them explicitly, via unused output variables. \ +- * (EAX and EBP is not listed because EBP is saved/restored \ +- * explicitly for wchan access and EAX is the return value of \ +- * __switch_to()) \ +- */ \ +- unsigned long ebx, ecx, edx, esi, edi; \ +- \ + prepare_switch_to(prev, next); \ + \ +- asm volatile("pushl %%ebp\n\t" /* save EBP */ \ +- "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ +- "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ +- "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ +- "pushl %[next_ip]\n\t" /* restore EIP */ \ +- __switch_canary \ +- "jmp __switch_to\n" /* regparm call */ \ +- "1:\t" \ +- "popl %%ebp\n\t" /* restore EBP */ \ +- \ +- /* output parameters */ \ +- : [prev_sp] "=m" (prev->thread.sp), \ +- [prev_ip] "=m" (prev->thread.ip), \ +- "=a" (last), \ +- \ +- /* clobbered output registers: */ \ +- "=b" (ebx), "=c" (ecx), "=d" (edx), \ +- "=S" (esi), "=D" (edi) \ +- \ +- __switch_canary_oparam \ +- \ +- /* input parameters: */ \ +- : [next_sp] "m" (next->thread.sp), \ +- [next_ip] "m" (next->thread.ip), \ +- \ +- /* regparm parameters for __switch_to(): */ \ +- [prev] "a" (prev), \ +- [next] "d" (next) \ +- \ +- __switch_canary_iparam \ +- \ +- : /* reloaded segment registers */ \ +- "memory"); \ ++ ((last) = __switch_to_asm((prev), (next))); \ + } while (0) + +-#else /* CONFIG_X86_32 */ +- +-/* frame pointer must be last for get_wchan */ +-#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" +- +-#define __EXTRA_CLOBBER \ +- , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ +- "r12", "r13", "r14", "r15", "flags" +- +-#ifdef CONFIG_CC_STACKPROTECTOR +-#define __switch_canary \ +- "movq %P[task_canary](%%rsi),%%r8\n\t" \ +- "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +-#define __switch_canary_oparam \ +- , [gs_canary] "=m" (irq_stack_union.stack_canary) +-#define __switch_canary_iparam \ +- , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +-#else /* CC_STACKPROTECTOR */ +-#define __switch_canary +-#define __switch_canary_oparam +-#define __switch_canary_iparam +-#endif /* CC_STACKPROTECTOR */ +- +-/* +- * There is no need to save or restore flags, because flags are always +- * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL +- * has no effect. 
+- */ +-#define switch_to(prev, next, last) \ +- prepare_switch_to(prev, next); \ +- \ +- asm volatile(SAVE_CONTEXT \ +- "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ +- "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ +- "call __switch_to\n\t" \ +- "movq "__percpu_arg([current_task])",%%rsi\n\t" \ +- __switch_canary \ +- "movq %P[thread_info](%%rsi),%%r8\n\t" \ +- "movq %%rax,%%rdi\n\t" \ +- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ +- "jnz ret_from_fork\n\t" \ +- RESTORE_CONTEXT \ +- : "=a" (last) \ +- __switch_canary_oparam \ +- : [next] "S" (next), [prev] "D" (prev), \ +- [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ +- [ti_flags] "i" (offsetof(struct thread_info, flags)), \ +- [_tif_fork] "i" (_TIF_FORK), \ +- [thread_info] "i" (offsetof(struct task_struct, stack)), \ +- [current_task] "m" (current_task) \ +- __switch_canary_iparam \ +- : "memory", "cc" __EXTRA_CLOBBER) +- +-#endif /* CONFIG_X86_32 */ +- + #endif /* _ASM_X86_SWITCH_TO_H */ +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 8b7c8d8e0852..494c4b5ada34 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -95,7 +95,6 @@ struct thread_info { + #define TIF_UPROBE 12 /* breakpointed or singlestepping */ + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_IA32 17 /* IA32 compatibility process */ +-#define TIF_FORK 18 /* ret_from_fork */ + #define TIF_NOHZ 19 /* in adaptive nohz mode */ + #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ + #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ +@@ -119,7 +118,6 @@ struct thread_info { + #define _TIF_UPROBE (1 << TIF_UPROBE) + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_IA32 (1 << TIF_IA32) +-#define _TIF_FORK (1 << TIF_FORK) + #define _TIF_NOHZ (1 << TIF_NOHZ) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c +index 2bd5c6ff7ee7..db3a0af9b9ec 100644 +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -28,6 +28,12 @@ + #endif + + void common(void) { ++ BLANK(); ++ OFFSET(TASK_threadsp, task_struct, thread.sp); ++#ifdef CONFIG_CC_STACKPROTECTOR ++ OFFSET(TASK_stack_canary, task_struct, stack_canary); ++#endif ++ + BLANK(); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_status, thread_info, status); +diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c +index ecdc1d217dc0..880aa093268d 100644 +--- a/arch/x86/kernel/asm-offsets_32.c ++++ b/arch/x86/kernel/asm-offsets_32.c +@@ -57,6 +57,11 @@ void foo(void) + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + ++#ifdef CONFIG_CC_STACKPROTECTOR ++ BLANK(); ++ OFFSET(stack_canary_offset, stack_canary, canary); ++#endif ++ + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) + BLANK(); + OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); +diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c +index d875f97d4e0b..210927ee2e74 100644 +--- a/arch/x86/kernel/asm-offsets_64.c ++++ b/arch/x86/kernel/asm-offsets_64.c +@@ -56,6 +56,11 @@ int main(void) + OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + BLANK(); + ++#ifdef CONFIG_CC_STACKPROTECTOR ++ DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); ++ BLANK(); ++#endif 
++ + DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); + DEFINE(NR_syscalls, sizeof(syscalls_64)); + +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index d86be29c38c7..4bedbc08e53c 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -133,17 +133,20 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct *p, unsigned long tls) + { + struct pt_regs *childregs = task_pt_regs(p); ++ struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); ++ struct inactive_task_frame *frame = &fork_frame->frame; + struct task_struct *tsk; + int err; + +- p->thread.sp = (unsigned long) childregs; ++ frame->bp = 0; ++ p->thread.sp = (unsigned long) fork_frame; + p->thread.sp0 = (unsigned long) (childregs+1); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); +- p->thread.ip = (unsigned long) ret_from_kernel_thread; ++ frame->ret_addr = (unsigned long) ret_from_kernel_thread; + task_user_gs(p) = __KERNEL_STACK_CANARY; + childregs->ds = __USER_DS; + childregs->es = __USER_DS; +@@ -161,7 +164,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + if (sp) + childregs->sp = sp; + +- p->thread.ip = (unsigned long) ret_from_fork; ++ frame->ret_addr = (unsigned long) ret_from_fork; + task_user_gs(p) = get_user_gs(current_pt_regs()); + + p->thread.io_bitmap_ptr = NULL; +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index 63236d8f84bf..827eeed03e16 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -141,12 +141,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + { + int err; + struct pt_regs *childregs; ++ struct fork_frame *fork_frame; ++ struct inactive_task_frame *frame; + struct task_struct *me = current; + + p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs = task_pt_regs(p); +- p->thread.sp = (unsigned long) childregs; +- set_tsk_thread_flag(p, TIF_FORK); ++ fork_frame = container_of(childregs, struct fork_frame, regs); ++ frame = &fork_frame->frame; ++ frame->bp = 0; ++ frame->ret_addr = (unsigned long) ret_from_fork; ++ p->thread.sp = (unsigned long) fork_frame; + p->thread.io_bitmap_ptr = NULL; + + savesegment(gs, p->thread.gsindex); +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index c85d2c636092..7e52f83d3a4b 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -942,7 +942,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) + per_cpu(cpu_current_top_of_stack, cpu) = + (unsigned long)task_stack_page(idle) + THREAD_SIZE; + #else +- clear_tsk_thread_flag(idle, TIF_FORK); + initial_gs = per_cpu_offset(cpu); + #endif + } +-- +2.15.0 + diff --git a/queue/series b/queue/series new file mode 100644 index 0000000..7c0433e --- /dev/null +++ b/queue/series @@ -0,0 +1,62 @@ +# v4.9-rc1~163^2~21 +locking-static_keys-Provide-DECLARE-and-well-as-DEFI.patch +# v4.9-rc1~163^2~20 +x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch +# v4.9-rc1~163^2~19 +x86-mce-Improve-memcpy_mcsafe.patch +# v4.9-rc1~163^2~18 +x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch +# v4.9-rc1~160^2~73 +x86-entry-Remove-duplicated-comment.patch +# v4.9-rc1~160^2~61 +fork-Add-generic-vmalloced-stack-support.patch +# v4.9-rc1~160^2~59 +x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch 
+# v4.9-rc1~160^2~50 +sched-x86-32-kgdb-Don-t-use-thread.ip-in-sleeping_th.patch +# v4.9-rc1~160^2~48 +sched-x86-Add-struct-inactive_task_frame-to-better-d.patch +# v4.9-rc1~160^2~47 +sched-x86-Rewrite-the-switch_to-code.patch +# v4.9-rc1~160^2~46 +sched-x86-Pass-kernel-thread-parameters-in-struct-fo.patch +# v4.9-rc1~160^2~33 +x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch +# v4.9-rc1~160^2~27 +# Merge branch 'linus' into x86/asm, to pick up recent fixes +# v4.9-rc1~160^2~26 +x86-asm-Move-the-thread_info-status-field-to-thread_.patch +# v4.9-rc1~160^2~25 +x86-entry-Get-rid-of-pt_regs_to_thread_info.patch +# v4.9-rc1~160^2~24 +um-Stop-conflating-task_struct-stack-with-thread_inf.patch +# v4.9-rc1~160^2~23 +sched-core-Allow-putting-thread_info-into-task_struc.patch +# v4.9-rc1~160^2~22 +x86-Move-thread_info-into-task_struct.patch +# v4.9-rc1~160^2~20 +x86-entry-64-Fix-a-minor-comment-rebase-error.patch +# v4.9-rc1~160^2~19 +sched-core-Add-try_get_task_stack-and-put_task_stack.patch +# v4.9-rc1~160^2~14 +sched-core-Free-the-stack-early-if-CONFIG_THREAD_INF.patch +# v4.9-rc1~160^2~13 +fork-Optimize-task-creation-by-caching-two-thread-st.patch +# v4.9-rc1~160^2~2 +thread_info-Use-unsigned-long-for-flags.patch +# v4.9-rc1~160^2 +x86-asm-Get-rid-of-__read_cr4_safe.patch +# v4.9-rc1~89^2~3 +x86-entry-spell-EBX-register-correctly-in-documentat.patch +# v4.9-rc1~11^2~20 +EXPORT_SYMBOL-for-asm.patch +# v4.9-rc1~11^2~19 +x86-move-exports-to-actual-definitions.patch + +# v4.9-rc2~7^2~4 +x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch +# v4.9-rc2~7^2~1 +sched-core-x86-Make-struct-thread_info-arch-specific.patch + +# v4.10-rc1~64^2 +x86-kbuild-enable-modversions-for-symbols-exported-f.patch diff --git a/queue/thread_info-Use-unsigned-long-for-flags.patch b/queue/thread_info-Use-unsigned-long-for-flags.patch new file mode 100644 index 0000000..600b9ee --- /dev/null +++ b/queue/thread_info-Use-unsigned-long-for-flags.patch @@ -0,0 +1,62 @@ +From 907241dccb4ce5d9413cf3c030b32b0cfc184914 Mon Sep 17 00:00:00 2001 +From: Mark Rutland <mark.rutland@arm.com> +Date: Fri, 23 Sep 2016 18:24:07 +0100 +Subject: [PATCH] thread_info: Use unsigned long for flags + +commit 907241dccb4ce5d9413cf3c030b32b0cfc184914 upstream. + +The generic THREAD_INFO_IN_TASK definition of thread_info::flags is a +u32, matching x86 prior to the introduction of THREAD_INFO_IN_TASK. + +However, common helpers like test_ti_thread_flag() implicitly assume +that thread_info::flags has at least the size and alignment of unsigned +long, and relying on padding and alignment provided by other elements of +task_struct is somewhat fragile. Additionally, some architectures use +more that 32 bits for thread_info::flags, and others may need to in +future. + +With THREAD_INFO_IN_TASK, task struct follows thread_info with a long +field, and thus we no longer save any space as we did back in commit: + + affa219b60a11b32 ("x86: change thread_info's flag field back to 32 bits") + +Given all this, it makes more sense for the generic thread_info::flags +to be an unsigned long. + +In fact given <linux/thread_info.h> contains/uses the helpers mentioned +above, BE arches *must* use unsigned long (or something of the same size) +today, or they wouldn't work. + +Make it so. + +Signed-off-by: Mark Rutland <mark.rutland@arm.com> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Kees Cook <keescook@chromium.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/1474651447-30447-1-git-send-email-mark.rutland@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index e2d0fd81b1ba..45f004e9cc59 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -15,7 +15,7 @@ struct compat_timespec; + + #ifdef CONFIG_THREAD_INFO_IN_TASK + struct thread_info { +- u32 flags; /* low level flags */ ++ unsigned long flags; /* low level flags */ + }; + + #define INIT_THREAD_INFO(tsk) \ +-- +2.15.0 + diff --git a/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch b/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch new file mode 100644 index 0000000..7ed6c5b --- /dev/null +++ b/queue/um-Stop-conflating-task_struct-stack-with-thread_inf.patch @@ -0,0 +1,68 @@ +From d896fa20a70c9e596438728561e058a74ed3196b Mon Sep 17 00:00:00 2001 +From: Linus Torvalds <torvalds@linux-foundation.org> +Date: Tue, 13 Sep 2016 14:29:23 -0700 +Subject: [PATCH] um/Stop conflating task_struct::stack with thread_info + +commit d896fa20a70c9e596438728561e058a74ed3196b upstream. + +thread_info may move in the future, so use the accessors. + +[ Andy Lutomirski wrote this changelog message and changed + "task_thread_info(child)->cpu" to "task_cpu(child)". ] + +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/3439705d9838940cc82733a7335fa8c654c37db8.1473801993.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c +index a7ef7b131e25..5766ead6fdb9 100644 +--- a/arch/x86/um/ptrace_32.c ++++ b/arch/x86/um/ptrace_32.c +@@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data) + + static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) + { +- int err, n, cpu = ((struct thread_info *) child->stack)->cpu; ++ int err, n, cpu = task_cpu(child); + struct user_i387_struct fpregs; + + err = save_i387_registers(userspace_pid[cpu], +@@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c + + static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) + { +- int n, cpu = ((struct thread_info *) child->stack)->cpu; ++ int n, cpu = task_cpu(child); + struct user_i387_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); +@@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c + + static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) + { +- int err, n, cpu = ((struct thread_info *) child->stack)->cpu; ++ int err, n, cpu = task_cpu(child); + struct user_fxsr_struct fpregs; + + err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); +@@ -240,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct * + + static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) + { +- int n, cpu = ((struct thread_info *) child->stack)->cpu; ++ int n, cpu = task_cpu(child); + struct user_fxsr_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); +-- +2.15.0 + diff --git a/queue/x86-Move-thread_info-into-task_struct.patch b/queue/x86-Move-thread_info-into-task_struct.patch new file mode 100644 index 0000000..a00864d --- /dev/null +++ b/queue/x86-Move-thread_info-into-task_struct.patch @@ -0,0 +1,197 @@ +From 15f4eae70d365bba26854c90b6002aaabb18c8aa Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Tue, 13 Sep 2016 14:29:25 -0700 +Subject: [PATCH] x86: Move thread_info into task_struct + +commit 15f4eae70d365bba26854c90b6002aaabb18c8aa upstream. + +Now that most of the thread_info users have been cleaned up, +this is straightforward. + +Most of this code was written by Linus. + +Originally-from: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/a50eab40abeaec9cb9a9e3cbdeafd32190206654.1473801993.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 4c3972847c2a..2a83bc8b24c6 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -157,6 +157,7 @@ config X86 + select SPARSE_IRQ + select SRCU + select SYSCTL_EXCEPTION_TRACE ++ select THREAD_INFO_IN_TASK + select USER_STACKTRACE_SUPPORT + select VIRT_TO_BUS + select X86_DEV_DMA_OPS if X86_64 +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index e7fba58f4d9c..2b46384b4a4f 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) + * If we need to do entry work or if we guess we'll need to do + * exit work, go straight to the slow path. + */ +- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) ++ movq PER_CPU_VAR(current_task), %r11 ++ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) + jnz entry_SYSCALL64_slow_path + + entry_SYSCALL_64_fastpath: +@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: + */ + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF +- testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) ++ movq PER_CPU_VAR(current_task), %r11 ++ testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) + jnz 1f + + LOCKDEP_SYS_EXIT +@@ -370,6 +372,7 @@ END(ptregs_\func) + /* + * %rdi: prev task + * %rsi: next task ++ * rsi: task we're switching to + */ + ENTRY(__switch_to_asm) + /* +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index c9dcfe7c7e4b..2aaca53c0974 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -52,20 +52,6 @@ struct task_struct; + #include <asm/cpufeature.h> + #include <linux/atomic.h> + +-struct thread_info { +- struct task_struct *task; /* main task structure */ +- __u32 flags; /* low level flags */ +- __u32 cpu; /* current CPU */ +-}; +- +-#define INIT_THREAD_INFO(tsk) \ +-{ \ +- .task = &tsk, \ +- .flags = 0, \ +- .cpu = 0, \ +-} +- +-#define init_thread_info (init_thread_union.thread_info) + #define init_stack (init_thread_union.stack) + + #else /* !__ASSEMBLY__ */ +@@ -157,11 +143,6 @@ struct thread_info { + */ + #ifndef __ASSEMBLY__ + +-static inline struct thread_info *current_thread_info(void) +-{ +- return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); +-} +- + static inline unsigned long current_stack_pointer(void) + { + unsigned long sp; +@@ -223,33 +204,6 @@ static inline int arch_within_stack_frames(const void * const stack, + # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) + #endif + +-/* +- * ASM operand which evaluates to a 'thread_info' address of +- * the current task, if it is known that "reg" is exactly "off" +- * bytes below the top of the stack currently. +- * +- * ( The kernel stack's size is known at build time, it is usually +- * 2 or 4 pages, and the bottom of the kernel stack contains +- * the thread_info structure. So to access the thread_info very +- * quickly from assembly code we can calculate down from the +- * top of the kernel stack to the bottom, using constant, +- * build-time calculations only. 
) +- * +- * For example, to fetch the current thread_info->flags value into %eax +- * on x86-64 defconfig kernels, in syscall entry code where RSP is +- * currently at exactly SIZEOF_PTREGS bytes away from the top of the +- * stack: +- * +- * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax +- * +- * will translate to: +- * +- * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax +- * +- * which is below the current RSP by almost 16K. +- */ +-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) +- + #endif + + #ifdef CONFIG_COMPAT +diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c +index add5f90b93d4..c62e015b126c 100644 +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -35,9 +35,7 @@ void common(void) { + #endif + + BLANK(); +- OFFSET(TI_flags, thread_info, flags); +- +- BLANK(); ++ OFFSET(TASK_TI_flags, task_struct, thread_info.flags); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); + + BLANK(); +diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c +index 4a7903714065..9ebd0b0e73d9 100644 +--- a/arch/x86/kernel/irq_64.c ++++ b/arch/x86/kernel/irq_64.c +@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) + if (user_mode(regs)) + return; + +- if (regs->sp >= curbase + sizeof(struct thread_info) + +- sizeof(struct pt_regs) + STACK_TOP_MARGIN && ++ if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && + regs->sp <= curbase + THREAD_SIZE) + return; + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index c1fa790c81cd..0b9ed8ec5226 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -549,9 +549,7 @@ unsigned long get_wchan(struct task_struct *p) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack +- * ----------- bottom = start + sizeof(thread_info) +- * thread_info +- * ----------- start ++ * ----------- bottom = start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: +@@ -562,7 +560,7 @@ unsigned long get_wchan(struct task_struct *p) + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); +- bottom = start + sizeof(struct thread_info); ++ bottom = start; + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) +-- +2.15.0 + diff --git a/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch b/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch new file mode 100644 index 0000000..24c3526 --- /dev/null +++ b/queue/x86-asm-Get-rid-of-__read_cr4_safe.patch @@ -0,0 +1,174 @@ +From 1ef55be16ed69538f89e0a6508be5e62fdc9851c Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 29 Sep 2016 12:48:12 -0700 +Subject: [PATCH] x86/asm: Get rid of __read_cr4_safe() + +commit 1ef55be16ed69538f89e0a6508be5e62fdc9851c upstream. + +We use __read_cr4() vs __read_cr4_safe() inconsistently. On +CR4-less CPUs, all CR4 bits are effectively clear, so we can make +the code simpler and more robust by making __read_cr4() always fix +up faults on 32-bit kernels. + +This may fix some bugs on old 486-like CPUs, but I don't have any +easy way to test that. 
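+
+As a sketch of the idiom the unified helper relies on (this paraphrases
+the special_insns.h hunk below, with extra commentary): the exception
+table entry turns a faulting CR4 read into a plain fall-through, and the
+"0" (0) input constraint preloads the output register, so callers on a
+CR4-less CPU simply see 0:
+
+  unsigned long val;
+
+  asm volatile("1: mov %%cr4, %0\n"     /* faults if CR4 doesn't exist */
+               "2:\n"                   /* fixup resumes here */
+               _ASM_EXTABLE(1b, 2b)     /* send faults at 1b to 2b */
+               : "=r" (val), "=m" (__force_order) : "0" (0));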
+ +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: david@saggiorato.net +Link: http://lkml.kernel.org/r/ea647033d357d9ce2ad2bbde5a631045f5052fb6.1475178370.git.luto@kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h +index 2970d22d7766..91b6f4eed3fd 100644 +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void) + { + return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); + } +-static inline unsigned long __read_cr4_safe(void) +-{ +- return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); +-} + + static inline void __write_cr4(unsigned long x) + { +diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h +index 7fa9e7740ba3..fcf243f077ac 100644 +--- a/arch/x86/include/asm/paravirt_types.h ++++ b/arch/x86/include/asm/paravirt_types.h +@@ -108,7 +108,6 @@ struct pv_cpu_ops { + unsigned long (*read_cr0)(void); + void (*write_cr0)(unsigned long); + +- unsigned long (*read_cr4_safe)(void); + unsigned long (*read_cr4)(void); + void (*write_cr4)(unsigned long); + +diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h +index 587d7914ea4b..19a2224f9e16 100644 +--- a/arch/x86/include/asm/special_insns.h ++++ b/arch/x86/include/asm/special_insns.h +@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val) + static inline unsigned long native_read_cr4(void) + { + unsigned long val; +- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); +- return val; +-} +- +-static inline unsigned long native_read_cr4_safe(void) +-{ +- unsigned long val; +- /* This could fault if %cr4 does not exist. In x86_64, a cr4 always +- * exists, so it will never fail. */ + #ifdef CONFIG_X86_32 ++ /* ++ * This could fault if CR4 does not exist. Non-existent CR4 ++ * is functionally equivalent to CR4 == 0. Keep it simple and pretend ++ * that CR4 == 0 on CPUs that don't have CR4. ++ */ + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val), "=m" (__force_order) : "0" (0)); + #else +- val = native_read_cr4(); ++ /* CR4 always exists on x86_64. */ ++ asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + #endif + return val; + } +@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void) + return native_read_cr4(); + } + +-static inline unsigned long __read_cr4_safe(void) +-{ +- return native_read_cr4_safe(); +-} +- + static inline void __write_cr4(unsigned long x) + { + native_write_cr4(x); +diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h +index dee8a70382ba..6fa85944af83 100644 +--- a/arch/x86/include/asm/tlbflush.h ++++ b/arch/x86/include/asm/tlbflush.h +@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); + /* Initialize cr4 shadow for this CPU. */ + static inline void cr4_init_shadow(void) + { +- this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); ++ this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); + } + + /* Set in this cpu's CR4. 
*/ +diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c +index bef340082d20..bbf3d5933eaa 100644 +--- a/arch/x86/kernel/paravirt.c ++++ b/arch/x86/kernel/paravirt.c +@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + .read_cr4 = native_read_cr4, +- .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = native_write_cr4, + #ifdef CONFIG_X86_64 + .read_cr8 = native_read_cr8, +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 404efdfa083b..bd7be8efdc4c 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -90,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all) + cr0 = read_cr0(); + cr2 = read_cr2(); + cr3 = read_cr3(); +- cr4 = __read_cr4_safe(); ++ cr4 = __read_cr4(); + printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); + +diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +index 87f2330cc805..3aabfdcbcb52 100644 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p) + * auditing all the early-boot CR4 manipulation would be needed to + * rule it out. + */ +- mmu_cr4_features = __read_cr4_safe(); ++ mmu_cr4_features = __read_cr4(); + + memblock_set_current_limit(get_max_mapped()); + +diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c +index b12c26e2e309..53cace2ec0e2 100644 +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt) + ctxt->cr0 = read_cr0(); + ctxt->cr2 = read_cr2(); + ctxt->cr3 = read_cr3(); +- ctxt->cr4 = __read_cr4_safe(); ++ ctxt->cr4 = __read_cr4(); + #ifdef CONFIG_X86_64 + ctxt->cr8 = read_cr8(); + #endif +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index b86ebb1a9a7f..e2cf8fcea6bb 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { + .write_cr0 = xen_write_cr0, + + .read_cr4 = native_read_cr4, +- .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = xen_write_cr4, + + #ifdef CONFIG_X86_64 +-- +2.15.0 + diff --git a/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch b/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch new file mode 100644 index 0000000..54c4e3e --- /dev/null +++ b/queue/x86-asm-Move-the-thread_info-status-field-to-thread_.patch @@ -0,0 +1,252 @@ +From b9d989c7218ac922185d82ad46f3e58b27a4bea9 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Tue, 13 Sep 2016 14:29:21 -0700 +Subject: [PATCH] x86/asm: Move the thread_info::status field to thread_struct + +commit b9d989c7218ac922185d82ad46f3e58b27a4bea9 upstream. + +Because sched.h and thread_info.h are a tangled mess, I turned +in_compat_syscall() into a macro. If we had current_thread_struct() +or similar and we could use it from thread_info.h, then this would +be a bit cleaner. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/ccc8a1b2f41f9c264a41f771bb4a6539a642ad72.1473801993.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index 1433f6b4607d..871bbf975d4c 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -209,7 +209,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) + * special case only applies after poking regs and before the + * very next return to user mode. + */ +- ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); ++ current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + #endif + + user_enter_irqoff(); +@@ -307,7 +307,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + unsigned int nr = (unsigned int)regs->orig_ax; + + #ifdef CONFIG_IA32_EMULATION +- ti->status |= TS_COMPAT; ++ current->thread.status |= TS_COMPAT; + #endif + + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index b22fb5a4ff3c..984a7bf17f6a 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -389,6 +389,9 @@ struct thread_struct { + unsigned short fsindex; + unsigned short gsindex; + #endif ++ ++ u32 status; /* thread synchronous flags */ ++ + #ifdef CONFIG_X86_64 + unsigned long fsbase; + unsigned long gsbase; +@@ -434,6 +437,15 @@ struct thread_struct { + */ + }; + ++/* ++ * Thread-synchronous status. ++ * ++ * This is different from the flags in that nobody else ++ * ever touches our thread-synchronous status, so we don't ++ * have to worry about atomic accesses. ++ */ ++#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ ++ + /* + * Set IOPL bits in EFLAGS from given mask + */ +diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h +index 4e23dd15c661..e3c95e8e61c5 100644 +--- a/arch/x86/include/asm/syscall.h ++++ b/arch/x86/include/asm/syscall.h +@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ +- if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) ++ if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. +@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, + unsigned long *args) + { + # ifdef CONFIG_IA32_EMULATION +- if (task_thread_info(task)->status & TS_COMPAT) ++ if (task->thread.status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; +@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, + const unsigned long *args) + { + # ifdef CONFIG_IA32_EMULATION +- if (task_thread_info(task)->status & TS_COMPAT) ++ if (task->thread.status & TS_COMPAT) + switch (i) { + case 0: + if (!n--) break; +@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task, + + static inline int syscall_get_arch(void) + { +-#ifdef CONFIG_IA32_EMULATION +- /* +- * TS_COMPAT is set for 32-bit syscall entry and then +- * remains set until we return to user mode. 
+- * +- * x32 tasks should be considered AUDIT_ARCH_X86_64. +- */ +- if (task_thread_info(current)->status & TS_COMPAT) +- return AUDIT_ARCH_I386; +-#endif +- /* Both x32 and x86_64 are considered "64-bit". */ +- return AUDIT_ARCH_X86_64; ++ /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ ++ return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + } + #endif /* CONFIG_X86_32 */ + +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 494c4b5ada34..c9dcfe7c7e4b 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -55,7 +55,6 @@ struct task_struct; + struct thread_info { + struct task_struct *task; /* main task structure */ + __u32 flags; /* low level flags */ +- __u32 status; /* thread synchronous flags */ + __u32 cpu; /* current CPU */ + }; + +@@ -253,31 +252,17 @@ static inline int arch_within_stack_frames(const void * const stack, + + #endif + +-/* +- * Thread-synchronous status. +- * +- * This is different from the flags in that nobody else +- * ever touches our thread-synchronous status, so we don't +- * have to worry about atomic accesses. +- */ +-#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + #ifdef CONFIG_COMPAT + #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ + #endif +- + #ifndef __ASSEMBLY__ + +-static inline bool in_ia32_syscall(void) +-{ + #ifdef CONFIG_X86_32 +- return true; +-#endif +-#ifdef CONFIG_IA32_EMULATION +- if (current_thread_info()->status & TS_COMPAT) +- return true; ++#define in_ia32_syscall() true ++#else ++#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ ++ current->thread.status & TS_COMPAT) + #endif +- return false; +-} + + /* + * Force syscall return via IRET by making it look as if there was +diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c +index db3a0af9b9ec..add5f90b93d4 100644 +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -36,7 +36,6 @@ void common(void) { + + BLANK(); + OFFSET(TI_flags, thread_info, flags); +- OFFSET(TI_status, thread_info, status); + + BLANK(); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); +diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c +index 93982aebb398..2f2b8c7ccb85 100644 +--- a/arch/x86/kernel/fpu/init.c ++++ b/arch/x86/kernel/fpu/init.c +@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void) + on_boot_cpu = 0; + + WARN_ON_FPU(current->thread.fpu.fpstate_active); +- current_thread_info()->status = 0; + + if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) + eagerfpu = ENABLE; +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index b812cd0d7889..de9acaf2d371 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -510,7 +510,7 @@ void set_personality_ia32(bool x32) + current->personality &= ~READ_IMPLIES_EXEC; + /* in_compat_syscall() uses the presence of the x32 + syscall bit flag to determine compat status */ +- current_thread_info()->status &= ~TS_COMPAT; ++ current->thread.status &= ~TS_COMPAT; + } else { + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_X32); +@@ -518,7 +518,7 @@ void set_personality_ia32(bool x32) + current->mm->context.ia32_compat = TIF_IA32; + current->personality |= force_personality32; + /* Prepare the first "return" to user space */ +- current_thread_info()->status |= TS_COMPAT; ++ current->thread.status |= TS_COMPAT; + } + } + EXPORT_SYMBOL_GPL(set_personality_ia32); +diff --git 
a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c +index 5b88a1b26fc7..ce94c38cf4d6 100644 +--- a/arch/x86/kernel/ptrace.c ++++ b/arch/x86/kernel/ptrace.c +@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) + */ + regs->orig_ax = value; + if (syscall_get_nr(child, regs) >= 0) +- task_thread_info(child)->status |= TS_I386_REGS_POKED; ++ child->thread.status |= TS_I386_REGS_POKED; + break; + + case offsetof(struct user32, regs.eflags): +diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c +index 04cb3212db2d..da20ecb5397a 100644 +--- a/arch/x86/kernel/signal.c ++++ b/arch/x86/kernel/signal.c +@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) + * than the tracee. + */ + #ifdef CONFIG_IA32_EMULATION +- if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) ++ if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + return __NR_ia32_restart_syscall; + #endif + #ifdef CONFIG_X86_X32_ABI +-- +2.15.0 + diff --git a/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch b/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch new file mode 100644 index 0000000..78ec401 --- /dev/null +++ b/queue/x86-cpufeature-Add-AVX512_4VNNIW-and-AVX512_4FMAPS-f.patch @@ -0,0 +1,90 @@ +From 8214899342981dbd49ae24aadbbd19e9e7830684 Mon Sep 17 00:00:00 2001 +From: Piotr Luc <piotr.luc@intel.com> +Date: Tue, 18 Oct 2016 17:01:11 +0200 +Subject: [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features + +commit 8214899342981dbd49ae24aadbbd19e9e7830684 upstream. + +AVX512_4VNNIW - Vector instructions for deep learning enhanced word +variable precision. +AVX512_4FMAPS - Vector instructions for deep learning floating-point +single precision. + +These new instructions are to be used in future Intel Xeon & Xeon Phi +processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new +instructions are supported by a processor. + +The spec can be found in the Intel Software Developer Manual (SDM) or in +the Instruction Set Extensions Programming Reference (ISE). + +Define new feature flags to enumerate the new instructions in /proc/cpuinfo +accordingly to CPUID bits and add the required xsave extensions which are +required for proper operation. 
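+
+Not part of the original commit, but for illustration: with a compiler
+that ships <cpuid.h>, the enumeration described above can be probed from
+userspace as follows (assumes the CPU implements CPUID leaf 0x07):
+
+  #include <cpuid.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned int eax, ebx, ecx, edx;
+
+          /* leaf 0x07, subleaf 0; the new feature bits live in EDX */
+          __cpuid_count(7, 0, eax, ebx, ecx, edx);
+          printf("AVX512_4VNNIW: %u\n", (edx >> 2) & 1);
+          printf("AVX512_4FMAPS: %u\n", (edx >> 3) & 1);
+          return 0;
+  }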
+ +Signed-off-by: Piotr Luc <piotr.luc@intel.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 1188bc849ee3..a39629206864 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -194,6 +194,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ ++#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ ++#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 8cb57df9398d..1db8dc490b66 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, ++ { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, ++ { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index 124aa5c593f8..095ef7ddd6ae 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void) + setup_clear_cpu_cap(X86_FEATURE_MPX); + setup_clear_cpu_cap(X86_FEATURE_XGETBV1); + setup_clear_cpu_cap(X86_FEATURE_PKU); ++ setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); ++ setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); + } + + /* +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index 1188bc849ee3..a39629206864 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -194,6 +194,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + + #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ ++#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ ++#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ +-- +2.15.0 + diff --git a/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch b/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch new file mode 100644 index 0000000..fe131ea --- /dev/null +++ b/queue/x86-entry-64-Clean-up-and-document-espfix64-stack-se.patch @@ -0,0 +1,113 @@ +From 85063fac1f72419eec4349621fe829b07f9acb1e Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Mon, 12 Sep 2016 15:05:51 -0700 +Subject: [PATCH] x86/entry/64: 
Clean up and document espfix64 stack setup + +commit 85063fac1f72419eec4349621fe829b07f9acb1e upstream. + +The espfix64 setup code was a bit inscrutible and contained an +unnecessary push of RAX. Remove that push, update all the stack +offsets to match, and document the whole mess. + +Reported-By: Borislav Petkov <bp@alien8.de> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Reviewed-by: Borislav Petkov <bp@suse.de> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/e5459eb10cf1175c8b36b840bc425f210d045f35.1473717910.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index c0373d667674..e7fba58f4d9c 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -586,27 +586,69 @@ native_irq_return_iret: + + #ifdef CONFIG_X86_ESPFIX64 + native_irq_return_ldt: +- pushq %rax +- pushq %rdi ++ /* ++ * We are running with user GSBASE. All GPRs contain their user ++ * values. We have a percpu ESPFIX stack that is eight slots ++ * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom ++ * of the ESPFIX stack. ++ * ++ * We clobber RAX and RDI in this code. We stash RDI on the ++ * normal stack and RAX on the ESPFIX stack. ++ * ++ * The ESPFIX stack layout we set up looks like this: ++ * ++ * --- top of ESPFIX stack --- ++ * SS ++ * RSP ++ * RFLAGS ++ * CS ++ * RIP <-- RSP points here when we're done ++ * RAX <-- espfix_waddr points here ++ * --- bottom of ESPFIX stack --- ++ */ ++ ++ pushq %rdi /* Stash user RDI */ + SWAPGS + movq PER_CPU_VAR(espfix_waddr), %rdi +- movq %rax, (0*8)(%rdi) /* RAX */ +- movq (2*8)(%rsp), %rax /* RIP */ ++ movq %rax, (0*8)(%rdi) /* user RAX */ ++ movq (1*8)(%rsp), %rax /* user RIP */ + movq %rax, (1*8)(%rdi) +- movq (3*8)(%rsp), %rax /* CS */ ++ movq (2*8)(%rsp), %rax /* user CS */ + movq %rax, (2*8)(%rdi) +- movq (4*8)(%rsp), %rax /* RFLAGS */ ++ movq (3*8)(%rsp), %rax /* user RFLAGS */ + movq %rax, (3*8)(%rdi) +- movq (6*8)(%rsp), %rax /* SS */ ++ movq (5*8)(%rsp), %rax /* user SS */ + movq %rax, (5*8)(%rdi) +- movq (5*8)(%rsp), %rax /* RSP */ ++ movq (4*8)(%rsp), %rax /* user RSP */ + movq %rax, (4*8)(%rdi) +- andl $0xffff0000, %eax +- popq %rdi ++ /* Now RAX == RSP. */ ++ ++ andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ ++ popq %rdi /* Restore user RDI */ ++ ++ /* ++ * espfix_stack[31:16] == 0. The page tables are set up such that ++ * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of ++ * espfix_waddr for any X. That is, there are 65536 RO aliases of ++ * the same page. Set up RSP so that RSP[31:16] contains the ++ * respective 16 bits of the /userspace/ RSP and RSP nonetheless ++ * still points to an RO alias of the ESPFIX stack. ++ */ + orq PER_CPU_VAR(espfix_stack), %rax + SWAPGS + movq %rax, %rsp +- popq %rax ++ ++ /* ++ * At this point, we cannot write to the stack any more, but we can ++ * still read. ++ */ ++ popq %rax /* Restore user RAX */ ++ ++ /* ++ * RSP now points to an ordinary IRET frame, except that the page ++ * is read-only and RSP[31:16] are preloaded with the userspace ++ * values. We can now IRET back to userspace. 
++ */ + jmp native_irq_return_iret + #endif + END(common_interrupt) +-- +2.15.0 + diff --git a/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch b/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch new file mode 100644 index 0000000..2ce03f1 --- /dev/null +++ b/queue/x86-entry-64-Fix-a-minor-comment-rebase-error.patch @@ -0,0 +1,40 @@ +From ff0071c03684485495e06f3936399eb9c93141a6 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 15 Sep 2016 22:45:42 -0700 +Subject: [PATCH] x86/entry/64: Fix a minor comment rebase error + +commit ff0071c03684485495e06f3936399eb9c93141a6 upstream. + +When I rebased my thread_info changes onto Brian's switch_to() +changes, I carefully checked that I fixed up all the code correctly, +but I missed a comment :( + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Fixes: 15f4eae70d36 ("x86: Move thread_info into task_struct") +Link: http://lkml.kernel.org/r/089fe1e1cbe8b258b064fccbb1a5a5fd23861031.1474003868.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 2b46384b4a4f..80ab68a42621 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -372,7 +372,6 @@ END(ptregs_\func) + /* + * %rdi: prev task + * %rsi: next task +- * rsi: task we're switching to + */ + ENTRY(__switch_to_asm) + /* +-- +2.15.0 + diff --git a/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch b/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch new file mode 100644 index 0000000..5b70330 --- /dev/null +++ b/queue/x86-entry-Get-rid-of-pt_regs_to_thread_info.patch @@ -0,0 +1,108 @@ +From 97245d00585d82540f4538cf72d92a1e853c7b0e Mon Sep 17 00:00:00 2001 +From: Linus Torvalds <torvalds@linux-foundation.org> +Date: Tue, 13 Sep 2016 14:29:22 -0700 +Subject: [PATCH] x86/entry: Get rid of pt_regs_to_thread_info() + +commit 97245d00585d82540f4538cf72d92a1e853c7b0e upstream. + +It was a nice optimization while it lasted, but thread_info is moving +and this optimization will no longer work. + +Quoting Linus: + + Oh Gods, Andy. That pt_regs_to_thread_info() thing made me want + to do unspeakable acts on a poor innocent wax figure that looked + _exactly_ like you. + +[ Changelog written by Andy. ] +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. 
Peter Anvin <hpa@zytor.com> +Cc: Jann Horn <jann@thejh.net> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/6376aa81c68798cc81631673f52bd91a3e078944.1473801993.git.luto@kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index 871bbf975d4c..bdd9cc59d20f 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -31,13 +31,6 @@ + #define CREATE_TRACE_POINTS + #include <trace/events/syscalls.h> + +-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) +-{ +- unsigned long top_of_stack = +- (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; +- return (struct thread_info *)(top_of_stack - THREAD_SIZE); +-} +- + #ifdef CONFIG_CONTEXT_TRACKING + /* Called on entry from user mode with IRQs off. */ + __visible inline void enter_from_user_mode(void) +@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs) + { + u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + +- struct thread_info *ti = pt_regs_to_thread_info(regs); ++ struct thread_info *ti = current_thread_info(); + unsigned long ret = 0; + bool emulated = false; + u32 work; +@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) + /* Disable IRQs and retry */ + local_irq_disable(); + +- cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); ++ cached_flags = READ_ONCE(current_thread_info()->flags); + + if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) + break; +- + } + } + + /* Called with IRQs disabled. */ + __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) + { +- struct thread_info *ti = pt_regs_to_thread_info(regs); ++ struct thread_info *ti = current_thread_info(); + u32 cached_flags; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) +@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) + */ + __visible inline void syscall_return_slowpath(struct pt_regs *regs) + { +- struct thread_info *ti = pt_regs_to_thread_info(regs); ++ struct thread_info *ti = current_thread_info(); + u32 cached_flags = READ_ONCE(ti->flags); + + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); +@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) + #ifdef CONFIG_X86_64 + __visible void do_syscall_64(struct pt_regs *regs) + { +- struct thread_info *ti = pt_regs_to_thread_info(regs); ++ struct thread_info *ti = current_thread_info(); + unsigned long nr = regs->orig_ax; + + enter_from_user_mode(); +@@ -303,7 +295,7 @@ __visible void do_syscall_64(struct pt_regs *regs) + */ + static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) + { +- struct thread_info *ti = pt_regs_to_thread_info(regs); ++ struct thread_info *ti = current_thread_info(); + unsigned int nr = (unsigned int)regs->orig_ax; + + #ifdef CONFIG_IA32_EMULATION +-- +2.15.0 + diff --git a/queue/x86-entry-Remove-duplicated-comment.patch b/queue/x86-entry-Remove-duplicated-comment.patch new file mode 100644 index 0000000..c6d5388 --- /dev/null +++ b/queue/x86-entry-Remove-duplicated-comment.patch @@ -0,0 +1,40 @@ +From b3830e8d478cd9fe33e820425ce431c8ef280967 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov <bp@suse.de> +Date: Mon, 1 Aug 2016 12:05:02 +0200 +Subject: [PATCH] x86/entry: Remove duplicated comment + +commit b3830e8d478cd9fe33e820425ce431c8ef280967 upstream. 
+ +Ok, ok, we see it is called from C :-) + +Signed-off-by: Borislav Petkov <bp@suse.de> +Cc: Andy Lutomirski <luto@amacapital.net> +Cc: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20160801100502.29796-1-bp@alien8.de +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index b846875aeea6..8956eae04c25 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -347,8 +347,7 @@ ENTRY(stub_ptregs_64) + jmp entry_SYSCALL64_slow_path + + 1: +- /* Called from C */ +- jmp *%rax /* called from C */ ++ jmp *%rax /* Called from C */ + END(stub_ptregs_64) + + .macro ptregs_stub func +-- +2.15.0 + diff --git a/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch b/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch new file mode 100644 index 0000000..a08a569 --- /dev/null +++ b/queue/x86-entry-spell-EBX-register-correctly-in-documentat.patch @@ -0,0 +1,30 @@ +From 75ca5b22260ef7b5ce39c6d521eee8b4cba44703 Mon Sep 17 00:00:00 2001 +From: Nicolas Iooss <nicolas.iooss_linux@m4x.org> +Date: Fri, 29 Jul 2016 13:39:51 +0200 +Subject: [PATCH] x86/entry: spell EBX register correctly in documentation + +commit 75ca5b22260ef7b5ce39c6d521eee8b4cba44703 upstream. + +As EBS does not mean anything reasonable in the context it is used, it +seems like a misspelling for EBX. + +Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org> +Acked-by: Borislav Petkov <bp@suse.de> +Signed-off-by: Jiri Kosina <jkosina@suse.cz> + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 9ee0da1807ed..c8804827d436 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1058,7 +1058,7 @@ END(error_entry) + + + /* +- * On entry, EBS is a "return to kernel mode" flag: ++ * On entry, EBX is a "return to kernel mode" flag: + * 1: already in kernel mode, don't need SWAPGS + * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode + */ +-- +2.15.0 + diff --git a/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch b/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch new file mode 100644 index 0000000..b6b9b6f --- /dev/null +++ b/queue/x86-kbuild-enable-modversions-for-symbols-exported-f.patch @@ -0,0 +1,63 @@ +From 334bb773876403eae3457d81be0b8ea70f8e4ccc Mon Sep 17 00:00:00 2001 +From: Adam Borowski <kilobyte@angband.pl> +Date: Sun, 11 Dec 2016 02:09:18 +0100 +Subject: [PATCH] x86/kbuild: enable modversions for symbols exported from asm + +commit 334bb773876403eae3457d81be0b8ea70f8e4ccc upstream. + +Commit 4efca4ed ("kbuild: modversions for EXPORT_SYMBOL() for asm") adds +modversion support for symbols exported from asm files. Architectures +must include C-style declarations for those symbols in asm/asm-prototypes.h +in order for them to be versioned. + +Add these declarations for x86, and an architecture-independent file that +can be used for common symbols. + +With f27c2f6 reverting 8ab2ae6 ("default exported asm symbols to zero") we +produce a scary warning on x86, this commit fixes that. 
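As the message above notes, modversions works from C-style declarations: genksyms computes a symbol's CRC from its prototype, so an architecture only needs a C-visible declaration for every symbol it exports from assembly. A minimal sketch of what such a header could look like for a hypothetical architecture, mirroring the convention the diff below establishes (the identifiers here are illustrative, not part of this patch):

/*
 * Hypothetical asm/asm-prototypes.h for some architecture. Pull in
 * headers that already declare asm-implemented symbols, plus the
 * generic prototypes for the common string routines, and add bare
 * externs for the few helpers with no C header of their own, so
 * genksyms can compute CRCs for all of them.
 */
#include <asm/checksum.h>		/* e.g. csum_partial(), defined in a .S file */

#include <asm-generic/asm-prototypes.h>	/* memset/memcpy/memmove externs */

/* assembly-only helper with no other C-visible declaration */
extern void cmpxchg8b_emu(void);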
+ +Signed-off-by: Adam Borowski <kilobyte@angband.pl> +Tested-by: Kalle Valo <kvalo@codeaurora.org> +Acked-by: Nicholas Piggin <npiggin@gmail.com> +Tested-by: Peter Wu <peter@lekensteyn.nl> +Tested-by: Oliver Hartkopp <socketcan@hartkopp.net> +Signed-off-by: Michal Marek <mmarek@suse.com> + +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +new file mode 100644 +index 000000000000..44b8762fa0c7 +--- /dev/null ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -0,0 +1,16 @@ ++#include <asm/ftrace.h> ++#include <asm/uaccess.h> ++#include <asm/string.h> ++#include <asm/page.h> ++#include <asm/checksum.h> ++ ++#include <asm-generic/asm-prototypes.h> ++ ++#include <asm/page.h> ++#include <asm/pgtable.h> ++#include <asm/special_insns.h> ++#include <asm/preempt.h> ++ ++#ifndef CONFIG_X86_CMPXCHG64 ++extern void cmpxchg8b_emu(void); ++#endif +diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h +new file mode 100644 +index 000000000000..df13637e4017 +--- /dev/null ++++ b/include/asm-generic/asm-prototypes.h +@@ -0,0 +1,7 @@ ++#include <linux/bitops.h> ++extern void *__memset(void *, int, __kernel_size_t); ++extern void *__memcpy(void *, const void *, __kernel_size_t); ++extern void *__memmove(void *, const void *, __kernel_size_t); ++extern void *memset(void *, int, __kernel_size_t); ++extern void *memcpy(void *, const void *, __kernel_size_t); ++extern void *memmove(void *, const void *, __kernel_size_t); +-- +2.15.0 + diff --git a/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch b/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch new file mode 100644 index 0000000..7261c05 --- /dev/null +++ b/queue/x86-mce-Add-PCI-quirks-to-identify-Xeons-with-machin.patch @@ -0,0 +1,121 @@ +From 3637efb00864f465baebd49464e58319fd295b65 Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Thu, 1 Sep 2016 11:39:33 -0700 +Subject: [PATCH] x86/mce: Add PCI quirks to identify Xeons with machine check + recovery + +commit 3637efb00864f465baebd49464e58319fd295b65 upstream. + +Each Xeon includes a number of capability registers in PCI space that +describe some features not enumerated by CPUID. + +Use these to determine that we are running on a model that can recover from +machine checks. Hooks for Ivybridge ... Skylake provided. 
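The diff below implements the detection as early PCI fixups. Condensed to its essentials, the pattern is: match a known uncore device, read the capability dword from config space, and test the relevant bit. In this hedged sketch the device ID (0x0ec3), config offset (0x84), and bit mask (0x10) are taken from the patch itself, while the function name and the pr_info() are stand-ins for the static-key increment the real quirk performs:

#include <linux/pci.h>
#include <linux/printk.h>

/* Condensed sketch of quirk_intel_brickland_xeon_ras_cap() below. */
static void xeon_ras_cap_quirk(struct pci_dev *pdev)
{
	u32 capid0;

	pci_read_config_dword(pdev, 0x84, &capid0);	/* capability register */
	if (capid0 & 0x10)				/* recovery supported */
		pr_info("machine check recovery supported\n");
}
/* 0x0ec3: the Ivy Bridge uncore device matched in the patch */
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, xeon_ras_cap_quirk);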
+ +Signed-off-by: Tony Luck <tony.luck@intel.com> +Acked-by: Borislav Petkov <bp@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Boris Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/abf331dc4a3e2a2d17444129bc51127437bcf4ba.1472754711.git.tony.luck@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h +index 90dbbd9666d4..877a1dfbf770 100644 +--- a/arch/x86/include/asm/string_64.h ++++ b/arch/x86/include/asm/string_64.h +@@ -2,6 +2,7 @@ + #define _ASM_X86_STRING_64_H + + #ifdef __KERNEL__ ++#include <linux/jump_label.h> + + /* Written 2002 by Andi Kleen */ + +@@ -78,6 +79,8 @@ int strcmp(const char *cs, const char *ct); + #define memset(s, c, n) __memset(s, c, n) + #endif + ++DECLARE_STATIC_KEY_FALSE(mcsafe_key); ++ + /** + * memcpy_mcsafe - copy memory with indication if a machine check happened + * +diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c +index 79d8ec849468..acccebcc836d 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -41,6 +41,7 @@ + #include <linux/debugfs.h> + #include <linux/irq_work.h> + #include <linux/export.h> ++#include <linux/jump_label.h> + + #include <asm/processor.h> + #include <asm/traps.h> +@@ -2080,6 +2081,7 @@ void mce_disable_bank(int bank) + * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + * mce=nobootlog Don't log MCEs from before booting. + * mce=bios_cmci_threshold Don't program the CMCI threshold ++ * mce=recovery force enable memcpy_mcsafe() + */ + static int __init mcheck_enable(char *str) + { +@@ -2676,8 +2678,14 @@ static int __init mcheck_debugfs_init(void) + static int __init mcheck_debugfs_init(void) { return -EINVAL; } + #endif + ++DEFINE_STATIC_KEY_FALSE(mcsafe_key); ++EXPORT_SYMBOL_GPL(mcsafe_key); ++ + static int __init mcheck_late_init(void) + { ++ if (mca_cfg.recovery) ++ static_branch_inc(&mcsafe_key); ++ + mcheck_debugfs_init(); + + /* +diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c +index cc457ff818ad..51402a7e4ca6 100644 +--- a/arch/x86/kernel/quirks.c ++++ b/arch/x86/kernel/quirks.c +@@ -626,3 +626,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, + amd_disable_seq_and_redirect_scrub); + + #endif ++ ++#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) ++#include <linux/jump_label.h> ++#include <asm/string_64.h> ++ ++/* Ivy Bridge, Haswell, Broadwell */ ++static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) ++{ ++ u32 capid0; ++ ++ pci_read_config_dword(pdev, 0x84, &capid0); ++ ++ if (capid0 & 0x10) ++ static_branch_inc(&mcsafe_key); ++} ++ ++/* Skylake */ ++static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) ++{ ++ u32 capid0; ++ ++ pci_read_config_dword(pdev, 0x84, &capid0); ++ ++ if ((capid0 & 0xc0) == 0xc0) ++ static_branch_inc(&mcsafe_key); ++} ++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); ++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); ++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); ++DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); ++#endif +-- +2.15.0 + diff --git a/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch 
b/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch new file mode 100644 index 0000000..caca953 --- /dev/null +++ b/queue/x86-mce-Drop-X86_FEATURE_MCE_RECOVERY-and-the-relate.patch @@ -0,0 +1,57 @@ +From ffb173e657fa8123bffa2a169e124b4bca0b5bc4 Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Thu, 1 Sep 2016 11:39:33 -0700 +Subject: [PATCH] x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model + string test + +commit ffb173e657fa8123bffa2a169e124b4bca0b5bc4 upstream. + +We now have a better way to determine if we are running on a cpu that +supports machine check recovery. Free up this feature bit. + +Signed-off-by: Tony Luck <tony.luck@intel.com> +Acked-by: Borislav Petkov <bp@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Boris Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/d5db39e08d46cf1012d94d3902275d08ba931926.1472754712.git.tony.luck@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 92a8308b96f6..1188bc849ee3 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -106,7 +106,6 @@ + #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ + #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ + #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ +-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ + + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ + #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ +diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c +index acccebcc836d..7f3f0e147242 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -1634,17 +1634,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) + + if (c->x86 == 6 && c->x86_model == 45) + quirk_no_way_out = quirk_sandybridge_ifu; +- /* +- * MCG_CAP.MCG_SER_P is necessary but not sufficient to know +- * whether this processor will actually generate recoverable +- * machine checks. Check to see if this is an E7 model Xeon. +- * We can't do a model number check because E5 and E7 use the +- * same model number. E5 doesn't support recovery, E7 does. +- */ +- if (mca_cfg.recovery || (mca_cfg.ser && +- !strncmp(c->x86_model_id, +- "Intel(R) Xeon(R) CPU E7-", 24))) +- set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); + } + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = 0; +-- +2.15.0 + diff --git a/queue/x86-mce-Improve-memcpy_mcsafe.patch b/queue/x86-mce-Improve-memcpy_mcsafe.patch new file mode 100644 index 0000000..2d865ec --- /dev/null +++ b/queue/x86-mce-Improve-memcpy_mcsafe.patch @@ -0,0 +1,120 @@ +From 9a6fb28a355d2609ace4dab4e6425442c647894d Mon Sep 17 00:00:00 2001 +From: Tony Luck <tony.luck@intel.com> +Date: Thu, 1 Sep 2016 11:39:33 -0700 +Subject: [PATCH] x86/mce: Improve memcpy_mcsafe() + +commit 9a6fb28a355d2609ace4dab4e6425442c647894d upstream. + +Use the mcsafe_key defined in the previous patch to make decisions on which +copy function to use. We can't use the FEATURE bit any more because PCI +quirks run too late to affect the patching of code. So we use a static key. + +Turn memcpy_mcsafe() into an inline function to make life easier for +callers. 
The assembly code that actually does the copy is now named +memcpy_mcsafe_unrolled() + +Signed-off-by: Tony Luck <tony.luck@intel.com> +Acked-by: Borislav Petkov <bp@suse.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Boris Petkov <bp@suse.de> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Link: http://lkml.kernel.org/r/bfde2fc774e94f53d91b70a4321c85a0d33e7118.1472754712.git.tony.luck@intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h +index 643eba42d620..2c1ebeb4d737 100644 +--- a/arch/x86/include/asm/pmem.h ++++ b/arch/x86/include/asm/pmem.h +@@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) + + static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) + { +- if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) +- return memcpy_mcsafe(dst, src, n); +- memcpy(dst, src, n); +- return 0; ++ return memcpy_mcsafe(dst, src, n); + } + + /** +diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h +index 877a1dfbf770..a164862d77e3 100644 +--- a/arch/x86/include/asm/string_64.h ++++ b/arch/x86/include/asm/string_64.h +@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct); + #define memset(s, c, n) __memset(s, c, n) + #endif + ++__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); + DECLARE_STATIC_KEY_FALSE(mcsafe_key); + + /** +@@ -89,10 +90,23 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); + * @cnt: number of bytes to copy + * + * Low level memory copy function that catches machine checks ++ * We only call into the "safe" function on systems that can ++ * actually do machine check recovery. Everyone else can just ++ * use memcpy(). + * + * Return 0 for success, -EFAULT for fail + */ +-int memcpy_mcsafe(void *dst, const void *src, size_t cnt); ++static __always_inline __must_check int ++memcpy_mcsafe(void *dst, const void *src, size_t cnt) ++{ ++#ifdef CONFIG_X86_MCE ++ if (static_branch_unlikely(&mcsafe_key)) ++ return memcpy_mcsafe_unrolled(dst, src, cnt); ++ else ++#endif ++ memcpy(dst, src, cnt); ++ return 0; ++} + + #endif /* __KERNEL__ */ + +diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c +index 95e49f6e4fc3..b2cee3d19477 100644 +--- a/arch/x86/kernel/x8664_ksyms_64.c ++++ b/arch/x86/kernel/x8664_ksyms_64.c +@@ -38,7 +38,7 @@ EXPORT_SYMBOL(__copy_user_nocache); + EXPORT_SYMBOL(_copy_from_user); + EXPORT_SYMBOL(_copy_to_user); + +-EXPORT_SYMBOL_GPL(memcpy_mcsafe); ++EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled); + + EXPORT_SYMBOL(copy_page); + EXPORT_SYMBOL(clear_page); +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S +index 2ec0b0abbfaa..49e6ebac7e73 100644 +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig) + + #ifndef CONFIG_UML + /* +- * memcpy_mcsafe - memory copy with machine check exception handling ++ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * Note that we only catch machine checks when reading the source addresses. + * Writes to target are posted and don't generate machine checks. + */ +-ENTRY(memcpy_mcsafe) ++ENTRY(memcpy_mcsafe_unrolled) + cmpl $8, %edx + /* Less than 8 bytes? 
Go to byte copy loop */ + jb .L_no_whole_words +@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe) + .L_done_memcpy_trap: + xorq %rax, %rax + ret +-ENDPROC(memcpy_mcsafe) ++ENDPROC(memcpy_mcsafe_unrolled) + + .section .fixup, "ax" + /* Return -EFAULT for any failure */ +-- +2.15.0 + diff --git a/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch b/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch new file mode 100644 index 0000000..457ae3b --- /dev/null +++ b/queue/x86-mm-64-Enable-vmapped-stacks-CONFIG_HAVE_ARCH_VMA.patch @@ -0,0 +1,234 @@ +From e37e43a497d5a8b7c0cc1736d56986f432c394c9 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski <luto@kernel.org> +Date: Thu, 11 Aug 2016 02:35:23 -0700 +Subject: [PATCH] x86/mm/64: Enable vmapped stacks + (CONFIG_HAVE_ARCH_VMAP_STACK=y) + +commit e37e43a497d5a8b7c0cc1736d56986f432c394c9 upstream. + +This allows x86_64 kernels to enable vmapped stacks by setting +HAVE_ARCH_VMAP_STACK=y - which enables the CONFIG_VMAP_STACK=y +high level Kconfig option. + +There are a couple of interesting bits: + +First, x86 lazily faults in top-level paging entries for the vmalloc +area. This won't work if we get a page fault while trying to access +the stack: the CPU will promote it to a double-fault and we'll die. +To avoid this problem, probe the new stack when switching stacks and +forcibly populate the pgd entry for the stack when switching mms. + +Second, once we have guard pages around the stack, we'll want to +detect and handle stack overflow. + +I didn't enable it on x86_32. We'd need to rework the double-fault +code a bit and I'm concerned about running out of vmalloc virtual +addresses under some workloads. + +This patch, by itself, will behave somewhat erratically when the +stack overflows while RSP is still more than a few tens of bytes +above the bottom of the stack. Specifically, we'll get #PF and make +it to no_context and them oops without reliably triggering a +double-fault, and no_context doesn't know about stack overflows. +The next patch will improve that case. + +Thank you to Nadav and Brian for helping me pay enough attention to +the SDM to hopefully get this right. + +Signed-off-by: Andy Lutomirski <luto@kernel.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Brian Gerst <brgerst@gmail.com> +Cc: Denys Vlasenko <dvlasenk@redhat.com> +Cc: H. Peter Anvin <hpa@zytor.com> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Nadav Amit <nadav.amit@gmail.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/c88f3e2920b18e6cc621d772a04a62c06869037e.1470907718.git.luto@kernel.org +[ Minor edits. 
] +Signed-off-by: Ingo Molnar <mingo@kernel.org> + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index c580d8c33562..21a6d0ec5983 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -94,6 +94,7 @@ config X86 + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_WITHIN_STACK_FRAMES + select HAVE_EBPF_JIT if X86_64 ++ select HAVE_ARCH_VMAP_STACK if X86_64 + select HAVE_CC_STACKPROTECTOR + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL +diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h +index 8f321a1b03a1..14e4b20f0aaf 100644 +--- a/arch/x86/include/asm/switch_to.h ++++ b/arch/x86/include/asm/switch_to.h +@@ -8,6 +8,28 @@ struct tss_struct; + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss); + ++/* This runs runs on the previous thread's stack. */ ++static inline void prepare_switch_to(struct task_struct *prev, ++ struct task_struct *next) ++{ ++#ifdef CONFIG_VMAP_STACK ++ /* ++ * If we switch to a stack that has a top-level paging entry ++ * that is not present in the current mm, the resulting #PF will ++ * will be promoted to a double-fault and we'll panic. Probe ++ * the new stack now so that vmalloc_fault can fix up the page ++ * tables if needed. This can only happen if we use a stack ++ * in vmap space. ++ * ++ * We assume that the stack is aligned so that it never spans ++ * more than one top-level paging entry. ++ * ++ * To minimize cache pollution, just follow the stack pointer. ++ */ ++ READ_ONCE(*(unsigned char *)next->thread.sp); ++#endif ++} ++ + #ifdef CONFIG_X86_32 + + #ifdef CONFIG_CC_STACKPROTECTOR +@@ -39,6 +61,8 @@ do { \ + */ \ + unsigned long ebx, ecx, edx, esi, edi; \ + \ ++ prepare_switch_to(prev, next); \ ++ \ + asm volatile("pushl %%ebp\n\t" /* save EBP */ \ + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ +@@ -103,7 +127,9 @@ do { \ + * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL + * has no effect. + */ +-#define switch_to(prev, next, last) \ ++#define switch_to(prev, next, last) \ ++ prepare_switch_to(prev, next); \ ++ \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index b70ca12dd389..907b4e4aeb5e 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) + DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) + DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) + ++#ifdef CONFIG_VMAP_STACK ++static void __noreturn handle_stack_overflow(const char *message, ++ struct pt_regs *regs, ++ unsigned long fault_address) ++{ ++ printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n", ++ (void *)fault_address, current->stack, ++ (char *)current->stack + THREAD_SIZE - 1); ++ die(message, regs, 0); ++ ++ /* Be absolutely certain we don't return. 
*/ ++ panic(message); ++} ++#endif ++ + #ifdef CONFIG_X86_64 + /* Runs on IST stack */ + dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) + { + static const char str[] = "double fault"; + struct task_struct *tsk = current; ++#ifdef CONFIG_VMAP_STACK ++ unsigned long cr2; ++#endif + + #ifdef CONFIG_X86_ESPFIX64 + extern unsigned char native_irq_return_iret[]; +@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_DF; + ++#ifdef CONFIG_VMAP_STACK ++ /* ++ * If we overflow the stack into a guard page, the CPU will fail ++ * to deliver #PF and will send #DF instead. Similarly, if we ++ * take any non-IST exception while too close to the bottom of ++ * the stack, the processor will get a page fault while ++ * delivering the exception and will generate a double fault. ++ * ++ * According to the SDM (footnote in 6.15 under "Interrupt 14 - ++ * Page-Fault Exception (#PF): ++ * ++ * Processors update CR2 whenever a page fault is detected. If a ++ * second page fault occurs while an earlier page fault is being ++ * deliv- ered, the faulting linear address of the second fault will ++ * overwrite the contents of CR2 (replacing the previous ++ * address). These updates to CR2 occur even if the page fault ++ * results in a double fault or occurs during the delivery of a ++ * double fault. ++ * ++ * The logic below has a small possibility of incorrectly diagnosing ++ * some errors as stack overflows. For example, if the IDT or GDT ++ * gets corrupted such that #GP delivery fails due to a bad descriptor ++ * causing #GP and we hit this condition while CR2 coincidentally ++ * points to the stack guard page, we'll think we overflowed the ++ * stack. Given that we're going to panic one way or another ++ * if this happens, this isn't necessarily worth fixing. ++ * ++ * If necessary, we could improve the test by only diagnosing ++ * a stack overflow if the saved RSP points within 47 bytes of ++ * the bottom of the stack: if RSP == tsk_stack + 48 and we ++ * take an exception, the stack is already aligned and there ++ * will be enough room SS, RSP, RFLAGS, CS, RIP, and a ++ * possible error code, so a stack overflow would *not* double ++ * fault. With any less space left, exception delivery could ++ * fail, and, as a practical matter, we've overflowed the ++ * stack even if the actual trigger for the double fault was ++ * something else. ++ */ ++ cr2 = read_cr2(); ++ if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) ++ handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); ++#endif ++ + #ifdef CONFIG_DOUBLEFAULT + df_debug(regs, error_code); + #endif +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 4dbe65622810..a7655f6caf7d 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { ++ if (IS_ENABLED(CONFIG_VMAP_STACK)) { ++ /* ++ * If our current stack is in vmalloc space and isn't ++ * mapped in the new pgd, we'll double-fault. Forcibly ++ * map it. 
++ */ ++ unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); ++ ++ pgd_t *pgd = next->pgd + stack_pgd_index; ++ ++ if (unlikely(pgd_none(*pgd))) ++ set_pgd(pgd, init_mm.pgd[stack_pgd_index]); ++ } ++ + #ifdef CONFIG_SMP + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); + this_cpu_write(cpu_tlbstate.active_mm, next); + #endif ++ + cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* +-- +2.15.0 + diff --git a/queue/x86-move-exports-to-actual-definitions.patch b/queue/x86-move-exports-to-actual-definitions.patch new file mode 100644 index 0000000..d822535 --- /dev/null +++ b/queue/x86-move-exports-to-actual-definitions.patch @@ -0,0 +1,723 @@ +From b0b9d354f5f52a5bc96c4a8715b69be17729d3b5 Mon Sep 17 00:00:00 2001 +From: Al Viro <viro@zeniv.linux.org.uk> +Date: Mon, 11 Jan 2016 11:04:34 -0500 +Subject: [PATCH] x86: move exports to actual definitions + +commit 784d5699eddc55878627da20d3fe0c8542e2f1a2 upstream. + +Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index 7b52e50863ff..edba8606b99a 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -44,6 +44,7 @@ + #include <asm/alternative-asm.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/export.h> + + .section .entry.text, "ax" + +@@ -991,6 +992,7 @@ trace: + jmp ftrace_stub + END(mcount) + #endif /* CONFIG_DYNAMIC_FTRACE */ ++EXPORT_SYMBOL(mcount) + #endif /* CONFIG_FUNCTION_TRACER */ + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index c98ec2efd750..ef766a358b37 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -35,6 +35,7 @@ + #include <asm/asm.h> + #include <asm/smap.h> + #include <asm/pgtable_types.h> ++#include <asm/export.h> + #include <linux/err.h> + + /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ +@@ -875,6 +876,7 @@ ENTRY(native_load_gs_index) + popfq + ret + END(native_load_gs_index) ++EXPORT_SYMBOL(native_load_gs_index) + + _ASM_EXTABLE(.Lgs_change, bad_gs) + .section .fixup, "ax" +diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S +index e5a17114a8c4..fee6bc79b987 100644 +--- a/arch/x86/entry/thunk_32.S ++++ b/arch/x86/entry/thunk_32.S +@@ -6,6 +6,7 @@ + */ + #include <linux/linkage.h> + #include <asm/asm.h> ++ #include <asm/export.h> + + /* put return address in eax (arg1) */ + .macro THUNK name, func, put_ret_addr_in_eax=0 +@@ -36,5 +37,7 @@ + #ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule + THUNK ___preempt_schedule_notrace, preempt_schedule_notrace ++ EXPORT_SYMBOL(___preempt_schedule) ++ EXPORT_SYMBOL(___preempt_schedule_notrace) + #endif + +diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S +index 627ecbcb2e62..be36bf4e0957 100644 +--- a/arch/x86/entry/thunk_64.S ++++ b/arch/x86/entry/thunk_64.S +@@ -8,6 +8,7 @@ + #include <linux/linkage.h> + #include "calling.h" + #include <asm/asm.h> ++#include <asm/export.h> + + /* rdi: arg1 ... normal C conventions. rax is saved/restored. 
*/ + .macro THUNK name, func, put_ret_addr_in_rdi=0 +@@ -49,6 +50,8 @@ + #ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule + THUNK ___preempt_schedule_notrace, preempt_schedule_notrace ++ EXPORT_SYMBOL(___preempt_schedule) ++ EXPORT_SYMBOL(___preempt_schedule_notrace) + #endif + + #if defined(CONFIG_TRACE_IRQFLAGS) \ +diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h +new file mode 100644 +index 000000000000..138de56b13eb +--- /dev/null ++++ b/arch/x86/include/asm/export.h +@@ -0,0 +1,4 @@ ++#ifdef CONFIG_64BIT ++#define KSYM_ALIGN 16 ++#endif ++#include <asm-generic/export.h> +diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile +index 0503f5bfb18d..d3f49c3d5aca 100644 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -46,9 +46,7 @@ obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o + obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o + obj-$(CONFIG_IRQ_WORK) += irq_work.o + obj-y += probe_roms.o +-obj-$(CONFIG_X86_32) += i386_ksyms_32.o +-obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o +-obj-$(CONFIG_X86_64) += mcount_64.o ++obj-$(CONFIG_X86_64) += sys_x86_64.o mcount_64.o + obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o + obj-$(CONFIG_SYSFS) += ksysfs.o + obj-y += bootflag.o e820.o +diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S +index 6f8902b0d151..4707baf94203 100644 +--- a/arch/x86/kernel/head_32.S ++++ b/arch/x86/kernel/head_32.S +@@ -23,6 +23,7 @@ + #include <asm/percpu.h> + #include <asm/nops.h> + #include <asm/bootparam.h> ++#include <asm/export.h> + + /* Physical address */ + #define pa(X) ((X) - __PAGE_OFFSET) +@@ -673,6 +674,7 @@ ENTRY(empty_zero_page) + .fill 4096,1,0 + ENTRY(swapper_pg_dir) + .fill 1024,4,0 ++EXPORT_SYMBOL(empty_zero_page) + + /* + * This starts the data section. +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index 9f8efc9f0075..537d913f45ec 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -21,6 +21,7 @@ + #include <asm/percpu.h> + #include <asm/nops.h> + #include "../entry/calling.h" ++#include <asm/export.h> + + #ifdef CONFIG_PARAVIRT + #include <asm/asm-offsets.h> +@@ -488,10 +489,12 @@ early_gdt_descr_base: + ENTRY(phys_base) + /* This must match the first entry in level2_kernel_pgt */ + .quad 0x0000000000000000 ++EXPORT_SYMBOL(phys_base) + + #include "../../x86/xen/xen-head.S" + + __PAGE_ALIGNED_BSS + NEXT_PAGE(empty_zero_page) + .skip PAGE_SIZE ++EXPORT_SYMBOL(empty_zero_page) + +diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c +deleted file mode 100644 +index 1f9b878ef5ef..000000000000 +--- a/arch/x86/kernel/i386_ksyms_32.c ++++ /dev/null +@@ -1,47 +0,0 @@ +-#include <linux/export.h> +-#include <linux/spinlock_types.h> +- +-#include <asm/checksum.h> +-#include <asm/pgtable.h> +-#include <asm/desc.h> +-#include <asm/ftrace.h> +- +-#ifdef CONFIG_FUNCTION_TRACER +-/* mcount is defined in assembly */ +-EXPORT_SYMBOL(mcount); +-#endif +- +-/* +- * Note, this is a prototype to get at the symbol for +- * the export, but dont use it from C code, it is used +- * by assembly code and is not using C calling convention! +- */ +-#ifndef CONFIG_X86_CMPXCHG64 +-extern void cmpxchg8b_emu(void); +-EXPORT_SYMBOL(cmpxchg8b_emu); +-#endif +- +-/* Networking helper routines. 
*/ +-EXPORT_SYMBOL(csum_partial_copy_generic); +- +-EXPORT_SYMBOL(__get_user_1); +-EXPORT_SYMBOL(__get_user_2); +-EXPORT_SYMBOL(__get_user_4); +-EXPORT_SYMBOL(__get_user_8); +- +-EXPORT_SYMBOL(__put_user_1); +-EXPORT_SYMBOL(__put_user_2); +-EXPORT_SYMBOL(__put_user_4); +-EXPORT_SYMBOL(__put_user_8); +- +-EXPORT_SYMBOL(strstr); +- +-EXPORT_SYMBOL(csum_partial); +-EXPORT_SYMBOL(empty_zero_page); +- +-#ifdef CONFIG_PREEMPT +-EXPORT_SYMBOL(___preempt_schedule); +-EXPORT_SYMBOL(___preempt_schedule_notrace); +-#endif +- +-EXPORT_SYMBOL(__sw_hweight32); +diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S +index 61924222a9e1..efe73aacf966 100644 +--- a/arch/x86/kernel/mcount_64.S ++++ b/arch/x86/kernel/mcount_64.S +@@ -7,6 +7,7 @@ + #include <linux/linkage.h> + #include <asm/ptrace.h> + #include <asm/ftrace.h> ++#include <asm/export.h> + + + .code64 +@@ -294,6 +295,7 @@ trace: + jmp fgraph_trace + END(function_hook) + #endif /* CONFIG_DYNAMIC_FTRACE */ ++EXPORT_SYMBOL(function_hook) + #endif /* CONFIG_FUNCTION_TRACER */ + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c +deleted file mode 100644 +index b2cee3d19477..000000000000 +--- a/arch/x86/kernel/x8664_ksyms_64.c ++++ /dev/null +@@ -1,85 +0,0 @@ +-/* Exports for assembly files. +- All C exports should go in the respective C files. */ +- +-#include <linux/export.h> +-#include <linux/spinlock_types.h> +-#include <linux/smp.h> +- +-#include <net/checksum.h> +- +-#include <asm/processor.h> +-#include <asm/pgtable.h> +-#include <asm/uaccess.h> +-#include <asm/desc.h> +-#include <asm/ftrace.h> +- +-#ifdef CONFIG_FUNCTION_TRACER +-/* mcount and __fentry__ are defined in assembly */ +-#ifdef CC_USING_FENTRY +-EXPORT_SYMBOL(__fentry__); +-#else +-EXPORT_SYMBOL(mcount); +-#endif +-#endif +- +-EXPORT_SYMBOL(__get_user_1); +-EXPORT_SYMBOL(__get_user_2); +-EXPORT_SYMBOL(__get_user_4); +-EXPORT_SYMBOL(__get_user_8); +-EXPORT_SYMBOL(__put_user_1); +-EXPORT_SYMBOL(__put_user_2); +-EXPORT_SYMBOL(__put_user_4); +-EXPORT_SYMBOL(__put_user_8); +- +-EXPORT_SYMBOL(copy_user_generic_string); +-EXPORT_SYMBOL(copy_user_generic_unrolled); +-EXPORT_SYMBOL(copy_user_enhanced_fast_string); +-EXPORT_SYMBOL(__copy_user_nocache); +-EXPORT_SYMBOL(_copy_from_user); +-EXPORT_SYMBOL(_copy_to_user); +- +-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled); +- +-EXPORT_SYMBOL(copy_page); +-EXPORT_SYMBOL(clear_page); +- +-EXPORT_SYMBOL(csum_partial); +- +-EXPORT_SYMBOL(__sw_hweight32); +-EXPORT_SYMBOL(__sw_hweight64); +- +-/* +- * Export string functions. We normally rely on gcc builtin for most of these, +- * but gcc sometimes decides not to inline them. 
+- */ +-#undef memcpy +-#undef memset +-#undef memmove +- +-extern void *__memset(void *, int, __kernel_size_t); +-extern void *__memcpy(void *, const void *, __kernel_size_t); +-extern void *__memmove(void *, const void *, __kernel_size_t); +-extern void *memset(void *, int, __kernel_size_t); +-extern void *memcpy(void *, const void *, __kernel_size_t); +-extern void *memmove(void *, const void *, __kernel_size_t); +- +-EXPORT_SYMBOL(__memset); +-EXPORT_SYMBOL(__memcpy); +-EXPORT_SYMBOL(__memmove); +- +-EXPORT_SYMBOL(memset); +-EXPORT_SYMBOL(memcpy); +-EXPORT_SYMBOL(memmove); +- +-#ifndef CONFIG_DEBUG_VIRTUAL +-EXPORT_SYMBOL(phys_base); +-#endif +-EXPORT_SYMBOL(empty_zero_page); +-#ifndef CONFIG_PARAVIRT +-EXPORT_SYMBOL(native_load_gs_index); +-#endif +- +-#ifdef CONFIG_PREEMPT +-EXPORT_SYMBOL(___preempt_schedule); +-EXPORT_SYMBOL(___preempt_schedule_notrace); +-#endif +diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S +index c1e623209853..4d34bb548b41 100644 +--- a/arch/x86/lib/checksum_32.S ++++ b/arch/x86/lib/checksum_32.S +@@ -28,6 +28,7 @@ + #include <linux/linkage.h> + #include <asm/errno.h> + #include <asm/asm.h> ++#include <asm/export.h> + + /* + * computes a partial checksum, e.g. for TCP/UDP fragments +@@ -251,6 +252,7 @@ ENTRY(csum_partial) + ENDPROC(csum_partial) + + #endif ++EXPORT_SYMBOL(csum_partial) + + /* + unsigned int csum_partial_copy_generic (const char *src, char *dst, +@@ -490,3 +492,4 @@ ENDPROC(csum_partial_copy_generic) + #undef ROUND1 + + #endif ++EXPORT_SYMBOL(csum_partial_copy_generic) +diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S +index 65be7cfaf947..5e2af3a88cf5 100644 +--- a/arch/x86/lib/clear_page_64.S ++++ b/arch/x86/lib/clear_page_64.S +@@ -1,6 +1,7 @@ + #include <linux/linkage.h> + #include <asm/cpufeatures.h> + #include <asm/alternative-asm.h> ++#include <asm/export.h> + + /* + * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is +@@ -23,6 +24,7 @@ ENTRY(clear_page) + rep stosq + ret + ENDPROC(clear_page) ++EXPORT_SYMBOL(clear_page) + + ENTRY(clear_page_orig) + +diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S +index ad5349778490..03a186fc06ea 100644 +--- a/arch/x86/lib/cmpxchg8b_emu.S ++++ b/arch/x86/lib/cmpxchg8b_emu.S +@@ -7,6 +7,7 @@ + */ + + #include <linux/linkage.h> ++#include <asm/export.h> + + .text + +@@ -48,3 +49,4 @@ ENTRY(cmpxchg8b_emu) + ret + + ENDPROC(cmpxchg8b_emu) ++EXPORT_SYMBOL(cmpxchg8b_emu) +diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S +index 24ef1c2104d4..e8508156c99d 100644 +--- a/arch/x86/lib/copy_page_64.S ++++ b/arch/x86/lib/copy_page_64.S +@@ -3,6 +3,7 @@ + #include <linux/linkage.h> + #include <asm/cpufeatures.h> + #include <asm/alternative-asm.h> ++#include <asm/export.h> + + /* + * Some CPUs run faster using the string copy instructions (sane microcode). 
+@@ -17,6 +18,7 @@ ENTRY(copy_page) + rep movsq + ret + ENDPROC(copy_page) ++EXPORT_SYMBOL(copy_page) + + ENTRY(copy_page_regs) + subq $2*8, %rsp +diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S +index bf603ebbfd8e..d376e4b48f88 100644 +--- a/arch/x86/lib/copy_user_64.S ++++ b/arch/x86/lib/copy_user_64.S +@@ -14,6 +14,7 @@ + #include <asm/alternative-asm.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/export.h> + + /* Standard copy_to_user with segment limit checking */ + ENTRY(_copy_to_user) +@@ -29,6 +30,7 @@ ENTRY(_copy_to_user) + "jmp copy_user_enhanced_fast_string", \ + X86_FEATURE_ERMS + ENDPROC(_copy_to_user) ++EXPORT_SYMBOL(_copy_to_user) + + /* Standard copy_from_user with segment limit checking */ + ENTRY(_copy_from_user) +@@ -44,6 +46,8 @@ ENTRY(_copy_from_user) + "jmp copy_user_enhanced_fast_string", \ + X86_FEATURE_ERMS + ENDPROC(_copy_from_user) ++EXPORT_SYMBOL(_copy_from_user) ++ + + .section .fixup,"ax" + /* must zero dest */ +@@ -155,6 +159,7 @@ ENTRY(copy_user_generic_unrolled) + _ASM_EXTABLE(21b,50b) + _ASM_EXTABLE(22b,50b) + ENDPROC(copy_user_generic_unrolled) ++EXPORT_SYMBOL(copy_user_generic_unrolled) + + /* Some CPUs run faster using the string copy instructions. + * This is also a lot simpler. Use them when possible. +@@ -200,6 +205,7 @@ ENTRY(copy_user_generic_string) + _ASM_EXTABLE(1b,11b) + _ASM_EXTABLE(3b,12b) + ENDPROC(copy_user_generic_string) ++EXPORT_SYMBOL(copy_user_generic_string) + + /* + * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. +@@ -229,6 +235,7 @@ ENTRY(copy_user_enhanced_fast_string) + + _ASM_EXTABLE(1b,12b) + ENDPROC(copy_user_enhanced_fast_string) ++EXPORT_SYMBOL(copy_user_enhanced_fast_string) + + /* + * copy_user_nocache - Uncached memory copy with exception handling +@@ -379,3 +386,4 @@ ENTRY(__copy_user_nocache) + _ASM_EXTABLE(40b,.L_fixup_1b_copy) + _ASM_EXTABLE(41b,.L_fixup_1b_copy) + ENDPROC(__copy_user_nocache) ++EXPORT_SYMBOL(__copy_user_nocache) +diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c +index 9a7fe6a70491..378e5d5bf9b1 100644 +--- a/arch/x86/lib/csum-partial_64.c ++++ b/arch/x86/lib/csum-partial_64.c +@@ -135,6 +135,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) + return (__force __wsum)add32_with_carry(do_csum(buff, len), + (__force u32)sum); + } ++EXPORT_SYMBOL(csum_partial); + + /* + * this routine is used for miscellaneous IP-like checksums, mainly +diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S +index 0ef5128c2de8..37b62d412148 100644 +--- a/arch/x86/lib/getuser.S ++++ b/arch/x86/lib/getuser.S +@@ -32,6 +32,7 @@ + #include <asm/thread_info.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/export.h> + + .text + ENTRY(__get_user_1) +@@ -44,6 +45,7 @@ ENTRY(__get_user_1) + ASM_CLAC + ret + ENDPROC(__get_user_1) ++EXPORT_SYMBOL(__get_user_1) + + ENTRY(__get_user_2) + add $1,%_ASM_AX +@@ -57,6 +59,7 @@ ENTRY(__get_user_2) + ASM_CLAC + ret + ENDPROC(__get_user_2) ++EXPORT_SYMBOL(__get_user_2) + + ENTRY(__get_user_4) + add $3,%_ASM_AX +@@ -70,6 +73,7 @@ ENTRY(__get_user_4) + ASM_CLAC + ret + ENDPROC(__get_user_4) ++EXPORT_SYMBOL(__get_user_4) + + ENTRY(__get_user_8) + #ifdef CONFIG_X86_64 +@@ -97,6 +101,7 @@ ENTRY(__get_user_8) + ret + #endif + ENDPROC(__get_user_8) ++EXPORT_SYMBOL(__get_user_8) + + + bad_get_user: +diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S +index 8a602a1e404a..23d893cbc200 100644 +--- a/arch/x86/lib/hweight.S ++++ b/arch/x86/lib/hweight.S +@@ -1,4 +1,5 @@ + 
#include <linux/linkage.h> ++#include <asm/export.h> + + #include <asm/asm.h> + +@@ -32,6 +33,7 @@ ENTRY(__sw_hweight32) + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret + ENDPROC(__sw_hweight32) ++EXPORT_SYMBOL(__sw_hweight32) + + ENTRY(__sw_hweight64) + #ifdef CONFIG_X86_64 +@@ -77,3 +79,4 @@ ENTRY(__sw_hweight64) + ret + #endif + ENDPROC(__sw_hweight64) ++EXPORT_SYMBOL(__sw_hweight64) +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S +index 98dcc112b363..9a53a06e5a3e 100644 +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -4,6 +4,7 @@ + #include <asm/errno.h> + #include <asm/cpufeatures.h> + #include <asm/alternative-asm.h> ++#include <asm/export.h> + + /* + * We build a jump to memcpy_orig by default which gets NOPped out on +@@ -40,6 +41,8 @@ ENTRY(memcpy) + ret + ENDPROC(memcpy) + ENDPROC(__memcpy) ++EXPORT_SYMBOL(memcpy) ++EXPORT_SYMBOL(__memcpy) + + /* + * memcpy_erms() - enhanced fast string memcpy. This is faster and +@@ -274,6 +277,7 @@ ENTRY(memcpy_mcsafe_unrolled) + xorq %rax, %rax + ret + ENDPROC(memcpy_mcsafe_unrolled) ++EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) + + .section .fixup, "ax" + /* Return -EFAULT for any failure */ +diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S +index 90ce01bee00c..15de86cd15b0 100644 +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -8,6 +8,7 @@ + #include <linux/linkage.h> + #include <asm/cpufeatures.h> + #include <asm/alternative-asm.h> ++#include <asm/export.h> + + #undef memmove + +@@ -207,3 +208,5 @@ ENTRY(__memmove) + retq + ENDPROC(__memmove) + ENDPROC(memmove) ++EXPORT_SYMBOL(__memmove) ++EXPORT_SYMBOL(memmove) +diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S +index e1229ecd2a82..55b95db30a61 100644 +--- a/arch/x86/lib/memset_64.S ++++ b/arch/x86/lib/memset_64.S +@@ -3,6 +3,7 @@ + #include <linux/linkage.h> + #include <asm/cpufeatures.h> + #include <asm/alternative-asm.h> ++#include <asm/export.h> + + .weak memset + +@@ -43,6 +44,8 @@ ENTRY(__memset) + ret + ENDPROC(memset) + ENDPROC(__memset) ++EXPORT_SYMBOL(memset) ++EXPORT_SYMBOL(__memset) + + /* + * ISO C memset - set a memory block to a byte value. 
This function uses +diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S +index c891ece81e5b..cd5d716d2897 100644 +--- a/arch/x86/lib/putuser.S ++++ b/arch/x86/lib/putuser.S +@@ -15,6 +15,7 @@ + #include <asm/errno.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/export.h> + + + /* +@@ -43,6 +44,7 @@ ENTRY(__put_user_1) + xor %eax,%eax + EXIT + ENDPROC(__put_user_1) ++EXPORT_SYMBOL(__put_user_1) + + ENTRY(__put_user_2) + ENTER +@@ -55,6 +57,7 @@ ENTRY(__put_user_2) + xor %eax,%eax + EXIT + ENDPROC(__put_user_2) ++EXPORT_SYMBOL(__put_user_2) + + ENTRY(__put_user_4) + ENTER +@@ -67,6 +70,7 @@ ENTRY(__put_user_4) + xor %eax,%eax + EXIT + ENDPROC(__put_user_4) ++EXPORT_SYMBOL(__put_user_4) + + ENTRY(__put_user_8) + ENTER +@@ -82,6 +86,7 @@ ENTRY(__put_user_8) + xor %eax,%eax + EXIT + ENDPROC(__put_user_8) ++EXPORT_SYMBOL(__put_user_8) + + bad_put_user: + movl $-EFAULT,%eax +diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c +index 8e2d55f754bf..a03b1c750bfe 100644 +--- a/arch/x86/lib/strstr_32.c ++++ b/arch/x86/lib/strstr_32.c +@@ -1,4 +1,5 @@ + #include <linux/string.h> ++#include <linux/export.h> + + char *strstr(const char *cs, const char *ct) + { +@@ -28,4 +29,4 @@ __asm__ __volatile__( + : "dx", "di"); + return __res; + } +- ++EXPORT_SYMBOL(strstr); +diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile +index 3ee2bb6b440b..e7e7055a8658 100644 +--- a/arch/x86/um/Makefile ++++ b/arch/x86/um/Makefile +@@ -8,7 +8,7 @@ else + BITS := 64 + endif + +-obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ++obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \ + ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ + stub_$(BITS).o stub_segv.o \ + sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ +diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S +index fa4b8b9841ff..b9933eb9274a 100644 +--- a/arch/x86/um/checksum_32.S ++++ b/arch/x86/um/checksum_32.S +@@ -27,6 +27,7 @@ + + #include <asm/errno.h> + #include <asm/asm.h> ++#include <asm/export.h> + + /* + * computes a partial checksum, e.g. for TCP/UDP fragments +@@ -214,3 +215,4 @@ csum_partial: + ret + + #endif ++ EXPORT_SYMBOL(csum_partial) +diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c +deleted file mode 100644 +index 2e8f43ec6214..000000000000 +--- a/arch/x86/um/ksyms.c ++++ /dev/null +@@ -1,13 +0,0 @@ +-#include <linux/module.h> +-#include <asm/string.h> +-#include <asm/checksum.h> +- +-#ifndef CONFIG_X86_32 +-/*XXX: we need them because they would be exported by x86_64 */ +-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 +-EXPORT_SYMBOL(memcpy); +-#else +-EXPORT_SYMBOL(__memcpy); +-#endif +-#endif +-EXPORT_SYMBOL(csum_partial); +-- +2.15.0 + |
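Stepping back, the most reusable technique in this queue is the static-key dispatch from the memcpy_mcsafe() rework earlier in the series: because the PCI quirks run too late for alternatives patching, the branch is a static key that defaults to the plain copy and is switched on at boot only on capable hardware. A self-contained sketch of that pattern with simplified, hypothetical names (recovery_key and my_mcsafe_copy* are not the identifiers used in the patch):

#include <linux/jump_label.h>
#include <linux/string.h>

/* Default-off key; boot-time detection code flips it on capable
 * hardware, so the common case compiles to a straight-line memcpy(). */
DEFINE_STATIC_KEY_FALSE(recovery_key);

/* Would be implemented in assembly with exception-table entries. */
extern int my_mcsafe_copy_unrolled(void *dst, const void *src, size_t cnt);

static __always_inline int my_mcsafe_copy(void *dst, const void *src, size_t cnt)
{
	if (static_branch_unlikely(&recovery_key))
		return my_mcsafe_copy_unrolled(dst, src, cnt);	/* 0 or -EFAULT */
	memcpy(dst, src, cnt);		/* no recovery available: plain copy */
	return 0;
}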