summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Gortmaker <paul.gortmaker@windriver.com>2018-02-23 12:39:07 -0500
committerPaul Gortmaker <paul.gortmaker@windriver.com>2018-02-23 12:39:07 -0500
commit3f691eccc8fa85d0011699ea40a8b852cc36e802 (patch)
treeae7a400517aadcd3afcd32f5a4cb2a1e1edea19f
parent6d9970971f8fd5ff1aa4a7378e09aec11a98d75b (diff)
downloadlongterm-queue-4.8-3f691eccc8fa85d0011699ea40a8b852cc36e802.tar.gz
patches taken directly from the stable-queue git repository
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-rw-r--r--queue/array_index_nospec-sanitize-speculative-array-de-references.patch117
-rw-r--r--queue/documentation-document-array_index_nospec.patch125
-rw-r--r--queue/kaiser-add-nokaiser-boot-option-using-alternative.patch652
-rw-r--r--queue/kaiser-align-addition-to-x86-mm-makefile.patch26
-rw-r--r--queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch86
-rw-r--r--queue/kaiser-cleanups-while-trying-for-gold-link.patch134
-rw-r--r--queue/kaiser-delete-kaiser_real_switch-option.patch79
-rw-r--r--queue/kaiser-disabled-on-xen-pv.patch42
-rw-r--r--queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch204
-rw-r--r--queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch53
-rw-r--r--queue/kaiser-enhanced-by-kernel-and-user-pcids.patch403
-rw-r--r--queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch52
-rw-r--r--queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch53
-rw-r--r--queue/kaiser-fix-compile-error-without-vsyscall.patch46
-rw-r--r--queue/kaiser-fix-intel_bts-perf-crashes.patch132
-rw-r--r--queue/kaiser-fix-perf-crashes.patch150
-rw-r--r--queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch72
-rw-r--r--queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch33
-rw-r--r--queue/kaiser-kaiser-depends-on-smp.patch54
-rw-r--r--queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch86
-rw-r--r--queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch50
-rw-r--r--queue/kaiser-kernel-address-isolation.patch979
-rw-r--r--queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch392
-rw-r--r--queue/kaiser-merged-update.patch1298
-rw-r--r--queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch66
-rw-r--r--queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch166
-rw-r--r--queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch129
-rw-r--r--queue/kaiser-set-_page_nx-only-if-supported.patch118
-rw-r--r--queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch139
-rw-r--r--queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch105
-rw-r--r--queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch50
-rw-r--r--queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch130
-rw-r--r--queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch116
-rw-r--r--queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch141
-rw-r--r--queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch49
-rw-r--r--queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch40
-rw-r--r--queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch80
-rw-r--r--queue/kpti-rename-to-page_table_isolation.patch329
-rw-r--r--queue/kpti-report-when-enabled.patch48
-rw-r--r--queue/kvm-vmx-make-indirect-call-speculation-safe.patch57
-rw-r--r--queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch78
-rw-r--r--queue/map-the-vsyscall-page-with-_page_user.patch143
-rw-r--r--queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch39
-rw-r--r--queue/module-add-retpoline-tag-to-vermagic.patch53
-rw-r--r--queue/module-retpoline-warn-about-missing-retpoline-in-module.patch149
-rw-r--r--queue/nl80211-sanitize-array-index-in-parse_txq_params.patch72
-rw-r--r--queue/objtool-allow-alternatives-to-be-ignored.patch163
-rw-r--r--queue/objtool-detect-jumps-to-retpoline-thunks.patch61
-rw-r--r--queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch45
-rw-r--r--queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch84
-rw-r--r--queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch71
-rw-r--r--queue/selftests-x86-add-test_vsyscall.patch569
-rw-r--r--queue/sysfs-cpu-add-vulnerability-folder.patch149
-rw-r--r--queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch35
-rw-r--r--queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch54
-rw-r--r--queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch72
-rw-r--r--queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch56
-rw-r--r--queue/x86-alternatives-fix-optimize_nops-checking.patch53
-rw-r--r--queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch172
-rw-r--r--queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch138
-rw-r--r--queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch178
-rw-r--r--queue/x86-bugs-drop-one-mitigation-from-dmesg.patch52
-rw-r--r--queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch66
-rw-r--r--queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch81
-rw-r--r--queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch66
-rw-r--r--queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch79
-rw-r--r--queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch82
-rw-r--r--queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch136
-rw-r--r--queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch46
-rw-r--r--queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch167
-rw-r--r--queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch68
-rw-r--r--queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch47
-rw-r--r--queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch149
-rw-r--r--queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch47
-rw-r--r--queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch73
-rw-r--r--queue/x86-cpufeatures-add-x86_bug_spectre_v.patch58
-rw-r--r--queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch171
-rw-r--r--queue/x86-cpufeatures-make-cpu-bugs-sticky.patch96
-rw-r--r--queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch122
-rw-r--r--queue/x86-documentation-add-pti-description.patch261
-rw-r--r--queue/x86-entry-64-push-extra-regs-right-away.patch46
-rw-r--r--queue/x86-entry-64-remove-the-syscall64-fast-path.patch202
-rw-r--r--queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch98
-rw-r--r--queue/x86-implement-array_index_mask_nospec.patch66
-rw-r--r--queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch80
-rw-r--r--queue/x86-introduce-barrier_nospec.patch66
-rw-r--r--queue/x86-kaiser-check-boottime-cmdline-params.patch123
-rw-r--r--queue/x86-kaiser-move-feature-detection-up.patch79
-rw-r--r--queue/x86-kaiser-reenable-paravirt.patch28
-rw-r--r--queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch97
-rw-r--r--queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch62
-rw-r--r--queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch43
-rw-r--r--queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch74
-rw-r--r--queue/x86-mm-disable-pcid-on-32-bit-kernels.patch78
-rw-r--r--queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch108
-rw-r--r--queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch81
-rw-r--r--queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch105
-rw-r--r--queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch101
-rw-r--r--queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch305
-rw-r--r--queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch63
-rw-r--r--queue/x86-nospec-fix-header-guards-names.patch53
-rw-r--r--queue/x86-paravirt-dont-patch-flush_tlb_single.patch68
-rw-r--r--queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch91
-rw-r--r--queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch112
-rw-r--r--queue/x86-pti-document-fix-wrong-index.patch32
-rw-r--r--queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch76
-rw-r--r--queue/x86-pti-mark-constant-arrays-as-__initconst.patch53
-rw-r--r--queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch56
-rw-r--r--queue/x86-retpoline-add-initial-retpoline-support.patch359
-rw-r--r--queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch91
-rw-r--r--queue/x86-retpoline-avoid-retpolines-for-built-in-__init-functions.patch50
-rw-r--r--queue/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch67
-rw-r--r--queue/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch125
-rw-r--r--queue/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch117
-rw-r--r--queue/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch190
-rw-r--r--queue/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch170
-rw-r--r--queue/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch88
-rw-r--r--queue/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch76
-rw-r--r--queue/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch74
-rw-r--r--queue/x86-retpoline-optimize-inline-assembler-for-vmexit_fill_rsb.patch58
-rw-r--r--queue/x86-retpoline-remove-compile-time-warning.patch60
-rw-r--r--queue/x86-retpoline-remove-the-esp-rsp-thunk.patch58
-rw-r--r--queue/x86-retpoline-simplify-vmexit_fill_rsb.patch248
-rw-r--r--queue/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch60
-rw-r--r--queue/x86-smpboot-remove-stale-tlb-flush-invocations.patch65
-rw-r--r--queue/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch317
-rw-r--r--queue/x86-spectre-check-config_retpoline-in-command-line-parser.patch49
-rw-r--r--queue/x86-spectre-fix-spelling-mistake-vunerable-vulnerable.patch38
-rw-r--r--queue/x86-spectre-report-get_user-mitigation-for-spectre_v1.patch41
-rw-r--r--queue/x86-spectre-simplify-spectre_v2-command-line-parsing.patch138
-rw-r--r--queue/x86-speculation-add-basic-ibpb-indirect-branch-prediction-barrier-support.patch94
-rw-r--r--queue/x86-speculation-fix-typo-ibrs_att-which-should-be-ibrs_all.patch38
-rw-r--r--queue/x86-syscall-sanitize-syscall-table-de-references-under-speculation.patch61
-rw-r--r--queue/x86-tlb-drop-the-_gpl-from-the-cpu_tlbstate-export.patch48
-rw-r--r--queue/x86-uaccess-use-__uaccess_begin_nospec-and-uaccess_try_nospec.patch187
-rw-r--r--queue/x86-usercopy-replace-open-coded-stac-clac-with-__uaccess_-begin-end.patch70
-rw-r--r--queue/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch50
137 files changed, 16915 insertions, 0 deletions
diff --git a/queue/array_index_nospec-sanitize-speculative-array-de-references.patch b/queue/array_index_nospec-sanitize-speculative-array-de-references.patch
new file mode 100644
index 0000000..99fe9a2
--- /dev/null
+++ b/queue/array_index_nospec-sanitize-speculative-array-de-references.patch
@@ -0,0 +1,117 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:22 -0800
+Subject: array_index_nospec: Sanitize speculative array de-references
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit f3804203306e098dae9ca51540fcd5eb700d7f40)
+
+array_index_nospec() is proposed as a generic mechanism to mitigate
+against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary
+checks via speculative execution. The array_index_nospec()
+implementation is expected to be safe for current generation CPUs across
+multiple architectures (ARM, x86).
+
+Based on an original implementation by Linus Torvalds, tweaked to remove
+speculative flows by Alexei Starovoitov, and tweaked again by Linus to
+introduce an x86 assembly implementation for the mask generation.
+
+Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
+Co-developed-by: Alexei Starovoitov <ast@kernel.org>
+Suggested-by: Cyril Novikov <cnovikov@lynx.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Russell King <linux@armlinux.org.uk>
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/nospec.h | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 72 insertions(+)
+ create mode 100644 include/linux/nospec.h
+
+--- /dev/null
++++ b/include/linux/nospec.h
+@@ -0,0 +1,72 @@
++// SPDX-License-Identifier: GPL-2.0
++// Copyright(c) 2018 Linus Torvalds. All rights reserved.
++// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
++// Copyright(c) 2018 Intel Corporation. All rights reserved.
++
++#ifndef _LINUX_NOSPEC_H
++#define _LINUX_NOSPEC_H
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set. Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++ unsigned long size)
++{
++ /*
++ * Warn developers about inappropriate array_index_nospec() usage.
++ *
++ * Even if the CPU speculates past the WARN_ONCE branch, the
++ * sign bit of @index is taken into account when generating the
++ * mask.
++ *
++ * This warning is compiled out when the compiler can infer that
++ * @index and @size are less than LONG_MAX.
++ */
++ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
++ "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
++ return 0;
++
++ /*
++ * Always calculate and emit the mask even if the compiler
++ * thinks the mask is not needed. The compiler does not take
++ * into account the value of @index under speculation.
++ */
++ OPTIMIZER_HIDE_VAR(index);
++ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ * if (index < size) {
++ * index = array_index_nospec(index, size);
++ * val = array[index];
++ * }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size) \
++({ \
++ typeof(index) _i = (index); \
++ typeof(size) _s = (size); \
++ unsigned long _mask = array_index_mask_nospec(_i, _s); \
++ \
++ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
++ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
++ \
++ _i &= _mask; \
++ _i; \
++})
++#endif /* _LINUX_NOSPEC_H */
diff --git a/queue/documentation-document-array_index_nospec.patch b/queue/documentation-document-array_index_nospec.patch
new file mode 100644
index 0000000..5bad41f
--- /dev/null
+++ b/queue/documentation-document-array_index_nospec.patch
@@ -0,0 +1,125 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Mon, 29 Jan 2018 17:02:16 -0800
+Subject: Documentation: Document array_index_nospec
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+
+(cherry picked from commit f84a56f73dddaeac1dba8045b007f742f61cd2da)
+
+Document the rationale and usage of the new array_index_nospec() helper.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: linux-arch@vger.kernel.org
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: gregkh@linuxfoundation.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727413645.33451.15878817161436755393.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/speculation.txt | 90 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 90 insertions(+)
+ create mode 100644 Documentation/speculation.txt
+
+--- /dev/null
++++ b/Documentation/speculation.txt
+@@ -0,0 +1,90 @@
++This document explains potential effects of speculation, and how undesirable
++effects can be mitigated portably using common APIs.
++
++===========
++Speculation
++===========
++
++To improve performance and minimize average latencies, many contemporary CPUs
++employ speculative execution techniques such as branch prediction, performing
++work which may be discarded at a later stage.
++
++Typically speculative execution cannot be observed from architectural state,
++such as the contents of registers. However, in some cases it is possible to
++observe its impact on microarchitectural state, such as the presence or
++absence of data in caches. Such state may form side-channels which can be
++observed to extract secret information.
++
++For example, in the presence of branch prediction, it is possible for bounds
++checks to be ignored by code which is speculatively executed. Consider the
++following code:
++
++ int load_array(int *array, unsigned int index)
++ {
++ if (index >= MAX_ARRAY_ELEMS)
++ return 0;
++ else
++ return array[index];
++ }
++
++Which, on arm64, may be compiled to an assembly sequence such as:
++
++ CMP <index>, #MAX_ARRAY_ELEMS
++ B.LT less
++ MOV <returnval>, #0
++ RET
++ less:
++ LDR <returnval>, [<array>, <index>]
++ RET
++
++It is possible that a CPU mis-predicts the conditional branch, and
++speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
++value will subsequently be discarded, but the speculated load may affect
++microarchitectural state which can be subsequently measured.
++
++More complex sequences involving multiple dependent memory accesses may
++result in sensitive information being leaked. Consider the following
++code, building on the prior example:
++
++ int load_dependent_arrays(int *arr1, int *arr2, int index)
++ {
++ int val1, val2,
++
++ val1 = load_array(arr1, index);
++ val2 = load_array(arr2, val1);
++
++ return val2;
++ }
++
++Under speculation, the first call to load_array() may return the value
++of an out-of-bounds address, while the second call will influence
++microarchitectural state dependent on this value. This may provide an
++arbitrary read primitive.
++
++====================================
++Mitigating speculation side-channels
++====================================
++
++The kernel provides a generic API to ensure that bounds checks are
++respected even under speculation. Architectures which are affected by
++speculation-based side-channels are expected to implement these
++primitives.
++
++The array_index_nospec() helper in <linux/nospec.h> can be used to
++prevent information from being leaked via side-channels.
++
++A call to array_index_nospec(index, size) returns a sanitized index
++value that is bounded to [0, size) even under cpu speculation
++conditions.
++
++This can be used to protect the earlier load_array() example:
++
++ int load_array(int *array, unsigned int index)
++ {
++ if (index >= MAX_ARRAY_ELEMS)
++ return 0;
++ else {
++ index = array_index_nospec(index, MAX_ARRAY_ELEMS);
++ return array[index];
++ }
++ }
diff --git a/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch b/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch
new file mode 100644
index 0000000..b8d1005
--- /dev/null
+++ b/queue/kaiser-add-nokaiser-boot-option-using-alternative.patch
@@ -0,0 +1,652 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 24 Sep 2017 16:59:49 -0700
+Subject: kaiser: add "nokaiser" boot option, using ALTERNATIVE
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Added "nokaiser" boot option: an early param like "noinvpcid".
+Most places now check int kaiser_enabled (#defined 0 when not
+CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
+and entry_64_compat.S are using the ALTERNATIVE technique, which
+patches in the preferred instructions at runtime. That technique
+is tied to x86 cpu features, so X86_FEATURE_KAISER is fabricated.
+
+Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
+but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
+nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
+neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
+won't get set in some obscure corner, or something add PGE into CR4.
+By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
+all page table setup which uses pte_pfn() masks it out of the ptes.
+
+It's slightly shameful that the same declaration versus definition of
+kaiser_enabled appears in not one, not two, but in three header files
+(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
+than with #including any of those in any of the others; and did not
+feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
+them all, so we shall hear about it if they get out of synch.
+
+Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
+from kaiser.c; removed the unused native_get_normal_pgd(); removed
+the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
+comments. But more interestingly, set CR4.PSE in secondary_startup_64:
+the manual is clear that it does not matter whether it's 0 or 1 when
+4-level-pts are enabled, but I was distracted to find cr4 different on
+BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt | 2 +
+ arch/x86/entry/entry_64.S | 15 ++++++-----
+ arch/x86/include/asm/cpufeatures.h | 3 ++
+ arch/x86/include/asm/kaiser.h | 27 +++++++++++++++------
+ arch/x86/include/asm/pgtable.h | 20 +++++++++++----
+ arch/x86/include/asm/pgtable_64.h | 13 +++-------
+ arch/x86/include/asm/pgtable_types.h | 4 ---
+ arch/x86/include/asm/tlbflush.h | 39 +++++++++++++++++++------------
+ arch/x86/kernel/cpu/common.c | 28 +++++++++++++++++++++-
+ arch/x86/kernel/espfix_64.c | 3 +-
+ arch/x86/kernel/head_64.S | 4 +--
+ arch/x86/mm/init.c | 2 -
+ arch/x86/mm/init_64.c | 10 +++++++
+ arch/x86/mm/kaiser.c | 26 +++++++++++++++++---
+ arch/x86/mm/pgtable.c | 8 +-----
+ arch/x86/mm/tlb.c | 4 ---
+ tools/arch/x86/include/asm/cpufeatures.h | 3 ++
+ 17 files changed, 146 insertions(+), 65 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,6 +2763,8 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
++ nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1079,7 +1079,7 @@ ENTRY(paranoid_entry)
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax
+ jz 2f
+ orl $2, %ebx
+@@ -1111,6 +1111,7 @@ ENTRY(paranoid_exit)
+ TRACE_IRQS_OFF_DEBUG
+ TRACE_IRQS_IRETQ_DEBUG
+ #ifdef CONFIG_KAISER
++ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+@@ -1341,13 +1342,14 @@ ENTRY(nmi)
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+ call do_nmi
+
+@@ -1357,8 +1359,7 @@ ENTRY(nmi)
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ /*
+@@ -1585,13 +1586,14 @@ end_repeat_nmi:
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+@@ -1603,8 +1605,7 @@ end_repeat_nmi:
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ testl %ebx, %ebx /* swapgs needed? */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -198,6 +198,9 @@
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -46,28 +46,33 @@ movq \reg, %cr3
+ .endm
+
+ .macro SWITCH_KERNEL_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_USER_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++ALTERNATIVE "jmp 8f", \
++ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
++ X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++8:
+ .endm
+
+ #else /* CONFIG_KAISER */
+
+-.macro SWITCH_KERNEL_CR3 reg
++.macro SWITCH_KERNEL_CR3
+ .endm
+-.macro SWITCH_USER_CR3 reg regb
++.macro SWITCH_USER_CR3
+ .endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif /* CONFIG_KAISER */
++
++/*
++ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * so as to build with tests on kaiser_enabled instead of #ifdefs.
++ */
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsign
+ */
+ extern void kaiser_init(void);
+
+-#endif /* CONFIG_KAISER */
+-
+ #endif /* __ASSEMBLY */
+
+ #endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -18,6 +18,12 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+
++#ifdef CONFIG_KAISER
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif
++
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+ void ptdump_walk_pgd_level_checkwx(void);
+
+@@ -697,7 +703,7 @@ static inline int pgd_bad(pgd_t pgd)
+ * page table by accident; it will fault on the first
+ * instruction it tries to run. See native_set_pgd().
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled)
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+@@ -913,12 +919,14 @@ static inline void pmdp_set_wrprotect(st
+ */
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+- memcpy(dst, src, count * sizeof(pgd_t));
++ memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+- /* Clone the shadow pgd part as well */
+- memcpy(native_get_shadow_pgd(dst),
+- native_get_shadow_pgd(src),
+- count * sizeof(pgd_t));
++ if (kaiser_enabled) {
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
++ }
+ #endif
+ }
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t
+
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
++#ifdef CONFIG_DEBUG_VM
++ /* linux/mmdebug.h may not have been included at this point */
++ BUG_ON(!kaiser_enabled);
++#endif
+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+-
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+-}
+ #else
+ static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_p
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return pgdp;
+-}
+ #endif /* CONFIG_KAISER */
+
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,11 +45,7 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#ifdef CONFIG_KAISER
+-#define _PAGE_GLOBAL (_AT(pteval_t, 0))
+-#else
+ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+-#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -137,9 +137,11 @@ static inline void cr4_set_bits_and_upda
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+ #ifdef CONFIG_KAISER
++extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+ #else
++#define kaiser_enabled 0
+ static inline void kaiser_setup_pcid(void)
+ {
+ }
+@@ -164,7 +166,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -175,20 +177,30 @@ static inline void __native_flush_tlb_gl
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+- /* clear PGE */
+- native_write_cr4(cr4 & ~X86_CR4_PGE);
+- /* write old PGE again and flush TLBs */
+- native_write_cr4(cr4);
++ if (cr4 & X86_CR4_PGE) {
++ /* clear PGE and flush TLB of all entries */
++ native_write_cr4(cr4 & ~X86_CR4_PGE);
++ /* restore PGE as it was before */
++ native_write_cr4(cr4);
++ } else {
++ /*
++ * x86_64 microcode update comes this way when CR4.PGE is not
++ * enabled, and it's safer for all callers to allow this case.
++ */
++ native_write_cr3(native_read_cr3());
++ }
+ }
+
+ static inline void __native_flush_tlb_global(void)
+ {
+-#ifdef CONFIG_KAISER
+- /* Globals are not used at all */
+- __native_flush_tlb();
+-#else
+ unsigned long flags;
+
++ if (kaiser_enabled) {
++ /* Globals are not used at all */
++ __native_flush_tlb();
++ return;
++ }
++
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -208,7 +220,6 @@ static inline void __native_flush_tlb_gl
+ raw_local_irq_save(flags);
+ __native_flush_tlb_global_irq_disabled();
+ raw_local_irq_restore(flags);
+-#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -223,7 +234,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+@@ -238,9 +249,9 @@ static inline void __native_flush_tlb_si
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
+- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ if (kaiser_enabled)
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nokaiser doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++#ifdef CONFIG_KAISER
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++ pr_info("nokaiser: KAISER feature disabled\n");
++#endif
++ return 0;
++}
++early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -327,7 +341,7 @@ static __always_inline void setup_smap(s
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+- if (cpu_has(c, X86_FEATURE_PGE)) {
++ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+ * INVPCID has two "groups" of types:
+@@ -799,6 +813,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
+ init_scattered_cpuid_features(c);
++#ifdef CONFIG_KAISER
++ if (kaiser_enabled)
++ set_cpu_cap(c, X86_FEATURE_KAISER);
++#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+@@ -1537,6 +1555,14 @@ void cpu_init(void)
+ * try to read it.
+ */
+ cr4_init_shadow();
++ if (!kaiser_enabled) {
++ /*
++ * secondary_startup_64() deferred setting PGE in cr4:
++ * probe_page_size_mask() sets it on the boot cpu,
++ * but it needs to be set on each secondary cpu.
++ */
++ cr4_set_bits(X86_CR4_PGE);
++ }
+
+ /*
+ * Load microcode on this cpu if a valid microcode is available.
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -132,9 +132,10 @@ void __init init_espfix_bsp(void)
+ * area to ensure it is mapped into the shadow user page
+ * tables.
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled) {
+ set_pgd(native_get_shadow_pgd(pgd_p),
+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
++ }
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -190,8 +190,8 @@ ENTRY(secondary_startup_64)
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ 1:
+
+- /* Enable PAE mode and PGE */
+- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
++ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
++ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
+ movq %rcx, %cr4
+
+ /* Setup early boot stage 4 level pagetables. */
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -177,7 +177,7 @@ static void __init probe_page_size_mask(
+ cr4_set_bits_and_update_boot(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+- if (boot_cpu_has(X86_FEATURE_PGE)) {
++ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) {
+ cr4_set_bits_and_update_boot(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ } else
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -324,6 +324,16 @@ void __init cleanup_highmap(void)
+ continue;
+ if (vaddr < (unsigned long) _text || vaddr > end)
+ set_pmd(pmd, __pmd(0));
++ else if (kaiser_enabled) {
++ /*
++ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
++ * clear that now. This is not important, so long as
++ * CR4.PGE remains clear, but it removes an anomaly.
++ * Physical mapping setup below avoids _PAGE_GLOBAL
++ * by use of massage_pgprot() inside pfn_pte() etc.
++ */
++ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
++ }
+ }
+ }
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -16,7 +16,9 @@
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+
+-#ifdef CONFIG_KAISER
++int kaiser_enabled __read_mostly = 1;
++EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
++
+ __visible
+ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+@@ -167,8 +169,8 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return pte_offset_kernel(pmd, address);
+ }
+
+-int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+- unsigned long flags)
++static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
+ {
+ int ret = 0;
+ pte_t *pte;
+@@ -177,6 +179,15 @@ int kaiser_add_user_map(const void *__st
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
++ /*
++ * It is convenient for callers to pass in __PAGE_KERNEL etc,
++ * and there is no actual harm from setting _PAGE_GLOBAL, so
++ * long as CR4.PGE is not set. But it is nonetheless troubling
++ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
++ * requires that not to be #defined to 0): so mask it off here.
++ */
++ flags &= ~_PAGE_GLOBAL;
++
+ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+@@ -263,6 +274,8 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
++ if (!kaiser_enabled)
++ return;
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -311,6 +324,8 @@ void __init kaiser_init(void)
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
++ if (!kaiser_enabled)
++ return 0;
+ return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+
+@@ -322,6 +337,8 @@ void kaiser_remove_mapping(unsigned long
+ unsigned long addr, next;
+ pgd_t *pgd;
+
++ if (!kaiser_enabled)
++ return;
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+@@ -343,6 +360,8 @@ static inline bool is_userspace_pgd(pgd_
+
+ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++ if (!kaiser_enabled)
++ return pgd;
+ /*
+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
+ * skip cases like kexec and EFI which make temporary low mappings.
+@@ -399,4 +418,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -345,16 +345,12 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+
+-#ifdef CONFIG_KAISER
+ /*
+- * Instead of one pmd, we aquire two pmds. Being order-1, it is
++ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+-#define PGD_ALLOCATION_ORDER 1
+-#else
+-#define PGD_ALLOCATION_ORDER 0
+-#endif
++#define PGD_ALLOCATION_ORDER kaiser_enabled
+
+ static inline pgd_t *_pgd_alloc(void)
+ {
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,8 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+-#ifdef CONFIG_KAISER
+- if (this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -57,7 +56,6 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
+ }
+-#endif /* CONFIG_KAISER */
+
+ /*
+ * Caution: many callers of this function expect
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -197,6 +197,9 @@
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/queue/kaiser-align-addition-to-x86-mm-makefile.patch b/queue/kaiser-align-addition-to-x86-mm-makefile.patch
new file mode 100644
index 0000000..d8217dd
--- /dev/null
+++ b/queue/kaiser-align-addition-to-x86-mm-makefile.patch
@@ -0,0 +1,26 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:51:10 -0700
+Subject: kaiser: align addition to x86/mm/Makefile
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Use tab not space so they line up properly, kaslr.o also.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -37,5 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulatio
+
+ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+-obj-$(CONFIG_KAISER) += kaiser.o
++obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
++obj-$(CONFIG_KAISER) += kaiser.o
diff --git a/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch b/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
new file mode 100644
index 0000000..2d9ee7e
--- /dev/null
+++ b/queue/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
@@ -0,0 +1,86 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:23:24 -0700
+Subject: kaiser: asm/tlbflush.h handle noPGE at lower level
+
+From: Hugh Dickins <hughd@google.com>
+
+
+I found asm/tlbflush.h too twisty, and think it safer not to avoid
+__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case,
+but instead let it handle kaiser_enabled along with cr3: it can just
+use __native_flush_tlb() for that, no harm in re-disabling preemption.
+
+(This is not the same change as Kirill and Dave have suggested for
+upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge
+check; cr3 is enough for kaiser, and thought to be cheaper than cr4.)
+
+Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals()
+preference from __native_flush_tlb(): unlike the invpcid_flush_all()
+preference in __native_flush_tlb_global(), it's not seen in upstream
+4.14, and was recently reported to be surprisingly slow.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h | 27 +++------------------------
+ 1 file changed, 3 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -152,14 +152,6 @@ static inline void kaiser_flush_tlb_on_r
+
+ static inline void __native_flush_tlb(void)
+ {
+- if (this_cpu_has(X86_FEATURE_INVPCID)) {
+- /*
+- * Note, this works with CR4.PCIDE=0 or 1.
+- */
+- invpcid_flush_all_nonglobals();
+- return;
+- }
+-
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+@@ -183,11 +175,8 @@ static inline void __native_flush_tlb_gl
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+- /*
+- * x86_64 microcode update comes this way when CR4.PGE is not
+- * enabled, and it's safer for all callers to allow this case.
+- */
+- native_write_cr3(native_read_cr3());
++ /* do it with cr3, letting kaiser flush user PCID */
++ __native_flush_tlb();
+ }
+ }
+
+@@ -195,12 +184,6 @@ static inline void __native_flush_tlb_gl
+ {
+ unsigned long flags;
+
+- if (kaiser_enabled) {
+- /* Globals are not used at all */
+- __native_flush_tlb();
+- return;
+- }
+-
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -256,11 +239,7 @@ static inline void __native_flush_tlb_si
+
+ static inline void __flush_tlb_all(void)
+ {
+- if (boot_cpu_has(X86_FEATURE_PGE))
+- __flush_tlb_global();
+- else
+- __flush_tlb();
+-
++ __flush_tlb_global();
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
diff --git a/queue/kaiser-cleanups-while-trying-for-gold-link.patch b/queue/kaiser-cleanups-while-trying-for-gold-link.patch
new file mode 100644
index 0000000..5e95f5b
--- /dev/null
+++ b/queue/kaiser-cleanups-while-trying-for-gold-link.patch
@@ -0,0 +1,134 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 21 Aug 2017 20:11:43 -0700
+Subject: kaiser: cleanups while trying for gold link
+
+From: Hugh Dickins <hughd@google.com>
+
+
+While trying to get our gold link to work, four cleanups:
+matched the gdt_page declaration to its definition;
+in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes;
+lined up the backslashes according to convention in percpu-defs.h;
+deleted the unused irq_stack_pointer addition to irq_stack_union.
+
+Sad to report that aligning backslashes does not appear to help gold
+align to 8192: but while these did not help, they are worth keeping.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/desc.h | 2 +-
+ arch/x86/include/asm/processor.h | 5 -----
+ include/asm-generic/vmlinux.lds.h | 18 ++++++++----------
+ include/linux/percpu-defs.h | 24 ++++++++++++------------
+ 4 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -43,7 +43,7 @@ struct gdt_page {
+ struct desc_struct gdt[GDT_ENTRIES];
+ } __attribute__((aligned(PAGE_SIZE)));
+
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
+
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -335,11 +335,6 @@ union irq_stack_union {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
+-
+- struct {
+- char irq_stack_pointer[64];
+- char unused[IRQ_STACK_SIZE - 64];
+- };
+ };
+
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -778,16 +778,14 @@
+ */
+ #define PERCPU_INPUT(cacheline) \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+- \
+- VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
+- *(.data..percpu..first) \
+- . = ALIGN(cacheline); \
+- *(.data..percpu..user_mapped) \
+- *(.data..percpu..user_mapped..shared_aligned) \
+- . = ALIGN(PAGE_SIZE); \
+- *(.data..percpu..user_mapped..page_aligned) \
+- VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+- \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
++ *(.data..percpu..first) \
++ . = ALIGN(cacheline); \
++ *(.data..percpu..user_mapped) \
++ *(.data..percpu..user_mapped..shared_aligned) \
++ . = ALIGN(PAGE_SIZE); \
++ *(.data..percpu..user_mapped..page_aligned) \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -121,10 +121,10 @@
+ #define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
+
+-#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+-#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+ /*
+@@ -156,11 +156,11 @@
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+-#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+-#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+@@ -185,18 +185,18 @@
+ /*
+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+ */
+-#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+- DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+- __aligned(PAGE_SIZE)
+-
+-#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+- DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+- __aligned(PAGE_SIZE)
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
+
+ /*
+ * Declaration/definition used for per-CPU variables that must be read mostly.
+ */
+-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
++#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
+
+ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
diff --git a/queue/kaiser-delete-kaiser_real_switch-option.patch b/queue/kaiser-delete-kaiser_real_switch-option.patch
new file mode 100644
index 0000000..bea2039
--- /dev/null
+++ b/queue/kaiser-delete-kaiser_real_switch-option.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:30:43 -0700
+Subject: kaiser: delete KAISER_REAL_SWITCH option
+
+From: Hugh Dickins <hughd@google.com>
+
+
+We fail to see what CONFIG_KAISER_REAL_SWITCH is for: it seems to be
+left over from early development, and now just obscures tricky parts
+of the code. Delete it before adding PCIDs, or nokaiser boot option.
+
+(Or if there is some good reason to keep the option, then it needs
+a help text - and a "depends on KAISER", so that all those without
+KAISER are not asked the question. But we'd much rather delete it.)
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 4 ----
+ arch/x86/include/asm/kaiser.h | 4 ----
+ security/Kconfig | 4 ----
+ 3 files changed, 12 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1317,9 +1317,7 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1560,9 +1558,7 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+-#endif
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -21,17 +21,13 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+-#endif
+ movq \reg, %cr3
+ .endm
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -41,10 +41,6 @@ config KAISER
+
+ If you are unsure how to answer this question, answer Y.
+
+-config KAISER_REAL_SWITCH
+- bool "KAISER: actually switch page tables"
+- default y
+-
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
+ help
diff --git a/queue/kaiser-disabled-on-xen-pv.patch b/queue/kaiser-disabled-on-xen-pv.patch
new file mode 100644
index 0000000..c306014
--- /dev/null
+++ b/queue/kaiser-disabled-on-xen-pv.patch
@@ -0,0 +1,42 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: kaiser: disabled on Xen PV
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+
+Kaiser cannot be used on paravirtualized MMUs (namely reading and writing CR3).
+This does not work with KAISER as the CR3 switch from and to user space PGD
+would require to map the whole XEN_PV machinery into both.
+
+More importantly, enabling KAISER on Xen PV doesn't make too much sense, as PV
+guests use distinct %cr3 values for kernel and user already.
+
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -263,6 +263,9 @@ void __init kaiser_check_boottime_disabl
+ char arg[5];
+ int ret;
+
++ if (boot_cpu_has(X86_FEATURE_XENPV))
++ goto silent_disable;
++
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (!strncmp(arg, "on", 2))
+@@ -290,6 +293,8 @@ enable:
+
+ disable:
+ pr_info("Kernel/User page tables isolation: disabled\n");
++
++silent_disable:
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ }
diff --git a/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch b/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch
new file mode 100644
index 0000000..63a8639
--- /dev/null
+++ b/queue/kaiser-do-not-set-_page_nx-on-pgd_none.patch
@@ -0,0 +1,204 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 5 Sep 2017 12:05:01 -0700
+Subject: kaiser: do not set _PAGE_NX on pgd_none
+
+From: Hugh Dickins <hughd@google.com>
+
+
+native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must
+avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry:
+usually that just generated a warning on exit, but sometimes
+more mysterious and damaging failures (our production machines
+could not complete booting).
+
+The original fix to this just avoided adding _PAGE_NX to
+an empty entry; but eventually more problems surfaced with kexec,
+and EFI mapping expected to be a problem too. So now instead
+change native_set_pgd() to update shadow only if _PAGE_USER:
+
+A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure)
+use set_pgd() to set up a temporary internal virtual address space, with
+physical pages remapped at what Kaiser regards as userspace addresses:
+Kaiser then assumes a shadow pgd follows, which it will try to corrupt.
+
+This appears to be responsible for the recent kexec and kdump failures;
+though it's unclear how those did not manifest as a problem before.
+Ah, the shadow pgd will only be assumed to "follow" if the requested
+pgd is on an even-numbered page: so I suppose it was going wrong 50%
+of the time all along.
+
+What we need is a flag to set_pgd(), to tell it we're dealing with
+userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying?
+Add a test for that. But we cannot do the same for pgd_clear()
+(which may be called to clear corrupted entries - set aside the
+question of "corrupt in which pgd?" until later), so there just
+rely on pgd_clear() not being called in the problematic cases -
+with a WARN_ON_ONCE() which should fire half the time if it is.
+
+But this is getting too big for an inline function: move it into
+arch/x86/mm/kaiser.c (which then demands a boot/compressed mod);
+and de-void and de-space native_get_shadow/normal_pgd() while here.
+
+Also make an unnecessary change to KASLR's init_trampoline(): it was
+using set_pgd() to assign a pgd-value to a global variable (not in a
+pg directory page), which was rather scary given Kaiser's previous
+set_pgd() implementation: not a problem now, but too scary to leave
+as was, it could easily blow up if we have to change set_pgd() again.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h | 1
+ arch/x86/include/asm/pgtable_64.h | 51 +++++++++-----------------------------
+ arch/x86/mm/kaiser.c | 42 +++++++++++++++++++++++++++++++
+ arch/x86/mm/kaslr.c | 4 +-
+ 4 files changed, 58 insertions(+), 40 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,6 +9,7 @@
+ */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
++#undef CONFIG_KAISER
+ #undef CONFIG_KASAN
+
+ #include <linux/linkage.h>
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_
+ }
+
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
++
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+- return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+- return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
++ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+ }
+ #else
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ return pgd;
++}
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+ {
+ return pgdp;
+ }
+ #endif /* CONFIG_KAISER */
+
+-/*
+- * Page table pages are page-aligned. The lower half of the top
+- * level is used for userspace and the top half for the kernel.
+- * This returns true for user pages that need to get copied into
+- * both the user and kernel copies of the page tables, and false
+- * for kernel pages that should only be in the kernel copy.
+- */
+-static inline bool is_userspace_pgd(void *__ptr)
+-{
+- unsigned long ptr = (unsigned long)__ptr;
+-
+- return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
+-}
+-
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+-#ifdef CONFIG_KAISER
+- pteval_t extra_kern_pgd_flags = 0;
+- /* Do we need to also populate the shadow pgd? */
+- if (is_userspace_pgd(pgdp)) {
+- native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+- /*
+- * Even if the entry is *mapping* userspace, ensure
+- * that userspace can not use it. This way, if we
+- * get out to userspace running on the kernel CR3,
+- * userspace will crash instead of running.
+- */
+- extra_kern_pgd_flags = _PAGE_NX;
+- }
+- pgdp->pgd = pgd.pgd;
+- pgdp->pgd |= extra_kern_pgd_flags;
+-#else /* CONFIG_KAISER */
+- *pgdp = pgd;
+-#endif
++ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
+ }
+
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -302,4 +302,46 @@ void kaiser_remove_mapping(unsigned long
+ unmap_pud_range_nofree(pgd, addr, end);
+ }
+ }
++
++/*
++ * Page table pages are page-aligned. The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(pgd_t *pgdp)
++{
++ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
++}
++
++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ /*
++ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
++ * skip cases like kexec and EFI which make temporary low mappings.
++ */
++ if (pgd.pgd & _PAGE_USER) {
++ if (is_userspace_pgd(pgdp)) {
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ /*
++ * Even if the entry is *mapping* userspace, ensure
++ * that userspace can not use it. This way, if we
++ * get out to userspace running on the kernel CR3,
++ * userspace will crash instead of running.
++ */
++ pgd.pgd |= _PAGE_NX;
++ }
++ } else if (!pgd.pgd) {
++ /*
++ * pgd_clear() cannot check _PAGE_USER, and is even used to
++ * clear corrupted pgd entries: so just rely on cases like
++ * kexec and EFI never to be using pgd_clear().
++ */
++ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
++ is_userspace_pgd(pgdp))
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ }
++ return pgd;
++}
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -189,6 +189,6 @@ void __meminit init_trampoline(void)
+ *pud_tramp = *pud;
+ }
+
+- set_pgd(&trampoline_pgd_entry,
+- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
++ /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */
++ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
+ }
diff --git a/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch b/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
new file mode 100644
index 0000000..2fed5fc
--- /dev/null
+++ b/queue/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 29 Oct 2017 11:36:19 -0700
+Subject: kaiser: drop is_atomic arg to kaiser_pagetable_walk()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+I have not observed a might_sleep() warning from setup_fixmap_gdt()'s
+use of kaiser_add_mapping() in our tree (why not?), but like upstream
+we have not provided a way for that to pass is_atomic true down to
+kaiser_pagetable_walk(), and at startup it's far from a likely source
+of trouble: so just delete the walk's is_atomic arg and might_sleep().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -107,19 +107,13 @@ static inline unsigned long get_pa_from_
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+-static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
++static pte_t *kaiser_pagetable_walk(unsigned long address)
+ {
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- if (is_atomic) {
+- gfp &= ~GFP_KERNEL;
+- gfp |= __GFP_HIGH | __GFP_ATOMIC;
+- } else
+- might_sleep();
+-
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+ return NULL;
+@@ -194,7 +188,7 @@ static int kaiser_add_user_map(const voi
+ ret = -EIO;
+ break;
+ }
+- pte = kaiser_pagetable_walk(address, false);
++ pte = kaiser_pagetable_walk(address);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
diff --git a/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch b/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch
new file mode 100644
index 0000000..84c6fd8
--- /dev/null
+++ b/queue/kaiser-enhanced-by-kernel-and-user-pcids.patch
@@ -0,0 +1,403 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: kaiser: enhanced by kernel and user PCIDs
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Merged performance improvements to Kaiser, using distinct kernel
+and user Process Context Identifiers to minimize the TLB flushing.
+
+[This work actually all from Dave Hansen 2017-08-30:
+still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.]
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 10 ++++-
+ arch/x86/entry/entry_64_compat.S | 1
+ arch/x86/include/asm/cpufeatures.h | 1
+ arch/x86/include/asm/kaiser.h | 15 ++++++-
+ arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++
+ arch/x86/include/asm/tlbflush.h | 54 +++++++++++++++++++++++-----
+ arch/x86/include/uapi/asm/processor-flags.h | 3 +
+ arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++
+ arch/x86/kvm/x86.c | 3 +
+ arch/x86/mm/kaiser.c | 7 +++
+ arch/x86/mm/tlb.c | 46 ++++++++++++++++++++++-
+ 11 files changed, 182 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1317,7 +1317,10 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ /* Add back kernel PCID and "no flush" bit */
++ orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1558,7 +1561,10 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
+ pushq %rax
+- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ /* Add back kernel PCID and "no flush" bit */
++ orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/pgtable_types.h>
+ #include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -189,6 +189,7 @@
+
+ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
++#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
+
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,5 +1,8 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
++
++#include <uapi/asm/processor-flags.h> /* For PCID constants */
++
+ /*
+ * This file includes the definitions for the KAISER feature.
+ * KAISER is a counter measure against x86_64 side channel attacks on
+@@ -21,13 +24,21 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++orq X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++/*
++ * This can obviously be one instruction by putting the
++ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
++ * But, just leave it now for simplicity.
++ */
++orq X86_CR3_PCID_USER_VAR, \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ movq \reg, %cr3
+ .endm
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -141,6 +141,32 @@
+ _PAGE_SOFT_DIRTY)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+
++/* The ASID is the lower 12 bits of CR3 */
++#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
++
++/* Mask for all the PCID-related bits in CR3: */
++#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
++
++#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
++#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
++#else
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
++/*
++ * PCIDs are unsupported on 32-bit and none of these bits can be
++ * set in CR3:
++ */
++#define X86_CR3_PCID_KERN_FLUSH (0)
++#define X86_CR3_PCID_USER_FLUSH (0)
++#define X86_CR3_PCID_KERN_NOFLUSH (0)
++#define X86_CR3_PCID_USER_NOFLUSH (0)
++#endif
++
+ /*
+ * The cache modes defined here are used to translate between pure SW usage
+ * and the HW defined cache mode bits and/or PAT entries.
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -13,7 +13,6 @@ static inline void __invpcid(unsigned lo
+ unsigned long type)
+ {
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+-
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+@@ -134,14 +133,25 @@ static inline void cr4_set_bits_and_upda
+
+ static inline void __native_flush_tlb(void)
+ {
++ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++ /*
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
++ */
++ preempt_disable();
++ native_write_cr3(native_read_cr3());
++ preempt_enable();
++ return;
++ }
+ /*
+- * If current->mm == NULL then we borrow a mm which may change during a
+- * task switch and therefore we must not be preempted while we write CR3
+- * back:
+- */
+- preempt_disable();
+- native_write_cr3(native_read_cr3());
+- preempt_enable();
++ * We are no longer using globals with KAISER, so a
++ * "nonglobals" flush would work too. But, this is more
++ * conservative.
++ *
++ * Note, this works with CR4.PCIDE=0 or 1.
++ */
++ invpcid_flush_all();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -163,6 +173,8 @@ static inline void __native_flush_tlb_gl
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
++ *
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+ invpcid_flush_all();
+ return;
+@@ -182,7 +194,31 @@ static inline void __native_flush_tlb_gl
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ /*
++ * SIMICS #GP's if you run INVPCID with type 2/3
++ * and X86_CR4_PCIDE clear. Shame!
++ *
++ * The ASIDs used below are hard-coded. But, we must not
++ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
++ * invpcid in the case we are called early.
++ */
++ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ return;
++ }
++ /* Flush the address out of both PCIDs. */
++ /*
++ * An optimization here might be to determine addresses
++ * that are only kernel-mapped and only flush the kernel
++ * ASID. But, userspace flushes are probably much more
++ * important performance-wise.
++ *
++ * Make sure to do only a single invpcid when KAISER is
++ * disabled and we have only a single ASID.
++ */
++ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -77,7 +77,8 @@
+ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
+ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+
+ /*
+ * Intel CPU features in CR4
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,11 +324,45 @@ static __always_inline void setup_smap(s
+ }
+ }
+
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these
++ * instead.
++ *
++ * This is also handy because systems that do not support
++ * PCIDs just end up or'ing a 0 into their CR3, which does
++ * no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
++
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCIDs.
++ */
++#ifdef CONFIG_KAISER
++ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
++ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
++#endif
++ /*
++ * INVPCID has two "groups" of types:
++ * 1/2: Invalidate an individual address
++ * 3/4: Invalidate all contexts
++ *
++ * 1/2 take a PCID, but 3/4 do not. So, 3/4
++ * ignore the PCID argument in the descriptor.
++ * But, we have to be careful not to call 1/2
++ * with an actual non-zero PCID in them before
++ * we do the above cr4_set_bits().
++ */
++ if (cpu_has(c, X86_FEATURE_INVPCID))
++ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, u
+ return 1;
+
+ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
++ !is_long_mode(vcpu))
+ return 1;
+ }
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(
+ } while (0)
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -289,6 +291,11 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
++
++ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
++ __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -34,6 +34,46 @@ struct flush_tlb_info {
+ unsigned long flush_end;
+ };
+
++static void load_new_mm_cr3(pgd_t *pgdir)
++{
++ unsigned long new_mm_cr3 = __pa(pgdir);
++
++ /*
++ * KAISER, plus PCIDs needs some extra work here. But,
++ * if either of features is not present, we need no
++ * PCIDs here and just do a normal, full TLB flush with
++ * the write_cr3()
++ */
++ if (!IS_ENABLED(CONFIG_KAISER) ||
++ !cpu_feature_enabled(X86_FEATURE_PCID))
++ goto out_set_cr3;
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entires for the PCID out when we change
++ * tasks.
++ */
++ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
++
++ /*
++ * The flush from load_cr3() may leave old TLB entries
++ * for userspace in place. We must flush that context
++ * separately. We can theoretically delay doing this
++ * until we actually load up the userspace CR3, but
++ * that's a bit tricky. We have to have the "need to
++ * flush userspace PCID" bit per-cpu and check it in the
++ * exit-to-userspace paths.
++ */
++ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++
++out_set_cr3:
++ /*
++ * Caution: many callers of this function expect
++ * that load_cr3() is serializing and orders TLB
++ * fills with respect to the mm_cpumask writes.
++ */
++ write_cr3(new_mm_cr3);
++}
++
+ /*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+@@ -45,7 +85,7 @@ void leave_mm(int cpu)
+ BUG();
+ if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+- load_cr3(swapper_pg_dir);
++ load_new_mm_cr3(swapper_pg_dir);
+ /*
+ * This gets called in the idle path where RCU
+ * functions differently. Tracing normally
+@@ -120,7 +160,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * ordering guarantee we need.
+ *
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+@@ -167,7 +207,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
+ load_mm_ldt(next);
diff --git a/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch b/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch
new file mode 100644
index 0000000..c2bd8bc
--- /dev/null
+++ b/queue/kaiser-enomem-if-kaiser_pagetable_walk-null.patch
@@ -0,0 +1,52 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:48:02 -0700
+Subject: kaiser: ENOMEM if kaiser_pagetable_walk() NULL
+
+From: Hugh Dickins <hughd@google.com>
+
+
+kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed.
+And avoid its might_sleep() when atomic (though atomic at present unused).
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- might_sleep();
+ if (is_atomic) {
+ gfp &= ~GFP_KERNEL;
+ gfp |= __GFP_HIGH | __GFP_ATOMIC;
+- }
++ } else
++ might_sleep();
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__st
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
+- for (;address < end_addr; address += PAGE_SIZE) {
++ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+ ret = -EIO;
+ break;
+ }
+ pte = kaiser_pagetable_walk(address, false);
++ if (!pte) {
++ ret = -ENOMEM;
++ break;
++ }
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
diff --git a/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch b/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000..92d279d
--- /dev/null
+++ b/queue/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 17:09:44 -0700
+Subject: kaiser: fix build and FIXME in alloc_ldt_struct()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Include linux/kaiser.h instead of asm/kaiser.h to build ldt.c without
+CONFIG_KAISER. kaiser_add_mapping() does already return an error code,
+so fix the FIXME.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ldt.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -16,9 +16,9 @@
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
++#include <linux/kaiser.h>
+
+ #include <asm/ldt.h>
+-#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -49,7 +49,7 @@ static struct ldt_struct *alloc_ldt_stru
+ {
+ struct ldt_struct *new_ldt;
+ int alloc_size;
+- int ret = 0;
++ int ret;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+@@ -77,10 +77,8 @@ static struct ldt_struct *alloc_ldt_stru
+ return NULL;
+ }
+
+- // FIXME: make kaiser_add_mapping() return an error code
+- // when it fails
+- kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+- __PAGE_KERNEL);
++ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++ __PAGE_KERNEL);
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
diff --git a/queue/kaiser-fix-compile-error-without-vsyscall.patch b/queue/kaiser-fix-compile-error-without-vsyscall.patch
new file mode 100644
index 0000000..1c5369d
--- /dev/null
+++ b/queue/kaiser-fix-compile-error-without-vsyscall.patch
@@ -0,0 +1,46 @@
+From foo@baz Tue Feb 13 16:45:20 CET 2018
+Date: Tue, 13 Feb 2018 16:45:20 +0100
+To: Greg KH <gregkh@linuxfoundation.org>
+From: Hugh Dickins <hughd@google.com>
+Subject: kaiser: fix compile error without vsyscall
+
+From: Hugh Dickins <hughd@google.com>
+
+Tobias noticed a compile error on 4.4.115, and it's the same on 4.9.80:
+arch/x86/mm/kaiser.c: In function ‘kaiser_init’:
+arch/x86/mm/kaiser.c:348:8: error: ‘vsyscall_pgprot’ undeclared
+ (first use in this function)
+
+It seems like his combination of kernel options doesn't work for KAISER.
+X86_VSYSCALL_EMULATION is not set on his system, while LEGACY_VSYSCALL
+is set to NONE (LEGACY_VSYSCALL_NONE=y). He managed to get things
+compiling again, by moving the 'extern unsigned long vsyscall_pgprot'
+outside of the preprocessor statement. This works because the optimizer
+removes that code (vsyscall_enabled() is always false) - and that's how
+it was done in some older backports.
+
+Reported-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/vsyscall.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -13,7 +13,6 @@ extern void map_vsyscall(void);
+ */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+ extern bool vsyscall_enabled(void);
+-extern unsigned long vsyscall_pgprot;
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+@@ -22,5 +21,6 @@ static inline bool emulate_vsyscall(stru
+ }
+ static inline bool vsyscall_enabled(void) { return false; }
+ #endif
++extern unsigned long vsyscall_pgprot;
+
+ #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/queue/kaiser-fix-intel_bts-perf-crashes.patch b/queue/kaiser-fix-intel_bts-perf-crashes.patch
new file mode 100644
index 0000000..5ea9a08
--- /dev/null
+++ b/queue/kaiser-fix-intel_bts-perf-crashes.patch
@@ -0,0 +1,132 @@
+From hughd@google.com Mon Feb 5 04:59:18 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 29 Jan 2018 18:16:55 -0800
+Subject: kaiser: fix intel_bts perf crashes
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Hugh Dickins <hughd@google.com>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, Andy Lutomirski <luto@amacapital.net>, Alexander Shishkin <alexander.shishkin@linux.intel.com>, Linus Torvalds <torvalds@linux-foundation.org>, Vince Weaver <vince@deater.net>, stable@vger.kernel.org, Jiri Kosina <jkosina@suse.cz>
+Message-ID: <20180130021655.229155-1-hughd@google.com>
+
+From: Hugh Dickins <hughd@google.com>
+
+Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI;
+Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7:
+honggfuzz -f /tmp/somedirectorywithatleastonefile \
+ --linux_perf_bts_edge -s -- /bin/true
+(honggfuzz from https://github.com/google/honggfuzz) crashed with
+BUG: unable to handle kernel paging request at ffff9d3215100000
+(then narrowed it down to
+perf record --per-thread -e intel_bts//u -- /bin/ls).
+
+The intel_bts driver does not use the 'normal' BTS buffer which is
+exposed through kaiser_add_mapping(), but instead uses the memory
+allocated for the perf AUX buffer.
+
+This obviously comes apart when using PTI, because then the kernel
+mapping, which includes that AUX buffer memory, disappears while
+switched to user page tables.
+
+Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping()
+to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit
+99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now.
+
+Slightly reorganized surrounding code in bts_buffer_setup_aux(),
+so it can better match bts_buffer_free_aux(): free_aux with an #ifdef
+to avoid the loop when PTI is off, but setup_aux needs to loop anyway
+(and kaiser_add_mapping() is cheap when PTI config is off or "pti=off").
+
+Reported-by: Vince Weaver <vincent.weaver@maine.edu>
+Reported-by: Robert Święcki <robert@swiecki.net>
+Analyzed-by: Peter Zijlstra <peterz@infradead.org>
+Analyzed-by: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Vince Weaver <vince@deater.net>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/bts.c | 44 +++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 33 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -22,6 +22,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/device.h>
+ #include <linux/coredump.h>
++#include <linux/kaiser.h>
+
+ #include <asm-generic/sizes.h>
+ #include <asm/perf_event.h>
+@@ -77,6 +78,23 @@ static size_t buf_size(struct page *page
+ return 1 << (PAGE_SHIFT + page_private(page));
+ }
+
++static void bts_buffer_free_aux(void *data)
++{
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++ struct bts_buffer *buf = data;
++ int nbuf;
++
++ for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) {
++ struct page *page = buf->buf[nbuf].page;
++ void *kaddr = page_address(page);
++ size_t page_size = buf_size(page);
++
++ kaiser_remove_mapping((unsigned long)kaddr, page_size);
++ }
++#endif
++ kfree(data);
++}
++
+ static void *
+ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+ {
+@@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pag
+ buf->real_size = size - size % BTS_RECORD_SIZE;
+
+ for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+- unsigned int __nr_pages;
++ void *kaddr = pages[pg];
++ size_t page_size;
++
++ page = virt_to_page(kaddr);
++ page_size = buf_size(page);
++
++ if (kaiser_add_mapping((unsigned long)kaddr,
++ page_size, __PAGE_KERNEL) < 0) {
++ buf->nr_bufs = nbuf;
++ bts_buffer_free_aux(buf);
++ return NULL;
++ }
+
+- page = virt_to_page(pages[pg]);
+- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+ buf->buf[nbuf].page = page;
+ buf->buf[nbuf].offset = offset;
+ buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
++ buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement;
+ pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+ buf->buf[nbuf].size -= pad;
+
+- pg += __nr_pages;
+- offset += __nr_pages << PAGE_SHIFT;
++ pg += page_size >> PAGE_SHIFT;
++ offset += page_size;
+ }
+
+ return buf;
+ }
+
+-static void bts_buffer_free_aux(void *data)
+-{
+- kfree(data);
+-}
+-
+ static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+ {
+ return buf->buf[idx].offset + buf->buf[idx].displacement;
diff --git a/queue/kaiser-fix-perf-crashes.patch b/queue/kaiser-fix-perf-crashes.patch
new file mode 100644
index 0000000..6a9286c
--- /dev/null
+++ b/queue/kaiser-fix-perf-crashes.patch
@@ -0,0 +1,150 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 23 Aug 2017 14:21:14 -0700
+Subject: kaiser: fix perf crashes
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Avoid perf crashes: place debug_store in the user-mapped per-cpu area
+instead of allocating, and use page allocator plus kaiser_add_mapping()
+to keep the BTS and PEBS buffers user-mapped (that is, present in the
+user mapping, though visible only to kernel and hardware). The PEBS
+fixup buffer does not need this treatment.
+
+The need for a user-mapped struct debug_store showed up before doing
+any conscious perf testing: in a couple of kernel paging oopses on
+Westmere, implicating the debug_store offset of the per-cpu area.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c | 57 +++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 45 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -2,11 +2,15 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+
++#include <asm/kaiser.h>
+ #include <asm/perf_event.h>
+ #include <asm/insn.h>
+
+ #include "../perf_event.h"
+
++static
++DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
++
+ /* The size of a BTS record in bytes: */
+ #define BTS_RECORD_SIZE 24
+
+@@ -268,6 +272,39 @@ void fini_debug_store_on_cpu(int cpu)
+
+ static DEFINE_PER_CPU(void *, insn_buffer);
+
++static void *dsalloc(size_t size, gfp_t flags, int node)
++{
++#ifdef CONFIG_KAISER
++ unsigned int order = get_order(size);
++ struct page *page;
++ unsigned long addr;
++
++ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
++ if (!page)
++ return NULL;
++ addr = (unsigned long)page_address(page);
++ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
++ __free_pages(page, order);
++ addr = 0;
++ }
++ return (void *)addr;
++#else
++ return kmalloc_node(size, flags | __GFP_ZERO, node);
++#endif
++}
++
++static void dsfree(const void *buffer, size_t size)
++{
++#ifdef CONFIG_KAISER
++ if (!buffer)
++ return;
++ kaiser_remove_mapping((unsigned long)buffer, size);
++ free_pages((unsigned long)buffer, get_order(size));
++#else
++ kfree(buffer);
++#endif
++}
++
+ static int alloc_pebs_buffer(int cpu)
+ {
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+@@ -278,7 +315,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (!x86_pmu.pebs)
+ return 0;
+
+- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
++ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+@@ -289,7 +326,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+- kfree(buffer);
++ dsfree(buffer, x86_pmu.pebs_buffer_size);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+@@ -315,7 +352,8 @@ static void release_pebs_buffer(int cpu)
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
+- kfree((void *)(unsigned long)ds->pebs_buffer_base);
++ dsfree((void *)(unsigned long)ds->pebs_buffer_base,
++ x86_pmu.pebs_buffer_size);
+ ds->pebs_buffer_base = 0;
+ }
+
+@@ -329,7 +367,7 @@ static int alloc_bts_buffer(int cpu)
+ if (!x86_pmu.bts)
+ return 0;
+
+- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
++ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ if (unlikely(!buffer)) {
+ WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+ return -ENOMEM;
+@@ -355,19 +393,15 @@ static void release_bts_buffer(int cpu)
+ if (!ds || !x86_pmu.bts)
+ return;
+
+- kfree((void *)(unsigned long)ds->bts_buffer_base);
++ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
+ ds->bts_buffer_base = 0;
+ }
+
+ static int alloc_ds_buffer(int cpu)
+ {
+- int node = cpu_to_node(cpu);
+- struct debug_store *ds;
+-
+- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+- if (unlikely(!ds))
+- return -ENOMEM;
++ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
+
++ memset(ds, 0, sizeof(*ds));
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+@@ -381,7 +415,6 @@ static void release_ds_buffer(int cpu)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+- kfree(ds);
+ }
+
+ void release_ds_buffers(void)
diff --git a/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch b/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
new file mode 100644
index 0000000..babde9a
--- /dev/null
+++ b/queue/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
@@ -0,0 +1,72 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 21 Sep 2017 20:39:56 -0700
+Subject: kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
+
+From: Hugh Dickins <hughd@google.com>
+
+
+pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi()
+adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly
+when not CONFIG_KAISER.
+
+Although the minimal change is to add an #ifdef CONFIG_KAISER around
+the addq line, that looks cluttered, and I prefer how the first call
+to do_nmi() handled it: prepare args in %rdi and %rsi before getting
+into the CONFIG_KAISER block, since it does not touch them at all.
+
+And while we're here, place the "#ifdef CONFIG_KAISER" that follows
+each, to enclose the "Unconditionally restore CR3" comment: matching
+how the "Unconditionally use kernel CR3" comment above is enclosed.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1323,12 +1323,13 @@ ENTRY(nmi)
+ movq %rax, %cr3
+ #endif
+ call do_nmi
++
++#ifdef CONFIG_KAISER
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+-#ifdef CONFIG_KAISER
+ popq %rax
+ mov %rax, %cr3
+ #endif
+@@ -1552,6 +1553,8 @@ end_repeat_nmi:
+ SWAPGS
+ xorl %ebx, %ebx
+ 1:
++ movq %rsp, %rdi
++ movq $-1, %rsi
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+@@ -1564,16 +1567,14 @@ end_repeat_nmi:
+ #endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+- movq %rsp, %rdi
+- addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+- movq $-1, %rsi
+ call do_nmi
++
++#ifdef CONFIG_KAISER
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+-#ifdef CONFIG_KAISER
+ popq %rax
+ mov %rax, %cr3
+ #endif
diff --git a/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch b/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
new file mode 100644
index 0000000..6da60ee
--- /dev/null
+++ b/queue/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
@@ -0,0 +1,33 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 4 Dec 2017 20:13:35 -0800
+Subject: kaiser: fix unlikely error in alloc_ldt_struct()
+
+From: Hugh Dickins <hughd@google.com>
+
+
+An error from kaiser_add_mapping() here is not at all likely, but
+Eric Biggers rightly points out that __free_ldt_struct() relies on
+new_ldt->size being initialized: move that up.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/ldt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -79,11 +79,11 @@ static struct ldt_struct *alloc_ldt_stru
+
+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ __PAGE_KERNEL);
++ new_ldt->size = size;
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+ }
+- new_ldt->size = size;
+ return new_ldt;
+ }
+
diff --git a/queue/kaiser-kaiser-depends-on-smp.patch b/queue/kaiser-kaiser-depends-on-smp.patch
new file mode 100644
index 0000000..d9a4854
--- /dev/null
+++ b/queue/kaiser-kaiser-depends-on-smp.patch
@@ -0,0 +1,54 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 13 Sep 2017 14:03:10 -0700
+Subject: kaiser: KAISER depends on SMP
+
+From: Hugh Dickins <hughd@google.com>
+
+
+It is absurd that KAISER should depend on SMP, but apparently nobody
+has tried a UP build before: which breaks on implicit declaration of
+function 'per_cpu_offset' in arch/x86/mm/kaiser.c.
+
+Now, you would expect that to be trivially fixed up; but looking at
+the System.map when that block is #ifdef'ed out of kaiser_init(),
+I see that in a UP build __per_cpu_user_mapped_end is precisely at
+__per_cpu_user_mapped_start, and the items carefully gathered into
+that section for user-mapping on SMP, dispersed elsewhere on UP.
+
+So, some other kind of section assignment will be needed on UP,
+but implementing that is not a priority: just make KAISER depend
+on SMP for now.
+
+Also inserted a blank line before the option, tidied up the
+brief Kconfig help message, and added an "If unsure, Y".
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/Kconfig | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,14 +30,16 @@ config SECURITY
+ model will be used.
+
+ If you are unsure how to answer this question, answer N.
++
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
+ default y
+- depends on X86_64
+- depends on !PARAVIRT
++ depends on X86_64 && SMP && !PARAVIRT
+ help
+- This enforces a strict kernel and user space isolation in order to close
+- hardware side channels on kernel address information.
++ This enforces a strict kernel and user space isolation, in order
++ to close hardware side channels on kernel address information.
++
++ If you are unsure how to answer this question, answer Y.
+
+ config KAISER_REAL_SWITCH
+ bool "KAISER: actually switch page tables"
diff --git a/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch b/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
new file mode 100644
index 0000000..75c1365
--- /dev/null
+++ b/queue/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
@@ -0,0 +1,86 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 4 Nov 2017 18:43:06 -0700
+Subject: kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID
+check, instead of each caller doing it inline first: nobody needs
+to optimize for the noPCID case, it's clearer this way, and better
+suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines
+by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/tlbflush.h | 4 ++--
+ arch/x86/mm/kaiser.c | 6 +++---
+ arch/x86/mm/tlb.c | 8 ++++----
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -158,7 +158,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -217,7 +217,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -435,12 +435,12 @@ void kaiser_setup_pcid(void)
+
+ /*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+- * if cpu does not, then the NOFLUSH bit will never have been set.
++ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+ */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+- this_cpu_write(x86_cr3_pcid_user,
++ if (this_cpu_has(X86_FEATURE_PCID))
++ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -39,7 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -50,10 +50,10 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+- * but keep that line in there in case something changes.
++ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
++ * would be needed in the write_cr3() below - if PCIDs enabled.
+ */
+- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ kaiser_flush_tlb_on_return_to_user();
+ }
+
diff --git a/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch b/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
new file mode 100644
index 0000000..66fe640
--- /dev/null
+++ b/queue/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
@@ -0,0 +1,50 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 2 Oct 2017 10:57:24 -0700
+Subject: kaiser: kaiser_remove_mapping() move along the pgd
+
+From: Hugh Dickins <hughd@google.com>
+
+
+When removing the bogus comment from kaiser_remove_mapping(),
+I really ought to have checked the extent of its bogosity: as
+Neel points out, there is nothing to stop unmap_pud_range_nofree()
+from continuing beyond the end of a pud (and starting in the wrong
+position on the next).
+
+Fix kaiser_remove_mapping() to constrain the extent and advance pgd
+pointer correctly: use pgd_addr_end() macro as used throughout base
+mm (but don't assume page-rounded start and size in this case).
+
+But this bug was very unlikely to trigger in this backport: since
+any buddy allocation is contained within a single pud extent, and
+we are not using vmapped stacks (and are only mapping one page of
+stack anyway): the only way to hit this bug here would be when
+freeing a large modified ldt.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long
+ extern void unmap_pud_range_nofree(pgd_t *pgd,
+ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+- unsigned long addr;
++ unsigned long addr, next;
++ pgd_t *pgd;
+
+- for (addr = start; addr < end; addr += PGDIR_SIZE) {
+- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+- unmap_pud_range_nofree(pgd, addr, end);
++ pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ for (addr = start; addr < end; pgd++, addr = next) {
++ next = pgd_addr_end(addr, end);
++ unmap_pud_range_nofree(pgd, addr, next);
+ }
+ }
+
diff --git a/queue/kaiser-kernel-address-isolation.patch b/queue/kaiser-kernel-address-isolation.patch
new file mode 100644
index 0000000..079f4d6
--- /dev/null
+++ b/queue/kaiser-kernel-address-isolation.patch
@@ -0,0 +1,979 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+Date: Thu, 4 May 2017 14:26:50 +0200
+Subject: KAISER: Kernel Address Isolation
+
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+
+
+This patch introduces our implementation of KAISER (Kernel Address Isolation to
+have Side-channels Efficiently Removed), a kernel isolation technique to close
+hardware side channels on kernel address information.
+
+More information about the patch can be found on:
+
+ https://github.com/IAIK/KAISER
+
+From: Richard Fellner <richard.fellner@student.tugraz.at>
+From: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode
+Date: Thu, 4 May 2017 14:26:50 +0200
+Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2
+Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5
+
+To: <linux-kernel@vger.kernel.org>
+To: <kernel-hardening@lists.openwall.com>
+Cc: <clementine.maurice@iaik.tugraz.at>
+Cc: <moritz.lipp@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: <kirill.shutemov@linux.intel.com>
+Cc: <anders.fogh@gdata-adan.de>
+
+After several recent works [1,2,3] KASLR on x86_64 was basically
+considered dead by many researchers. We have been working on an
+efficient but effective fix for this problem and found that not mapping
+the kernel space when running in user mode is the solution to this
+problem [4] (the corresponding paper [5] will be presented at ESSoS17).
+
+With this RFC patch we allow anybody to configure their kernel with the
+flag CONFIG_KAISER to add our defense mechanism.
+
+If there are any questions we would love to answer them.
+We also appreciate any comments!
+
+Cheers,
+Daniel (+ the KAISER team from Graz University of Technology)
+
+[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
+[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf
+[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf
+[4] https://github.com/IAIK/KAISER
+[5] https://gruss.cc/files/kaiser.pdf
+
+[patch based also on
+https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch]
+
+Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 17 +++
+ arch/x86/entry/entry_64_compat.S | 7 +
+ arch/x86/include/asm/hw_irq.h | 2
+ arch/x86/include/asm/kaiser.h | 113 ++++++++++++++++++++++++
+ arch/x86/include/asm/pgtable.h | 4
+ arch/x86/include/asm/pgtable_64.h | 21 ++++
+ arch/x86/include/asm/pgtable_types.h | 12 ++
+ arch/x86/include/asm/processor.h | 7 +
+ arch/x86/kernel/cpu/common.c | 4
+ arch/x86/kernel/espfix_64.c | 6 +
+ arch/x86/kernel/head_64.S | 16 ++-
+ arch/x86/kernel/irqinit.c | 2
+ arch/x86/kernel/process.c | 2
+ arch/x86/mm/Makefile | 2
+ arch/x86/mm/kaiser.c | 160 +++++++++++++++++++++++++++++++++++
+ arch/x86/mm/pageattr.c | 2
+ arch/x86/mm/pgtable.c | 26 +++++
+ include/asm-generic/vmlinux.lds.h | 11 ++
+ include/linux/percpu-defs.h | 30 ++++++
+ init/main.c | 6 +
+ kernel/fork.c | 8 +
+ security/Kconfig | 7 +
+ 22 files changed, 449 insertions(+), 16 deletions(-)
+ create mode 100644 arch/x86/include/asm/kaiser.h
+ create mode 100644 arch/x86/mm/kaiser.c
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -36,6 +36,7 @@
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
+ #include <asm/export.h>
++#include <asm/kaiser.h>
+ #include <linux/err.h>
+
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+@@ -146,6 +147,7 @@ ENTRY(entry_SYSCALL_64)
+ * it is too small to ever cause noticeable irq latency.
+ */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ /*
+ * A hypervisor implementation might want to use a label
+ * after the swapgs, so that it can do the swapgs
+@@ -228,6 +230,7 @@ entry_SYSCALL_64_fastpath:
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+
+@@ -323,10 +326,12 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+
+ opportunistic_sysret_failed:
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+ END(entry_SYSCALL_64)
+@@ -424,6 +429,7 @@ ENTRY(ret_from_fork)
+ movq %rsp, %rdi
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ TRACE_IRQS_ON /* user mode is traced as IRQS on */
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+
+@@ -478,6 +484,7 @@ END(irq_entries_start)
+ * tracking that we're in kernel mode.
+ */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+@@ -535,6 +542,7 @@ GLOBAL(retint_user)
+ mov %rsp,%rdi
+ call prepare_exit_to_usermode
+ TRACE_IRQS_IRETQ
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+
+@@ -612,6 +620,7 @@ native_irq_return_ldt:
+
+ pushq %rdi /* Stash user RDI */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ movq PER_CPU_VAR(espfix_waddr), %rdi
+ movq %rax, (0*8)(%rdi) /* user RAX */
+ movq (1*8)(%rsp), %rax /* user RIP */
+@@ -638,6 +647,7 @@ native_irq_return_ldt:
+ * still points to an RO alias of the ESPFIX stack.
+ */
+ orq PER_CPU_VAR(espfix_stack), %rax
++ SWITCH_USER_CR3
+ SWAPGS
+ movq %rax, %rsp
+
+@@ -1034,6 +1044,7 @@ ENTRY(paranoid_entry)
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+ 1: ret
+ END(paranoid_entry)
+@@ -1056,6 +1067,7 @@ ENTRY(paranoid_exit)
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
+ TRACE_IRQS_IRETQ
++ SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+ jmp paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+@@ -1084,6 +1096,7 @@ ENTRY(error_entry)
+ * from user mode due to an IRET fault.
+ */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+
+ .Lerror_entry_from_usermode_after_swapgs:
+ /*
+@@ -1135,6 +1148,7 @@ ENTRY(error_entry)
+ * Switch to kernel gsbase:
+ */
+ SWAPGS
++ SWITCH_KERNEL_CR3
+
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+@@ -1235,6 +1249,7 @@ ENTRY(nmi)
+ */
+
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1275,6 +1290,7 @@ ENTRY(nmi)
+ * work, because we don't want to enable interrupts. Fortunately,
+ * do_nmi doesn't modify pt_regs.
+ */
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+
+@@ -1486,6 +1502,7 @@ end_repeat_nmi:
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz nmi_restore
+ nmi_swapgs:
++ SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_EXTRA_REGS
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -13,6 +13,7 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
+@@ -48,6 +49,7 @@
+ ENTRY(entry_SYSENTER_compat)
+ /* Interrupts are off on entry. */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+@@ -184,6 +186,7 @@ ENDPROC(entry_SYSENTER_compat)
+ ENTRY(entry_SYSCALL_compat)
+ /* Interrupts are off on entry. */
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+
+ /* Stash user ESP and switch to the kernel stack. */
+ movl %esp, %r8d
+@@ -259,6 +262,7 @@ sysret32_from_system_call:
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r10
++ SWITCH_USER_CR3
+ movq RSP-ORIG_RAX(%rsp), %rsp
+ swapgs
+ sysretl
+@@ -297,7 +301,7 @@ ENTRY(entry_INT80_compat)
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ ASM_CLAC /* Do this early to minimize exposure */
+ SWAPGS
+-
++ SWITCH_KERNEL_CR3_NO_STACK
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
+ * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+@@ -338,6 +342,7 @@ ENTRY(entry_INT80_compat)
+
+ /* Go back to user mode. */
+ TRACE_IRQS_ON
++ SWITCH_USER_CR3_NO_STACK
+ SWAPGS
+ jmp restore_regs_and_iret
+ END(entry_INT80_compat)
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -178,7 +178,7 @@ extern char irq_entries_start[];
+ #define VECTOR_RETRIGGERED ((void *)~0UL)
+
+ typedef struct irq_desc* vector_irq_t[NR_VECTORS];
+-DECLARE_PER_CPU(vector_irq_t, vector_irq);
++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
+
+ #endif /* !ASSEMBLY_ */
+
+--- /dev/null
++++ b/arch/x86/include/asm/kaiser.h
+@@ -0,0 +1,113 @@
++#ifndef _ASM_X86_KAISER_H
++#define _ASM_X86_KAISER_H
++
++/* This file includes the definitions for the KAISER feature.
++ * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory.
++ * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped,
++ * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
++ * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory.
++ *
++ * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions
++ * of the user space, or the stacks.
++ */
++#ifdef __ASSEMBLY__
++#ifdef CONFIG_KAISER
++
++.macro _SWITCH_TO_KERNEL_CR3 reg
++movq %cr3, \reg
++andq $(~0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro _SWITCH_TO_USER_CR3 reg
++movq %cr3, \reg
++orq $(0x1000), \reg
++movq \reg, %cr3
++.endm
++
++.macro SWITCH_KERNEL_CR3
++pushq %rax
++_SWITCH_TO_KERNEL_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_USER_CR3
++pushq %rax
++_SWITCH_TO_USER_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_KERNEL_CR3_NO_STACK
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_KERNEL_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++.endm
++
++
++.macro SWITCH_USER_CR3_NO_STACK
++
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_USER_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++
++.endm
++
++#else /* CONFIG_KAISER */
++
++.macro SWITCH_KERNEL_CR3 reg
++.endm
++.macro SWITCH_USER_CR3 reg
++.endm
++.macro SWITCH_USER_CR3_NO_STACK
++.endm
++.macro SWITCH_KERNEL_CR3_NO_STACK
++.endm
++
++#endif /* CONFIG_KAISER */
++#else /* __ASSEMBLY__ */
++
++
++#ifdef CONFIG_KAISER
++// Upon kernel/user mode switch, it may happen that
++// the address space has to be switched before the registers have been stored.
++// To change the address space, another register is needed.
++// A register therefore has to be stored/restored.
++//
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++#endif /* CONFIG_KAISER */
++
++/**
++ * shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ * @addr: the start address of the range
++ * @size: the size of the range
++ * @flags: The mapping flags of the pages
++ *
++ * the mapping is done on a global scope, so no bigger synchronization has to be done.
++ * the pages have to be manually unmapped again when they are not needed any longer.
++ */
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++
++
++/**
++ * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ * @addr: the start address of the range
++ * @size: the size of the range
++ */
++extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
++
++/**
++ * shadowmem_initialize_mapping - Initalize the shadow mapping
++ *
++ * most parts of the shadow mapping can be mapped upon boot time.
++ * only the thread stacks have to be mapped on runtime.
++ * the mapped regions are not unmapped at all.
++ */
++extern void kaiser_init(void);
++
++#endif
++
++
++
++#endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -904,6 +904,10 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
++#ifdef CONFIG_KAISER
++ // clone the shadow pgd part as well
++ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++#endif
+ }
+
+ #define PTE_SHIFT ilog2(PTRS_PER_PTE)
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_
+ native_set_pud(pud, native_make_pud(0));
+ }
+
++#ifdef CONFIG_KAISER
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
++}
++
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
++}
++#endif /* CONFIG_KAISER */
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++#ifdef CONFIG_KAISER
++ // We know that a pgd is page aligned.
++ // Therefore the lower indices have to be mapped to user space.
++ // These pages are mapped to the shadow mapping.
++ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ }
++
++ pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++#else /* CONFIG_KAISER */
+ *pgdp = pgd;
++#endif
+ }
+
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -45,7 +45,11 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#ifdef CONFIG_KAISER
++#define _PAGE_GLOBAL (_AT(pteval_t, 0))
++#else
++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+@@ -119,7 +123,11 @@
+ #define _PAGE_DEVMAP (_AT(pteval_t, 0))
+ #endif
+
+-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#ifdef CONFIG_KAISER
++#define _PAGE_PROTNONE (_AT(pteval_t, 0))
++#else
++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#endif
+
+ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -308,7 +308,7 @@ struct tss_struct {
+
+ } ____cacheline_aligned;
+
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
+
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+@@ -335,6 +335,11 @@ union irq_stack_union {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
++
++ struct {
++ char irq_stack_pointer[64];
++ char unused[IRQ_STACK_SIZE - 64];
++ };
+ };
+
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -93,7 +93,7 @@ static const struct cpu_dev default_cpu
+
+ static const struct cpu_dev *this_cpu = &default_cpu;
+
+-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
+ #ifdef CONFIG_X86_64
+ /*
+ * We need valid kernel segments for data and code in long mode too
+@@ -1365,7 +1365,7 @@ static const unsigned int exception_stac
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+ };
+
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+
+ /* May not be marked __init: used by software suspend */
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -41,6 +41,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/setup.h>
+ #include <asm/espfix.h>
++#include <asm/kaiser.h>
+
+ /*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void)
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
++#ifdef CONFIG_KAISER
++ // add the esp stack pud to the shadow mapping here.
++ // This can be done directly, because the fixup stack has its own pud
++ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
++#endif
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -405,6 +405,14 @@ GLOBAL(early_recursion_flag)
+ .balign PAGE_SIZE; \
+ GLOBAL(name)
+
++#ifdef CONFIG_KAISER
++#define NEXT_PGD_PAGE(name) \
++ .balign 2 * PAGE_SIZE; \
++GLOBAL(name)
++#else
++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#endif
++
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+ #define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+@@ -414,7 +422,7 @@ GLOBAL(name)
+ .endr
+
+ __INITDATA
+-NEXT_PAGE(early_level4_pgt)
++NEXT_PGD_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+
+@@ -424,10 +432,10 @@ NEXT_PAGE(early_dynamic_pgts)
+ .data
+
+ #ifndef CONFIG_XEN
+-NEXT_PAGE(init_level4_pgt)
+- .fill 512,8,0
++NEXT_PGD_PAGE(init_level4_pgt)
++ .fill 2*512,8,0
+ #else
+-NEXT_PAGE(init_level4_pgt)
++NEXT_PGD_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+--- a/arch/x86/kernel/irqinit.c
++++ b/arch/x86/kernel/irqinit.c
+@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
+ .flags = IRQF_NO_THREAD,
+ };
+
+-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
+ [0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
+ };
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -41,7 +41,7 @@
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulatio
+ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+-
++obj-$(CONFIG_KAISER) += kaiser.o
+--- /dev/null
++++ b/arch/x86/mm/kaiser.c
+@@ -0,0 +1,160 @@
++
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/bug.h>
++#include <linux/init.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++
++#include <linux/uaccess.h>
++#include <asm/pgtable.h>
++#include <asm/pgalloc.h>
++#include <asm/desc.h>
++#ifdef CONFIG_KAISER
++
++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/**
++ * Get the real ppn from a address in kernel mapping.
++ * @param address The virtual adrress
++ * @return the physical address
++ */
++static inline unsigned long get_pa_from_mapping (unsigned long address)
++{
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++
++ pgd = pgd_offset_k(address);
++ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++
++ pud = pud_offset(pgd, address);
++ BUG_ON(pud_none(*pud));
++
++ if (pud_large(*pud)) {
++ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++ }
++
++ pmd = pmd_offset(pud, address);
++ BUG_ON(pmd_none(*pmd));
++
++ if (pmd_large(*pmd)) {
++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++ }
++
++ pte = pte_offset_kernel(pmd, address);
++ BUG_ON(pte_none(*pte));
++
++ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++}
++
++void _kaiser_copy (unsigned long start_addr, unsigned long size,
++ unsigned long flags)
++{
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++ unsigned long address;
++ unsigned long end_addr = start_addr + size;
++ unsigned long target_address;
++
++ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
++ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
++ target_address = get_pa_from_mapping(address);
++
++ pgd = native_get_shadow_pgd(pgd_offset_k(address));
++
++ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
++ BUG_ON(pgd_large(*pgd));
++
++ pud = pud_offset(pgd, address);
++ if (pud_none(*pud)) {
++ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
++ }
++ BUG_ON(pud_large(*pud));
++
++ pmd = pmd_offset(pud, address);
++ if (pmd_none(*pmd)) {
++ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
++ }
++ BUG_ON(pmd_large(*pmd));
++
++ pte = pte_offset_kernel(pmd, address);
++ if (pte_none(*pte)) {
++ set_pte(pte, __pte(flags | target_address));
++ } else {
++ BUG_ON(__pa(pte_page(*pte)) != target_address);
++ }
++ }
++}
++
++// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping
++static inline void __init _kaiser_init(void)
++{
++ pgd_t *pgd;
++ int i = 0;
++
++ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
++ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
++ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++ }
++}
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++spinlock_t shadow_table_lock;
++void __init kaiser_init(void)
++{
++ int cpu;
++ spin_lock_init(&shadow_table_lock);
++
++ spin_lock(&shadow_table_lock);
++
++ _kaiser_init();
++
++ for_each_possible_cpu(cpu) {
++ // map the per cpu user variables
++ _kaiser_copy(
++ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
++ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
++ __PAGE_KERNEL);
++ }
++
++ // map the entry/exit text section, which is responsible to switch between user- and kernel mode
++ _kaiser_copy(
++ (unsigned long) __entry_text_start,
++ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
++ __PAGE_KERNEL_RX);
++
++ // the fixed map address of the idt_table
++ _kaiser_copy(
++ (unsigned long) idt_descr.address,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL_RO);
++
++ spin_unlock(&shadow_table_lock);
++}
++
++// add a mapping to the shadow-mapping, and synchronize the mappings
++void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++ spin_lock(&shadow_table_lock);
++ _kaiser_copy(addr, size, flags);
++ spin_unlock(&shadow_table_lock);
++}
++
++extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
++void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ spin_lock(&shadow_table_lock);
++ do {
++ unmap_pud_range(pgd, start, start + size);
++ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
++ spin_unlock(&shadow_table_lock);
++}
++#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -823,7 +823,7 @@ static void unmap_pmd_range(pud_t *pud,
+ pud_clear(pud);
+ }
+
+-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+ {
+ pud_t *pud = pud_offset(pgd, start);
+
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -346,12 +346,38 @@ static inline void _pgd_free(pgd_t *pgd)
+ #else
+ static inline pgd_t *_pgd_alloc(void)
+ {
++#ifdef CONFIG_KAISER
++ // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
++ // block. Therefore, we have to allocate at least 3 pages. However, the
++ // __get_free_pages returns us 4 pages. Hence, we store the base pointer at
++ // the beginning of the page of our 8kb-aligned memory block in order to
++ // correctly free it afterwars.
++
++ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
++
++ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
++ {
++ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
++ return (pgd_t *) pages;
++ }
++ else
++ {
++ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
++ return (pgd_t *) (pages + PAGE_SIZE);
++ }
++#else
+ return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#endif
+ }
+
+ static inline void _pgd_free(pgd_t *pgd)
+ {
++#ifdef CONFIG_KAISER
++ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
++ free_pages(pages, get_order(4*PAGE_SIZE));
++#else
+ free_page((unsigned long)pgd);
++#endif
+ }
+ #endif /* CONFIG_X86_PAE */
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -778,7 +778,16 @@
+ */
+ #define PERCPU_INPUT(cacheline) \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
+- *(.data..percpu..first) \
++ \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
++ *(.data..percpu..first) \
++ . = ALIGN(cacheline); \
++ *(.data..percpu..user_mapped) \
++ *(.data..percpu..user_mapped..shared_aligned) \
++ . = ALIGN(PAGE_SIZE); \
++ *(.data..percpu..user_mapped..page_aligned) \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
++ \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -35,6 +35,12 @@
+
+ #endif
+
++#ifdef CONFIG_KAISER
++#define USER_MAPPED_SECTION "..user_mapped"
++#else
++#define USER_MAPPED_SECTION ""
++#endif
++
+ /*
+ * Base implementations of per-CPU variable declarations and definitions, where
+ * the section in which the variable is to be placed is provided by the
+@@ -115,6 +121,12 @@
+ #define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
+
++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
+ /*
+ * Declaration/definition used for per-CPU variables that must come first in
+ * the set of variables.
+@@ -144,6 +156,14 @@
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
+ #define DECLARE_PER_CPU_ALIGNED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
+ ____cacheline_aligned
+@@ -162,6 +182,16 @@
+ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
+ __aligned(PAGE_SIZE)
++/*
++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
++ */
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
+
+ /*
+ * Declaration/definition used for per-CPU variables that must be read mostly.
+--- a/init/main.c
++++ b/init/main.c
+@@ -86,6 +86,9 @@
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#endif
+
+ static int kernel_init(void *);
+
+@@ -473,6 +476,9 @@ static void __init mm_init(void)
+ pgtable_init();
+ vmalloc_init();
+ ioremap_huge_init();
++#ifdef CONFIG_KAISER
++ kaiser_init();
++#endif
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -211,8 +211,12 @@ static unsigned long *alloc_thread_stack
+ #endif
+ }
+
++extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_stack(struct task_struct *tsk)
+ {
++#ifdef CONFIG_KAISER
++ kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
++#endif
+ #ifdef CONFIG_VMAP_STACK
+ if (task_stack_vm_area(tsk)) {
+ unsigned long flags;
+@@ -468,6 +472,7 @@ void set_task_stack_end_magic(struct tas
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
+ }
+
++extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ struct task_struct *tsk;
+@@ -495,6 +500,9 @@ static struct task_struct *dup_task_stru
+ * functions again.
+ */
+ tsk->stack = stack;
++#ifdef CONFIG_KAISER
++ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++#endif
+ #ifdef CONFIG_VMAP_STACK
+ tsk->stack_vm_area = stack_vm_area;
+ #endif
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,6 +30,13 @@ config SECURITY
+ model will be used.
+
+ If you are unsure how to answer this question, answer N.
++config KAISER
++ bool "Remove the kernel mapping in user mode"
++ depends on X86_64
++ depends on !PARAVIRT
++ help
++ This enforces a strict kernel and user space isolation in order to close
++ hardware side channels on kernel address information.
+
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
diff --git a/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch b/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
new file mode 100644
index 0000000..3e82917
--- /dev/null
+++ b/queue/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
@@ -0,0 +1,392 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 17 Aug 2017 15:00:37 -0700
+Subject: kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+We have many machines (Westmere, Sandybridge, Ivybridge) supporting
+PCID but not INVPCID: on these load_new_mm_cr3() simply crashed.
+
+Flushing user context inside load_new_mm_cr3() without the use of
+invpcid is difficult: momentarily switch from kernel to user context
+and back to do so? I'm not sure whether that can be safely done at
+all, and would risk polluting user context with kernel internals,
+and kernel context with stale user externals.
+
+Instead, follow the hint in the comment that was there: change
+X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3()
+can leave a note in it, for SWITCH_USER_CR3 on return to userspace to
+flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH.
+
+Which works well enough that there's no need to do it this way only
+when invpcid is unsupported: it's a good alternative to invpcid here.
+But there's a couple of inlines in asm/tlbflush.h that need to do the
+same trick, so it's best to localize all this per-cpu business in
+mm/kaiser.c: moving that part of the initialization from setup_pcid()
+to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the
+function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a
+KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit.
+
+I did try to make the feature tests in asm/tlbflush.h more consistent
+with each other: there seem to be far too many ways of performing such
+tests, and I don't have a good grasp of their differences. At first
+I converted them all to be static_cpu_has(): but that proved to be a
+mistake, as the comment in __native_flush_tlb_single() hints; so then
+I reversed and made them all this_cpu_has(). Probably all gratuitous
+change, but that's the way it's working at present.
+
+I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR
+gets re-initialized by each cpu (before and after these changes):
+no problem when (as usual) all cpus on a machine have the same
+features, but in principle incorrect. However, my experiment
+to per-cpu-ify that one did not end well...
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h | 18 +++++++-----
+ arch/x86/include/asm/tlbflush.h | 56 +++++++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c | 22 ---------------
+ arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++----
+ arch/x86/mm/tlb.c | 46 ++++++++++++--------------------
+ 5 files changed, 113 insertions(+), 79 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -32,13 +32,12 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-/*
+- * This can obviously be one instruction by putting the
+- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
+- * But, just leave it now for simplicity.
+- */
+-orq X86_CR3_PCID_USER_VAR, \reg
+-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
++orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++js 9f
++// FLUSH this time, reset to NOFLUSH for next time
++// But if nopcid? Consider using 0x80 for user pcid?
++movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++9:
+ movq \reg, %cr3
+ .endm
+
+@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
++extern unsigned long X86_CR3_PCID_KERN_VAR;
++DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -13,6 +13,7 @@ static inline void __invpcid(unsigned lo
+ unsigned long type)
+ {
+ struct { u64 d[2]; } desc = { { pcid, addr } };
++
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+@@ -131,27 +132,42 @@ static inline void cr4_set_bits_and_upda
+ cr4_set_bits(mask);
+ }
+
++/*
++ * Declare a couple of kaiser interfaces here for convenience,
++ * to avoid the need for asm/kaiser.h in unexpected places.
++ */
++#ifdef CONFIG_KAISER
++extern void kaiser_setup_pcid(void);
++extern void kaiser_flush_tlb_on_return_to_user(void);
++#else
++static inline void kaiser_setup_pcid(void)
++{
++}
++static inline void kaiser_flush_tlb_on_return_to_user(void)
++{
++}
++#endif
++
+ static inline void __native_flush_tlb(void)
+ {
+- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+- * If current->mm == NULL then we borrow a mm which may change during a
+- * task switch and therefore we must not be preempted while we write CR3
+- * back:
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+- preempt_disable();
+- native_write_cr3(native_read_cr3());
+- preempt_enable();
++ invpcid_flush_all_nonglobals();
+ return;
+ }
++
+ /*
+- * We are no longer using globals with KAISER, so a
+- * "nonglobals" flush would work too. But, this is more
+- * conservative.
+- *
+- * Note, this works with CR4.PCIDE=0 or 1.
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
+ */
+- invpcid_flush_all();
++ preempt_disable();
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
++ native_write_cr3(native_read_cr3());
++ preempt_enable();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+@@ -167,9 +183,13 @@ static inline void __native_flush_tlb_gl
+
+ static inline void __native_flush_tlb_global(void)
+ {
++#ifdef CONFIG_KAISER
++ /* Globals are not used at all */
++ __native_flush_tlb();
++#else
+ unsigned long flags;
+
+- if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+@@ -186,10 +206,9 @@ static inline void __native_flush_tlb_gl
+ * be called from deep inside debugging code.)
+ */
+ raw_local_irq_save(flags);
+-
+ __native_flush_tlb_global_irq_disabled();
+-
+ raw_local_irq_restore(flags);
++#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -200,9 +219,12 @@ static inline void __native_flush_tlb_si
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+- * invpcid in the case we are called early.
++ * invlpg in the case we are called early.
+ */
++
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,33 +324,12 @@ static __always_inline void setup_smap(s
+ }
+ }
+
+-/*
+- * These can have bit 63 set, so we can not just use a plain "or"
+- * instruction to get their value or'd into CR3. It would take
+- * another register. So, we use a memory reference to these
+- * instead.
+- *
+- * This is also handy because systems that do not support
+- * PCIDs just end up or'ing a 0 into their CR3, which does
+- * no harm.
+- */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
+-
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+- * These variables are used by the entry/exit
+- * code to change PCIDs.
+- */
+-#ifdef CONFIG_KAISER
+- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
+- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
+-#endif
+- /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x8
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
++ kaiser_setup_pcid();
+ }
+
+ /*
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -11,12 +11,26 @@
+ #include <linux/uaccess.h>
+
+ #include <asm/kaiser.h>
++#include <asm/tlbflush.h> /* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++
+ #ifdef CONFIG_KAISER
++__visible
++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these instead.
++ *
++ * This is also handy because systems that do not support PCIDs
++ * just end up or'ing a 0 into their CR3, which does no harm.
++ */
++__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
++DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
+
+-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+ /*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes. No two CPUs will ever
+@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(
+ WARN_ON(__ret); \
+ } while (0)
+
+-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-extern unsigned long X86_CR3_PCID_USER_VAR;
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -294,8 +305,6 @@ void __init kaiser_init(void)
+
+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
+- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+- __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ }
+ return pgd;
+ }
++
++void kaiser_setup_pcid(void)
++{
++ unsigned long kern_cr3 = 0;
++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
++
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
++ }
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCID and pgd and TLB flushing.
++ */
++ X86_CR3_PCID_KERN_VAR = kern_cr3;
++ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++}
++
++/*
++ * Make a note that this cpu will need to flush USER tlb on return to user.
++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
++ * if cpu does not, then the NOFLUSH bit will never have been set.
++ */
++void kaiser_flush_tlb_on_return_to_user(void)
++{
++ this_cpu_write(X86_CR3_PCID_USER_VAR,
++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
++}
++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/export.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
++#include <asm/kaiser.h>
+
+ /*
+ * TLB flushing, formerly SMP-only
+@@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- /*
+- * KAISER, plus PCIDs needs some extra work here. But,
+- * if either of features is not present, we need no
+- * PCIDs here and just do a normal, full TLB flush with
+- * the write_cr3()
+- */
+- if (!IS_ENABLED(CONFIG_KAISER) ||
+- !cpu_feature_enabled(X86_FEATURE_PCID))
+- goto out_set_cr3;
+- /*
+- * We reuse the same PCID for different tasks, so we must
+- * flush all the entires for the PCID out when we change
+- * tasks.
+- */
+- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+-
+- /*
+- * The flush from load_cr3() may leave old TLB entries
+- * for userspace in place. We must flush that context
+- * separately. We can theoretically delay doing this
+- * until we actually load up the userspace CR3, but
+- * that's a bit tricky. We have to have the "need to
+- * flush userspace PCID" bit per-cpu and check it in the
+- * exit-to-userspace paths.
+- */
+- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
++#ifdef CONFIG_KAISER
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entries for the PCID out when we change tasks.
++ * Flush KERN below, flush USER when returning to userspace in
++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
++ *
++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
++ * do it here, but can only be used if X86_FEATURE_INVPCID is
++ * available - and many machines support pcid without invpcid.
++ */
++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ kaiser_flush_tlb_on_return_to_user();
++ }
++#endif /* CONFIG_KAISER */
+
+-out_set_cr3:
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
diff --git a/queue/kaiser-merged-update.patch b/queue/kaiser-merged-update.patch
new file mode 100644
index 0000000..8a0e3fe
--- /dev/null
+++ b/queue/kaiser-merged-update.patch
@@ -0,0 +1,1298 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Wed, 30 Aug 2017 16:23:00 -0700
+Subject: kaiser: merged update
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+
+Merged fixes and cleanups, rebased to 4.9.51 tree (no 5-level paging).
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 105 ++++++++++-
+ arch/x86/include/asm/kaiser.h | 43 ++--
+ arch/x86/include/asm/pgtable.h | 18 +
+ arch/x86/include/asm/pgtable_64.h | 48 ++++-
+ arch/x86/include/asm/pgtable_types.h | 6
+ arch/x86/kernel/espfix_64.c | 13 -
+ arch/x86/kernel/head_64.S | 19 +-
+ arch/x86/kernel/ldt.c | 27 ++
+ arch/x86/kernel/tracepoint.c | 2
+ arch/x86/mm/kaiser.c | 317 +++++++++++++++++++++++++----------
+ arch/x86/mm/pageattr.c | 63 +++++-
+ arch/x86/mm/pgtable.c | 40 +---
+ include/linux/kaiser.h | 26 ++
+ kernel/fork.c | 9
+ security/Kconfig | 5
+ 15 files changed, 551 insertions(+), 190 deletions(-)
+ create mode 100644 include/linux/kaiser.h
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -230,6 +230,13 @@ entry_SYSCALL_64_fastpath:
+ movq RIP(%rsp), %rcx
+ movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+@@ -326,11 +333,25 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ movq RSP(%rsp), %rsp
+ USERGS_SYSRET64
+
+ opportunistic_sysret_failed:
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
+ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+@@ -1087,6 +1108,13 @@ ENTRY(error_entry)
+ cld
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
++ /*
++ * error_entry() always returns with a kernel gsbase and
++ * CR3. We must also have a kernel CR3/gsbase before
++ * calling TRACE_IRQS_*. Just unconditionally switch to
++ * the kernel CR3 here.
++ */
++ SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+ testb $3, CS+8(%rsp)
+ jz .Lerror_kernelspace
+@@ -1096,7 +1124,6 @@ ENTRY(error_entry)
+ * from user mode due to an IRET fault.
+ */
+ SWAPGS
+- SWITCH_KERNEL_CR3
+
+ .Lerror_entry_from_usermode_after_swapgs:
+ /*
+@@ -1148,7 +1175,6 @@ ENTRY(error_entry)
+ * Switch to kernel gsbase:
+ */
+ SWAPGS
+- SWITCH_KERNEL_CR3
+
+ /*
+ * Pretend that the exception came from user mode: set up pt_regs
+@@ -1249,7 +1275,10 @@ ENTRY(nmi)
+ */
+
+ SWAPGS_UNSAFE_STACK
+- SWITCH_KERNEL_CR3_NO_STACK
++ /*
++ * percpu variables are mapped with user CR3, so no need
++ * to switch CR3 here.
++ */
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+@@ -1283,14 +1312,33 @@ ENTRY(nmi)
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ pushq %rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++ andq $(~0x1000), %rax
++#endif
++ movq %rax, %cr3
++#endif
+ call do_nmi
++ /*
++ * Unconditionally restore CR3. I know we return to
++ * kernel code that needs user CR3, but do we ever return
++ * to "user mode" where we need the kernel CR3?
++ */
++#ifdef CONFIG_KAISER
++ popq %rax
++ mov %rax, %cr3
++#endif
+
+ /*
+ * Return back to user mode. We must *not* do the normal exit
+- * work, because we don't want to enable interrupts. Fortunately,
+- * do_nmi doesn't modify pt_regs.
++ * work, because we don't want to enable interrupts. Do not
++ * switch to user CR3: we might be going back to kernel code
++ * that had a user CR3 set.
+ */
+- SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_c_regs_and_iret
+
+@@ -1486,23 +1534,54 @@ end_repeat_nmi:
+ ALLOC_PT_GPREGS_ON_STACK
+
+ /*
+- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
+- * as we should not be calling schedule in NMI context.
+- * Even with normal interrupts enabled. An NMI should not be
+- * setting NEED_RESCHED or anything that normal interrupts and
+- * exceptions might do.
++ * Use the same approach as paranoid_entry to handle SWAPGS, but
++ * without CR3 handling since we do that differently in NMIs. No
++ * need to use paranoid_exit as we should not be calling schedule
++ * in NMI context. Even with normal interrupts enabled. An NMI
++ * should not be setting NEED_RESCHED or anything that normal
++ * interrupts and exceptions might do.
+ */
+- call paranoid_entry
++ cld
++ SAVE_C_REGS
++ SAVE_EXTRA_REGS
++ movl $1, %ebx
++ movl $MSR_GS_BASE, %ecx
++ rdmsr
++ testl %edx, %edx
++ js 1f /* negative -> in kernel */
++ SWAPGS
++ xorl %ebx, %ebx
++1:
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ pushq %rax
++#ifdef CONFIG_KAISER_REAL_SWITCH
++ andq $(~0x1000), %rax
++#endif
++ movq %rax, %cr3
++#endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp, %rdi
++ addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
+ movq $-1, %rsi
+ call do_nmi
++ /*
++ * Unconditionally restore CR3. We might be returning to
++ * kernel code that needs user CR3, like just just before
++ * a sysret.
++ */
++#ifdef CONFIG_KAISER
++ popq %rax
++ mov %rax, %cr3
++#endif
+
+ testl %ebx, %ebx /* swapgs needed? */
+ jnz nmi_restore
+ nmi_swapgs:
+- SWITCH_USER_CR3_NO_STACK
++ /* We fixed up CR3 above, so no need to switch it here */
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+ RESTORE_EXTRA_REGS
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -16,13 +16,17 @@
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ andq $(~0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
++#ifdef CONFIG_KAISER_REAL_SWITCH
+ orq $(0x1000), \reg
++#endif
+ movq \reg, %cr3
+ .endm
+
+@@ -65,48 +69,53 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .endm
+
+ #endif /* CONFIG_KAISER */
++
+ #else /* __ASSEMBLY__ */
+
+
+ #ifdef CONFIG_KAISER
+-// Upon kernel/user mode switch, it may happen that
+-// the address space has to be switched before the registers have been stored.
+-// To change the address space, another register is needed.
+-// A register therefore has to be stored/restored.
+-//
+-DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * Upon kernel/user mode switch, it may happen that the address
++ * space has to be switched before the registers have been
++ * stored. To change the address space, another register is
++ * needed. A register therefore has to be stored/restored.
++*/
+
+-#endif /* CONFIG_KAISER */
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+ /**
+- * shadowmem_add_mapping - map a virtual memory part to the shadow mapping
++ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: The mapping flags of the pages
+ *
+- * the mapping is done on a global scope, so no bigger synchronization has to be done.
+- * the pages have to be manually unmapped again when they are not needed any longer.
++ * The mapping is done on a global scope, so no bigger
++ * synchronization has to be done. the pages have to be
++ * manually unmapped again when they are not needed any longer.
+ */
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+
+ /**
+- * shadowmem_remove_mapping - unmap a virtual memory part of the shadow mapping
++ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ */
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+ /**
+- * shadowmem_initialize_mapping - Initalize the shadow mapping
++ * kaiser_initialize_mapping - Initalize the shadow mapping
+ *
+- * most parts of the shadow mapping can be mapped upon boot time.
+- * only the thread stacks have to be mapped on runtime.
+- * the mapped regions are not unmapped at all.
++ * Most parts of the shadow mapping can be mapped upon boot
++ * time. Only per-process things like the thread stacks
++ * or a new LDT have to be mapped at runtime. These boot-
++ * time mappings are permanent and nevertunmapped.
+ */
+ extern void kaiser_init(void);
+
+-#endif
++#endif /* CONFIG_KAISER */
++
++#endif /* __ASSEMBLY */
+
+
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -690,7 +690,17 @@ static inline pud_t *pud_offset(pgd_t *p
+
+ static inline int pgd_bad(pgd_t pgd)
+ {
+- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
++ pgdval_t ignore_flags = _PAGE_USER;
++ /*
++ * We set NX on KAISER pgds that map userspace memory so
++ * that userspace can not meaningfully use the kernel
++ * page table by accident; it will fault on the first
++ * instruction it tries to run. See native_set_pgd().
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ ignore_flags |= _PAGE_NX;
++
++ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+ }
+
+ static inline int pgd_none(pgd_t pgd)
+@@ -905,8 +915,10 @@ static inline void clone_pgd_range(pgd_t
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+- // clone the shadow pgd part as well
+- memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
+ #endif
+ }
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -107,26 +107,58 @@ static inline void native_pud_clear(pud_
+ }
+
+ #ifdef CONFIG_KAISER
+-static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
+ }
+
+-static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
+ }
++#else
++static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
++{
++ BUILD_BUG_ON(1);
++ return NULL;
++}
++static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
++{
++ return pgdp;
++}
+ #endif /* CONFIG_KAISER */
+
++/*
++ * Page table pages are page-aligned. The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(void *__ptr)
++{
++ unsigned long ptr = (unsigned long)__ptr;
++
++ return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
++}
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+ #ifdef CONFIG_KAISER
+- // We know that a pgd is page aligned.
+- // Therefore the lower indices have to be mapped to user space.
+- // These pages are mapped to the shadow mapping.
+- if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
++ pteval_t extra_kern_pgd_flags = 0;
++ /* Do we need to also populate the shadow pgd? */
++ if (is_userspace_pgd(pgdp)) {
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ /*
++ * Even if the entry is *mapping* userspace, ensure
++ * that userspace can not use it. This way, if we
++ * get out to userspace running on the kernel CR3,
++ * userspace will crash instead of running.
++ */
++ extra_kern_pgd_flags = _PAGE_NX;
+ }
+-
+- pgdp->pgd = pgd.pgd & ~_PAGE_USER;
++ pgdp->pgd = pgd.pgd;
++ pgdp->pgd |= extra_kern_pgd_flags;
+ #else /* CONFIG_KAISER */
+ *pgdp = pgd;
+ #endif
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -48,7 +48,7 @@
+ #ifdef CONFIG_KAISER
+ #define _PAGE_GLOBAL (_AT(pteval_t, 0))
+ #else
+-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+ #endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
+@@ -123,11 +123,7 @@
+ #define _PAGE_DEVMAP (_AT(pteval_t, 0))
+ #endif
+
+-#ifdef CONFIG_KAISER
+-#define _PAGE_PROTNONE (_AT(pteval_t, 0))
+-#else
+ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+-#endif
+
+ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -127,11 +127,14 @@ void __init init_espfix_bsp(void)
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+-#ifdef CONFIG_KAISER
+- // add the esp stack pud to the shadow mapping here.
+- // This can be done directly, because the fixup stack has its own pud
+- set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
+-#endif
++ /*
++ * Just copy the top-level PGD that is mapping the espfix
++ * area to ensure it is mapped into the shadow user page
++ * tables.
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ set_pgd(native_get_shadow_pgd(pgd_p),
++ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -406,11 +406,24 @@ GLOBAL(early_recursion_flag)
+ GLOBAL(name)
+
+ #ifdef CONFIG_KAISER
++/*
++ * Each PGD needs to be 8k long and 8k aligned. We do not
++ * ever go out to userspace with these, so we do not
++ * strictly *need* the second page, but this allows us to
++ * have a single set_pgd() implementation that does not
++ * need to worry about whether it has 4k or 8k to work
++ * with.
++ *
++ * This ensures PGDs are 8k long:
++ */
++#define KAISER_USER_PGD_FILL 512
++/* This ensures they are 8k-aligned: */
+ #define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+ GLOBAL(name)
+ #else
+ #define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#define KAISER_USER_PGD_FILL 0
+ #endif
+
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+@@ -425,6 +438,7 @@ GLOBAL(name)
+ NEXT_PGD_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(early_dynamic_pgts)
+ .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+@@ -433,7 +447,8 @@ NEXT_PAGE(early_dynamic_pgts)
+
+ #ifndef CONFIG_XEN
+ NEXT_PGD_PAGE(init_level4_pgt)
+- .fill 2*512,8,0
++ .fill 512,8,0
++ .fill KAISER_USER_PGD_FILL,8,0
+ #else
+ NEXT_PGD_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -442,6 +457,7 @@ NEXT_PGD_PAGE(init_level4_pgt)
+ .org init_level4_pgt + L4_START_KERNEL*8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_ident_pgt)
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -452,6 +468,7 @@ NEXT_PAGE(level2_ident_pgt)
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+ #endif
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_kernel_pgt)
+ .fill L3_START_KERNEL,8,0
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -18,6 +18,7 @@
+ #include <linux/uaccess.h>
+
+ #include <asm/ldt.h>
++#include <asm/kaiser.h>
+ #include <asm/desc.h>
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+@@ -34,11 +35,21 @@ static void flush_ldt(void *current_mm)
+ set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+
++static void __free_ldt_struct(struct ldt_struct *ldt)
++{
++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
++ vfree(ldt->entries);
++ else
++ free_page((unsigned long)ldt->entries);
++ kfree(ldt);
++}
++
+ /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+ static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+ struct ldt_struct *new_ldt;
+ int alloc_size;
++ int ret = 0;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+@@ -66,6 +77,14 @@ static struct ldt_struct *alloc_ldt_stru
+ return NULL;
+ }
+
++ // FIXME: make kaiser_add_mapping() return an error code
++ // when it fails
++ kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++ __PAGE_KERNEL);
++ if (ret) {
++ __free_ldt_struct(new_ldt);
++ return NULL;
++ }
+ new_ldt->size = size;
+ return new_ldt;
+ }
+@@ -92,12 +111,10 @@ static void free_ldt_struct(struct ldt_s
+ if (likely(!ldt))
+ return;
+
++ kaiser_remove_mapping((unsigned long)ldt->entries,
++ ldt->size * LDT_ENTRY_SIZE);
+ paravirt_free_ldt(ldt->entries, ldt->size);
+- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(ldt->entries);
+- else
+- free_page((unsigned long)ldt->entries);
+- kfree(ldt);
++ __free_ldt_struct(ldt);
+ }
+
+ /*
+--- a/arch/x86/kernel/tracepoint.c
++++ b/arch/x86/kernel/tracepoint.c
+@@ -9,10 +9,12 @@
+ #include <linux/atomic.h>
+
+ atomic_t trace_idt_ctr = ATOMIC_INIT(0);
++__aligned(PAGE_SIZE)
+ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) trace_idt_table };
+
+ /* No need to be aligned, but done to keep all IDTs defined the same way. */
++__aligned(PAGE_SIZE)
+ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+
+ static int trace_irq_vector_refcount;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -1,160 +1,305 @@
+-
+-
++#include <linux/bug.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+ #include <linux/bug.h>
+ #include <linux/init.h>
++#include <linux/interrupt.h>
+ #include <linux/spinlock.h>
+ #include <linux/mm.h>
+-
+ #include <linux/uaccess.h>
++
++#include <asm/kaiser.h>
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #ifdef CONFIG_KAISER
+
+ __visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++/*
++ * At runtime, the only things we map are some things for CPU
++ * hotplug, and stacks for new processes. No two CPUs will ever
++ * be populating the same addresses, so we only need to ensure
++ * that we protect between two CPUs trying to allocate and
++ * populate the same page table page.
++ *
++ * Only take this lock when doing a set_p[4um]d(), but it is not
++ * needed for doing a set_pte(). We assume that only the *owner*
++ * of a given allocation will be doing this for _their_
++ * allocation.
++ *
++ * This ensures that once a system has been running for a while
++ * and there have been stacks all over and these page tables
++ * are fully populated, there will be no further acquisitions of
++ * this lock.
++ */
++static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+
+-/**
+- * Get the real ppn from a address in kernel mapping.
+- * @param address The virtual adrress
+- * @return the physical address
++/*
++ * Returns -1 on error.
+ */
+-static inline unsigned long get_pa_from_mapping (unsigned long address)
++static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+ {
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+- pgd = pgd_offset_k(address);
+- BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
++ pgd = pgd_offset_k(vaddr);
++ /*
++ * We made all the kernel PGDs present in kaiser_init().
++ * We expect them to stay that way.
++ */
++ BUG_ON(pgd_none(*pgd));
++ /*
++ * PGDs are either 512GB or 128TB on all x86_64
++ * configurations. We don't handle these.
++ */
++ BUG_ON(pgd_large(*pgd));
++
++ pud = pud_offset(pgd, vaddr);
++ if (pud_none(*pud)) {
++ WARN_ON_ONCE(1);
++ return -1;
++ }
+
+- pud = pud_offset(pgd, address);
+- BUG_ON(pud_none(*pud));
++ if (pud_large(*pud))
++ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+
+- if (pud_large(*pud)) {
+- return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
++ pmd = pmd_offset(pud, vaddr);
++ if (pmd_none(*pmd)) {
++ WARN_ON_ONCE(1);
++ return -1;
+ }
+
+- pmd = pmd_offset(pud, address);
+- BUG_ON(pmd_none(*pmd));
++ if (pmd_large(*pmd))
++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+
+- if (pmd_large(*pmd)) {
+- return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
++ pte = pte_offset_kernel(pmd, vaddr);
++ if (pte_none(*pte)) {
++ WARN_ON_ONCE(1);
++ return -1;
+ }
+
+- pte = pte_offset_kernel(pmd, address);
+- BUG_ON(pte_none(*pte));
+-
+- return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
++ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+ }
+
+-void _kaiser_copy (unsigned long start_addr, unsigned long size,
+- unsigned long flags)
++/*
++ * This is a relatively normal page table walk, except that it
++ * also tries to allocate page tables pages along the way.
++ *
++ * Returns a pointer to a PTE on success, or NULL on failure.
++ */
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ {
+- pgd_t *pgd;
+- pud_t *pud;
+ pmd_t *pmd;
+- pte_t *pte;
+- unsigned long address;
+- unsigned long end_addr = start_addr + size;
+- unsigned long target_address;
++ pud_t *pud;
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
++ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+- for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
+- address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
+- target_address = get_pa_from_mapping(address);
++ might_sleep();
++ if (is_atomic) {
++ gfp &= ~GFP_KERNEL;
++ gfp |= __GFP_HIGH | __GFP_ATOMIC;
++ }
+
+- pgd = native_get_shadow_pgd(pgd_offset_k(address));
++ if (pgd_none(*pgd)) {
++ WARN_ONCE(1, "All shadow pgds should have been populated");
++ return NULL;
++ }
++ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+- BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
+- BUG_ON(pgd_large(*pgd));
++ pud = pud_offset(pgd, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pud_large(*pud)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pud_none(*pud)) {
++ unsigned long new_pmd_page = __get_free_page(gfp);
++ if (!new_pmd_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pud_none(*pud))
++ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++ else
++ free_page(new_pmd_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
+
+- pud = pud_offset(pgd, address);
+- if (pud_none(*pud)) {
+- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
+- }
+- BUG_ON(pud_large(*pud));
++ pmd = pmd_offset(pud, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pmd_large(*pmd)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pmd_none(*pmd)) {
++ unsigned long new_pte_page = __get_free_page(gfp);
++ if (!new_pte_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pmd_none(*pmd))
++ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++ else
++ free_page(new_pte_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
+
+- pmd = pmd_offset(pud, address);
+- if (pmd_none(*pmd)) {
+- set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
+- }
+- BUG_ON(pmd_large(*pmd));
++ return pte_offset_kernel(pmd, address);
++}
+
+- pte = pte_offset_kernel(pmd, address);
++int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
++{
++ int ret = 0;
++ pte_t *pte;
++ unsigned long start_addr = (unsigned long )__start_addr;
++ unsigned long address = start_addr & PAGE_MASK;
++ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
++ unsigned long target_address;
++
++ for (;address < end_addr; address += PAGE_SIZE) {
++ target_address = get_pa_from_mapping(address);
++ if (target_address == -1) {
++ ret = -EIO;
++ break;
++ }
++ pte = kaiser_pagetable_walk(address, false);
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
+- BUG_ON(__pa(pte_page(*pte)) != target_address);
++ pte_t tmp;
++ set_pte(&tmp, __pte(flags | target_address));
++ WARN_ON_ONCE(!pte_same(*pte, tmp));
+ }
+ }
++ return ret;
+ }
+
+-// at first, add a pmd for every pgd entry in the shadowmem-kernel-part of the kernel mapping
+-static inline void __init _kaiser_init(void)
++static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
++{
++ unsigned long size = end - start;
++
++ return kaiser_add_user_map(start, size, flags);
++}
++
++/*
++ * Ensure that the top level of the (shadow) page tables are
++ * entirely populated. This ensures that all processes that get
++ * forked have the same entries. This way, we do not have to
++ * ever go set up new entries in older processes.
++ *
++ * Note: we never free these, so there are no updates to them
++ * after this.
++ */
++static void __init kaiser_init_all_pgds(void)
+ {
+ pgd_t *pgd;
+ int i = 0;
+
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+- set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
++ pgd_t new_pgd;
++ pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++ if (!pud) {
++ WARN_ON(1);
++ break;
++ }
++ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
++ /*
++ * Make sure not to stomp on some other pgd entry.
++ */
++ if (!pgd_none(pgd[i])) {
++ WARN_ON(1);
++ continue;
++ }
++ set_pgd(pgd + i, new_pgd);
+ }
+ }
+
++#define kaiser_add_user_map_early(start, size, flags) do { \
++ int __ret = kaiser_add_user_map(start, size, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
++#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \
++ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+-spinlock_t shadow_table_lock;
++/*
++ * If anything in here fails, we will likely die on one of the
++ * first kernel->user transitions and init will die. But, we
++ * will have most of the kernel up by then and should be able to
++ * get a clean warning out of it. If we BUG_ON() here, we run
++ * the risk of being before we have good console output.
++ */
+ void __init kaiser_init(void)
+ {
+ int cpu;
+- spin_lock_init(&shadow_table_lock);
+-
+- spin_lock(&shadow_table_lock);
+
+- _kaiser_init();
++ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+- // map the per cpu user variables
+- _kaiser_copy(
+- (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
+- (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
+- __PAGE_KERNEL);
+- }
+-
+- // map the entry/exit text section, which is responsible to switch between user- and kernel mode
+- _kaiser_copy(
+- (unsigned long) __entry_text_start,
+- (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
+- __PAGE_KERNEL_RX);
+-
+- // the fixed map address of the idt_table
+- _kaiser_copy(
+- (unsigned long) idt_descr.address,
+- sizeof(gate_desc) * NR_VECTORS,
+- __PAGE_KERNEL_RO);
++ void *percpu_vaddr = __per_cpu_user_mapped_start +
++ per_cpu_offset(cpu);
++ unsigned long percpu_sz = __per_cpu_user_mapped_end -
++ __per_cpu_user_mapped_start;
++ kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
++ __PAGE_KERNEL);
++ }
+
+- spin_unlock(&shadow_table_lock);
++ /*
++ * Map the entry/exit text section, which is needed at
++ * switches from user to and from kernel.
++ */
++ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
++ __PAGE_KERNEL_RX);
++
++#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
++ kaiser_add_user_map_ptrs_early(__irqentry_text_start,
++ __irqentry_text_end,
++ __PAGE_KERNEL_RX);
++#endif
++ kaiser_add_user_map_early((void *)idt_descr.address,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL_RO);
++#ifdef CONFIG_TRACING
++ kaiser_add_user_map_early(&trace_idt_descr,
++ sizeof(trace_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&trace_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
++#endif
++ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&debug_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
+ }
+
++extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+ // add a mapping to the shadow-mapping, and synchronize the mappings
+-void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+- spin_lock(&shadow_table_lock);
+- _kaiser_copy(addr, size, flags);
+- spin_unlock(&shadow_table_lock);
++ return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+
+-extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
+- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
+- spin_lock(&shadow_table_lock);
+- do {
+- unmap_pud_range(pgd, start, start + size);
+- } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
+- spin_unlock(&shadow_table_lock);
++ unsigned long end = start + size;
++ unsigned long addr;
++
++ for (addr = start; addr < end; addr += PGDIR_SIZE) {
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
++ /*
++ * unmap_p4d_range() handles > P4D_SIZE unmaps,
++ * so no need to trim 'end'.
++ */
++ unmap_pud_range_nofree(pgd, addr, end);
++ }
+ }
+ #endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
+ #define CPA_FLUSHTLB 1
+ #define CPA_ARRAY 2
+ #define CPA_PAGES_ARRAY 4
++#define CPA_FREE_PAGETABLES 8
+
+ #ifdef CONFIG_PROC_FS
+ static unsigned long direct_pages_count[PG_LEVEL_NUM];
+@@ -729,10 +730,13 @@ static int split_large_page(struct cpa_d
+ return 0;
+ }
+
+-static bool try_to_free_pte_page(pte_t *pte)
++static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ if (!pte_none(pte[i]))
+ return false;
+@@ -741,10 +745,13 @@ static bool try_to_free_pte_page(pte_t *
+ return true;
+ }
+
+-static bool try_to_free_pmd_page(pmd_t *pmd)
++static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_none(pmd[i]))
+ return false;
+@@ -753,7 +760,9 @@ static bool try_to_free_pmd_page(pmd_t *
+ return true;
+ }
+
+-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
++static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pte_t *pte = pte_offset_kernel(pmd, start);
+
+@@ -764,22 +773,23 @@ static bool unmap_pte_range(pmd_t *pmd,
+ pte++;
+ }
+
+- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
++ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
+ pmd_clear(pmd);
+ return true;
+ }
+ return false;
+ }
+
+-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
++static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
+ unsigned long start, unsigned long end)
+ {
+- if (unmap_pte_range(pmd, start, end))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (unmap_pte_range(cpa, pmd, start, end))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
++static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
++ unsigned long start, unsigned long end)
+ {
+ pmd_t *pmd = pmd_offset(pud, start);
+
+@@ -790,7 +800,7 @@ static void unmap_pmd_range(pud_t *pud,
+ unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- __unmap_pmd_range(pud, pmd, start, pre_end);
++ __unmap_pmd_range(cpa, pud, pmd, start, pre_end);
+
+ start = pre_end;
+ pmd++;
+@@ -803,7 +813,8 @@ static void unmap_pmd_range(pud_t *pud,
+ if (pmd_large(*pmd))
+ pmd_clear(pmd);
+ else
+- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
++ __unmap_pmd_range(cpa, pud, pmd,
++ start, start + PMD_SIZE);
+
+ start += PMD_SIZE;
+ pmd++;
+@@ -813,17 +824,19 @@ static void unmap_pmd_range(pud_t *pud,
+ * 4K leftovers?
+ */
+ if (start < end)
+- return __unmap_pmd_range(pud, pmd, start, end);
++ return __unmap_pmd_range(cpa, pud, pmd, start, end);
+
+ /*
+ * Try again to free the PMD page if haven't succeeded above.
+ */
+ if (!pud_none(*pud))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pud_t *pud = pud_offset(pgd, start);
+
+@@ -834,7 +847,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- unmap_pmd_range(pud, start, pre_end);
++ unmap_pmd_range(cpa, pud, start, pre_end);
+
+ start = pre_end;
+ pud++;
+@@ -848,7 +861,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ if (pud_large(*pud))
+ pud_clear(pud);
+ else
+- unmap_pmd_range(pud, start, start + PUD_SIZE);
++ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
+
+ start += PUD_SIZE;
+ pud++;
+@@ -858,7 +871,7 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ * 2M leftovers?
+ */
+ if (start < end)
+- unmap_pmd_range(pud, start, end);
++ unmap_pmd_range(cpa, pud, start, end);
+
+ /*
+ * No need to try to free the PUD page because we'll free it in
+@@ -866,6 +879,24 @@ void unmap_pud_range(pgd_t *pgd, unsigne
+ */
+ }
+
++static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = CPA_FREE_PAGETABLES,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
++void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = 0,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
+ static int alloc_pte_page(pmd_t *pmd)
+ {
+ pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -344,40 +344,26 @@ static inline void _pgd_free(pgd_t *pgd)
+ kmem_cache_free(pgd_cache, pgd);
+ }
+ #else
+-static inline pgd_t *_pgd_alloc(void)
+-{
+-#ifdef CONFIG_KAISER
+- // Instead of one PML4, we aquire two PML4s and, thus, an 8kb-aligned memory
+- // block. Therefore, we have to allocate at least 3 pages. However, the
+- // __get_free_pages returns us 4 pages. Hence, we store the base pointer at
+- // the beginning of the page of our 8kb-aligned memory block in order to
+- // correctly free it afterwars.
+
+- unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
+-
+- if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
+- {
+- *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
+- return (pgd_t *) pages;
+- }
+- else
+- {
+- *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
+- return (pgd_t *) (pages + PAGE_SIZE);
+- }
++#ifdef CONFIG_KAISER
++/*
++ * Instead of one pmd, we aquire two pmds. Being order-1, it is
++ * both 8k in size and 8k-aligned. That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
+ #else
+- return (pgd_t *)__get_free_page(PGALLOC_GFP);
++#define PGD_ALLOCATION_ORDER 0
+ #endif
++
++static inline pgd_t *_pgd_alloc(void)
++{
++ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+ }
+
+ static inline void _pgd_free(pgd_t *pgd)
+ {
+-#ifdef CONFIG_KAISER
+- unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
+- free_pages(pages, get_order(4*PAGE_SIZE));
+-#else
+- free_page((unsigned long)pgd);
+-#endif
++ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ }
+ #endif /* CONFIG_X86_PAE */
+
+--- /dev/null
++++ b/include/linux/kaiser.h
+@@ -0,0 +1,26 @@
++#ifndef _INCLUDE_KAISER_H
++#define _INCLUDE_KAISER_H
++
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++#else
++
++/*
++ * These stubs are used whenever CONFIG_KAISER is off, which
++ * includes architectures that support KAISER, but have it
++ * disabled.
++ */
++
++static inline void kaiser_init(void)
++{
++}
++static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++}
++static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++ return 0;
++}
++
++#endif /* !CONFIG_KAISER */
++#endif /* _INCLUDE_KAISER_H */
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -58,6 +58,7 @@
+ #include <linux/tsacct_kern.h>
+ #include <linux/cn_proc.h>
+ #include <linux/freezer.h>
++#include <linux/kaiser.h>
+ #include <linux/delayacct.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/random.h>
+@@ -472,7 +473,6 @@ void set_task_stack_end_magic(struct tas
+ *stackend = STACK_END_MAGIC; /* for overflow detection */
+ }
+
+-extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ struct task_struct *tsk;
+@@ -500,9 +500,10 @@ static struct task_struct *dup_task_stru
+ * functions again.
+ */
+ tsk->stack = stack;
+-#ifdef CONFIG_KAISER
+- kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
+-#endif
++
++ err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++ if (err)
++ goto free_stack;
+ #ifdef CONFIG_VMAP_STACK
+ tsk->stack_vm_area = stack_vm_area;
+ #endif
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -32,12 +32,17 @@ config SECURITY
+ If you are unsure how to answer this question, answer N.
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
++ default y
+ depends on X86_64
+ depends on !PARAVIRT
+ help
+ This enforces a strict kernel and user space isolation in order to close
+ hardware side channels on kernel address information.
+
++config KAISER_REAL_SWITCH
++ bool "KAISER: actually switch page tables"
++ default y
++
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
+ help
diff --git a/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch b/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
new file mode 100644
index 0000000..447c040
--- /dev/null
+++ b/queue/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
@@ -0,0 +1,66 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 17:31:18 -0700
+Subject: kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
+
+From: Hugh Dickins <hughd@google.com>
+
+
+There's a 0x1000 in various places, which looks better with a name.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 4 ++--
+ arch/x86/include/asm/kaiser.h | 7 +++++--
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1318,7 +1318,7 @@ ENTRY(nmi)
+ movq %cr3, %rax
+ pushq %rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+- andq $(~0x1000), %rax
++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ movq %rax, %cr3
+ #endif
+@@ -1561,7 +1561,7 @@ end_repeat_nmi:
+ movq %cr3, %rax
+ pushq %rax
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+- andq $(~0x1000), %rax
++ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ #endif
+ movq %rax, %cr3
+ #endif
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -13,13 +13,16 @@
+ * A minimalistic kernel mapping holds the parts needed to be mapped in user
+ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
++
++#define KAISER_SHADOW_PGD_OFFSET 0x1000
++
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-andq $(~0x1000), \reg
++andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
+@@ -27,7 +30,7 @@ movq \reg, %cr3
+ .macro _SWITCH_TO_USER_CR3 reg
+ movq %cr3, \reg
+ #ifdef CONFIG_KAISER_REAL_SWITCH
+-orq $(0x1000), \reg
++orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+ #endif
+ movq \reg, %cr3
+ .endm
diff --git a/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch b/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
new file mode 100644
index 0000000..f0ec889
--- /dev/null
+++ b/queue/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
@@ -0,0 +1,166 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 26 Sep 2017 18:43:07 -0700
+Subject: kaiser: paranoid_entry pass cr3 need to paranoid_exit
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Neel Natu points out that paranoid_entry() was wrong to assume that
+an entry that did not need swapgs would not need SWITCH_KERNEL_CR3:
+paranoid_entry (used for debug breakpoint, int3, double fault or MCE;
+though I think it's only the MCE case that is cause for concern here)
+can break in at an awkward time, between cr3 switch and swapgs, but
+its handling always needs kernel gs and kernel cr3.
+
+Easy to fix in itself, but paranoid_entry() also needs to convey to
+paranoid_exit() (and my reading of macro idtentry says paranoid_entry
+and paranoid_exit are always paired) how to restore the prior state.
+The swapgs state is already conveyed by %ebx (0 or 1), so extend that
+also to convey when SWITCH_USER_CR3 will be needed (2 or 3).
+
+(Yes, I'd much prefer that 0 meant no swapgs, whereas it's the other
+way round: and a convention shared with error_entry() and error_exit(),
+which I don't want to touch. Perhaps I should have inverted the bit
+for switch cr3 too, but did not.)
+
+paranoid_exit() would be straightforward, except for TRACE_IRQS: it
+did TRACE_IRQS_IRETQ when doing swapgs, but TRACE_IRQS_IRETQ_DEBUG
+when not: which is it supposed to use when SWITCH_USER_CR3 is split
+apart from that? As best as I can determine, commit 5963e317b1e9
+("ftrace/x86: Do not change stacks in DEBUG when calling lockdep")
+missed the swapgs case, and should have used TRACE_IRQS_IRETQ_DEBUG
+there too (the discrepancy has nothing to do with the liberal use
+of _NO_STACK and _UNSAFE_STACK hereabouts: TRACE_IRQS_OFF_DEBUG has
+just been used in all cases); discrepancy lovingly preserved across
+several paranoid_exit() cleanups, but I'm now removing it.
+
+Neel further indicates that to use SWITCH_USER_CR3_NO_STACK there in
+paranoid_exit() is now not only unnecessary but unsafe: might corrupt
+syscall entry's unsafe_stack_register_backup of %rax. Just use
+SWITCH_USER_CR3: and delete SWITCH_USER_CR3_NO_STACK altogether,
+before we make the mistake of using it again.
+
+hughd adds: this commit fixes an issue in the Kaiser-without-PCIDs
+part of the series, and ought to be moved earlier, if you decided
+to make a release of Kaiser-without-PCIDs.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 46 ++++++++++++++++++++++++++++++---------
+ arch/x86/entry/entry_64_compat.S | 2 -
+ arch/x86/include/asm/kaiser.h | 8 ------
+ 3 files changed, 37 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1053,7 +1053,11 @@ idtentry machine_check has_error_cod
+ /*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
++ *
++ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
+ */
+ ENTRY(paranoid_entry)
+ cld
+@@ -1065,9 +1069,26 @@ ENTRY(paranoid_entry)
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+- SWITCH_KERNEL_CR3
+ xorl %ebx, %ebx
+-1: ret
++1:
++#ifdef CONFIG_KAISER
++ /*
++ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
++ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
++ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
++ * unconditionally, but we need to find out whether the reverse
++ * should be done on return (conveyed to paranoid_exit in %ebx).
++ */
++ movq %cr3, %rax
++ testl $KAISER_SHADOW_PGD_OFFSET, %eax
++ jz 2f
++ orl $2, %ebx
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ orq x86_cr3_pcid_noflush, %rax
++ movq %rax, %cr3
++2:
++#endif
++ ret
+ END(paranoid_entry)
+
+ /*
+@@ -1080,20 +1101,25 @@ END(paranoid_entry)
+ * be complicated. Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ *
+- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
++ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
+ */
+ ENTRY(paranoid_exit)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF_DEBUG
+- testl %ebx, %ebx /* swapgs needed? */
++ TRACE_IRQS_IRETQ_DEBUG
++#ifdef CONFIG_KAISER
++ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
++ jz paranoid_exit_no_switch
++ SWITCH_USER_CR3
++paranoid_exit_no_switch:
++#endif
++ testl $1, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
+- TRACE_IRQS_IRETQ
+- SWITCH_USER_CR3_NO_STACK
+ SWAPGS_UNSAFE_STACK
+- jmp paranoid_exit_restore
+ paranoid_exit_no_swapgs:
+- TRACE_IRQS_IRETQ_DEBUG
+-paranoid_exit_restore:
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -343,7 +343,7 @@ ENTRY(entry_INT80_compat)
+
+ /* Go back to user mode. */
+ TRACE_IRQS_ON
+- SWITCH_USER_CR3_NO_STACK
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_regs_and_iret
+ END(entry_INT80_compat)
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -63,20 +63,12 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+-.macro SWITCH_USER_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax %al
+-movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-.endm
+-
+ #else /* CONFIG_KAISER */
+
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+ .macro SWITCH_USER_CR3 reg regb
+ .endm
+-.macro SWITCH_USER_CR3_NO_STACK
+-.endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+
diff --git a/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch b/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch
new file mode 100644
index 0000000..7e8f7e2
--- /dev/null
+++ b/queue/kaiser-pcid-0-for-kernel-and-128-for-user.patch
@@ -0,0 +1,129 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 8 Sep 2017 19:26:30 -0700
+Subject: kaiser: PCID 0 for kernel and 128 for user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Why was 4 chosen for kernel PCID and 6 for user PCID?
+No good reason in a backport where PCIDs are only used for Kaiser.
+
+If we continue with those, then we shall need to add Andy Lutomirski's
+4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa()
+and __read_cr3()"), which deals with the problem of read_cr3() callers
+finding stray bits in the cr3 that they expected to be page-aligned;
+and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64:
+Mask off CR3's PCID bits in the saved CR3").
+
+But if 0 is used for kernel PCID, then there's no need to add in those
+commits - whenever the kernel looks, it sees 0 in the lower bits; and
+0 for kernel seems an obvious choice.
+
+And I naughtily propose 128 for user PCID. Because there's a place
+in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH,
+but needs to reset that to NOFLUSH for the next occasion. Currently
+it does so with a "movb $(0x80)" into the high byte of the per-cpu
+quadword, but that will cause a machine without PCID support to crash.
+Now, if %al just happened to have 0x80 in it at that point, on a
+machine with PCID support, but 0 on a machine without PCID support...
+
+(That will go badly wrong once the pgd can be at a physical address
+above 2^56, but even with 5-level paging, physical goes up to 2^52.)
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h | 19 ++++++++++++-------
+ arch/x86/include/asm/pgtable_types.h | 7 ++++---
+ arch/x86/mm/tlb.c | 3 +++
+ 3 files changed, 19 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -29,14 +29,19 @@ orq X86_CR3_PCID_KERN_VAR, \reg
+ movq \reg, %cr3
+ .endm
+
+-.macro _SWITCH_TO_USER_CR3 reg
++.macro _SWITCH_TO_USER_CR3 reg regb
++/*
++ * regb must be the low byte portion of reg: because we have arranged
++ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
++ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
++ * not enabled): so that the one register can update both memory and cr3.
++ */
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+ orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
+ js 9f
+-// FLUSH this time, reset to NOFLUSH for next time
+-// But if nopcid? Consider using 0x80 for user pcid?
+-movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -49,7 +54,7 @@ popq %rax
+
+ .macro SWITCH_USER_CR3
+ pushq %rax
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
+ .endm
+
+@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+
+ .macro SWITCH_USER_CR3_NO_STACK
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+-_SWITCH_TO_USER_CR3 %rax
++_SWITCH_TO_USER_CR3 %rax %al
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+
+ .macro SWITCH_KERNEL_CR3 reg
+ .endm
+-.macro SWITCH_USER_CR3 reg
++.macro SWITCH_USER_CR3 reg regb
+ .endm
+ .macro SWITCH_USER_CR3_NO_STACK
+ .endm
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -146,16 +146,17 @@
+
+ /* Mask for all the PCID-related bits in CR3: */
+ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++
+ #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
+-#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
+-#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
++/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
++#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+ #define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
+ #define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+ #define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+ #else
+-#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+ #define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
+ /*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
++ *
++ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
++ * but keep that line in there in case something changes.
+ */
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
diff --git a/queue/kaiser-set-_page_nx-only-if-supported.patch b/queue/kaiser-set-_page_nx-only-if-supported.patch
new file mode 100644
index 0000000..0e59d80
--- /dev/null
+++ b/queue/kaiser-set-_page_nx-only-if-supported.patch
@@ -0,0 +1,118 @@
+From: Guenter Roeck <groeck@chromium.org>
+Date: Thu, 4 Jan 2018 13:41:55 -0800
+Subject: kaiser: Set _PAGE_NX only if supported
+
+From: Guenter Roeck <groeck@chromium.org>
+
+This resolves a crash if loaded under qemu + haxm under windows.
+See https://www.spinics.net/lists/kernel/msg2689835.html for details.
+Here is a boot log (the log is from chromeos-4.4, but Tao Wu says that
+the same log is also seen with vanilla v4.4.110-rc1).
+
+[ 0.712750] Freeing unused kernel memory: 552K
+[ 0.721821] init: Corrupted page table at address 57b029b332e0
+[ 0.722761] PGD 80000000bb238067 PUD bc36a067 PMD bc369067 PTE 45d2067
+[ 0.722761] Bad pagetable: 000b [#1] PREEMPT SMP
+[ 0.722761] Modules linked in:
+[ 0.722761] CPU: 1 PID: 1 Comm: init Not tainted 4.4.96 #31
+[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014
+[ 0.722761] task: ffff8800bc290000 ti: ffff8800bc28c000 task.ti: ffff8800bc28c000
+[ 0.722761] RIP: 0010:[<ffffffff83f4129e>] [<ffffffff83f4129e>] __clear_user+0x42/0x67
+[ 0.722761] RSP: 0000:ffff8800bc28fcf8 EFLAGS: 00010202
+[ 0.722761] RAX: 0000000000000000 RBX: 00000000000001a4 RCX: 00000000000001a4
+[ 0.722761] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000057b029b332e0
+[ 0.722761] RBP: ffff8800bc28fd08 R08: ffff8800bc290000 R09: ffff8800bb2f4000
+[ 0.722761] R10: ffff8800bc290000 R11: ffff8800bb2f4000 R12: 000057b029b332e0
+[ 0.722761] R13: 0000000000000000 R14: 000057b029b33340 R15: ffff8800bb1e2a00
+[ 0.722761] FS: 0000000000000000(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000
+[ 0.722761] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+[ 0.722761] CR2: 000057b029b332e0 CR3: 00000000bb2f8000 CR4: 00000000000006e0
+[ 0.722761] Stack:
+[ 0.722761] 000057b029b332e0 ffff8800bb95fa80 ffff8800bc28fd18 ffffffff83f4120c
+[ 0.722761] ffff8800bc28fe18 ffffffff83e9e7a1 ffff8800bc28fd68 0000000000000000
+[ 0.722761] ffff8800bc290000 ffff8800bc290000 ffff8800bc290000 ffff8800bc290000
+[ 0.722761] Call Trace:
+[ 0.722761] [<ffffffff83f4120c>] clear_user+0x2e/0x30
+[ 0.722761] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7
+[ 0.722761] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c
+[ 0.722761] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98
+[ 0.722761] [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[ 0.722761] [<ffffffff83de40be>] do_execve+0x23/0x25
+[ 0.722761] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d
+[ 0.722761] [<ffffffff844fec4d>] kernel_init+0x6d/0xda
+[ 0.722761] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70
+[ 0.722761] [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[ 0.722761] Code: 86 84 be 12 00 00 00 e8 87 0d e8 ff 66 66 90 48 89 d8 48 c1
+eb 03 4c 89 e7 83 e0 07 48 89 d9 be 08 00 00 00 31 d2 48 85 c9 74 0a <48> 89 17
+48 01 f7 ff c9 75 f6 48 89 c1 85 c9 74 09 88 17 48 ff
+[ 0.722761] RIP [<ffffffff83f4129e>] __clear_user+0x42/0x67
+[ 0.722761] RSP <ffff8800bc28fcf8>
+[ 0.722761] ---[ end trace def703879b4ff090 ]---
+[ 0.722761] BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/v4.4/kernel/locking/rwsem.c:21
+[ 0.722761] in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: init
+[ 0.722761] CPU: 1 PID: 1 Comm: init Tainted: G D 4.4.96 #31
+[ 0.722761] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014
+[ 0.722761] 0000000000000086 dcb5d76098c89836 ffff8800bc28fa30 ffffffff83f34004
+[ 0.722761] ffffffff84839dc2 0000000000000015 ffff8800bc28fa40 ffffffff83d57dc9
+[ 0.722761] ffff8800bc28fa68 ffffffff83d57e6a ffffffff84a53640 0000000000000000
+[ 0.722761] Call Trace:
+[ 0.722761] [<ffffffff83f34004>] dump_stack+0x4d/0x63
+[ 0.722761] [<ffffffff83d57dc9>] ___might_sleep+0x13a/0x13c
+[ 0.722761] [<ffffffff83d57e6a>] __might_sleep+0x9f/0xa6
+[ 0.722761] [<ffffffff84502788>] down_read+0x20/0x31
+[ 0.722761] [<ffffffff83cc5d9b>] __blocking_notifier_call_chain+0x35/0x63
+[ 0.722761] [<ffffffff83cc5ddd>] blocking_notifier_call_chain+0x14/0x16
+[ 0.800374] usb 1-1: new full-speed USB device number 2 using uhci_hcd
+[ 0.722761] [<ffffffff83cefe97>] profile_task_exit+0x1a/0x1c
+[ 0.802309] [<ffffffff83cac84e>] do_exit+0x39/0xe7f
+[ 0.802309] [<ffffffff83ce5938>] ? vprintk_default+0x1d/0x1f
+[ 0.802309] [<ffffffff83d7bb95>] ? printk+0x57/0x73
+[ 0.802309] [<ffffffff83c46e25>] oops_end+0x80/0x85
+[ 0.802309] [<ffffffff83c7b747>] pgtable_bad+0x8a/0x95
+[ 0.802309] [<ffffffff83ca7f4a>] __do_page_fault+0x8c/0x352
+[ 0.802309] [<ffffffff83eefba5>] ? file_has_perm+0xc4/0xe5
+[ 0.802309] [<ffffffff83ca821c>] do_page_fault+0xc/0xe
+[ 0.802309] [<ffffffff84507682>] page_fault+0x22/0x30
+[ 0.802309] [<ffffffff83f4129e>] ? __clear_user+0x42/0x67
+[ 0.802309] [<ffffffff83f4127f>] ? __clear_user+0x23/0x67
+[ 0.802309] [<ffffffff83f4120c>] clear_user+0x2e/0x30
+[ 0.802309] [<ffffffff83e9e7a1>] load_elf_binary+0xa7f/0x18f7
+[ 0.802309] [<ffffffff83de2088>] search_binary_handler+0x86/0x19c
+[ 0.802309] [<ffffffff83de389e>] do_execveat_common.isra.26+0x909/0xf98
+[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[ 0.802309] [<ffffffff83de40be>] do_execve+0x23/0x25
+[ 0.802309] [<ffffffff83c002e3>] run_init_process+0x2b/0x2d
+[ 0.802309] [<ffffffff844fec4d>] kernel_init+0x6d/0xda
+[ 0.802309] [<ffffffff84505b2f>] ret_from_fork+0x3f/0x70
+[ 0.802309] [<ffffffff844febe0>] ? rest_init+0x87/0x87
+[ 0.830559] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009
+[ 0.830559]
+[ 0.831305] Kernel Offset: 0x2c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+[ 0.831305] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009
+
+The crash part of this problem may be solved with the following patch
+(thanks to Hugh for the hint). There is still another problem, though -
+with this patch applied, the qemu session aborts with "VCPU Shutdown
+request", whatever that means.
+
+Cc: lepton <ytht.net@gmail.com>
+Signed-off-by: Guenter Roeck <groeck@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/kaiser.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -413,7 +413,8 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+ * get out to userspace running on the kernel CR3,
+ * userspace will crash instead of running.
+ */
+- pgd.pgd |= _PAGE_NX;
++ if (__supported_pte_mask & _PAGE_NX)
++ pgd.pgd |= _PAGE_NX;
+ }
+ } else if (!pgd.pgd) {
+ /*
diff --git a/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch b/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch
new file mode 100644
index 0000000..6ea3930
--- /dev/null
+++ b/queue/kaiser-stack-map-page_size-at-thread_size-page_size.patch
@@ -0,0 +1,139 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 18:57:03 -0700
+Subject: kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Kaiser only needs to map one page of the stack; and
+kernel/fork.c did not build on powerpc (no __PAGE_KERNEL).
+It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack()
+and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's
+kaiser_add_mapping() and kaiser_remove_mapping(). And use
+linux/kaiser.h in init/main.c to avoid the #ifdefs there.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++-------
+ init/main.c | 6 +-----
+ kernel/fork.c | 7 ++-----
+ 3 files changed, 36 insertions(+), 17 deletions(-)
+
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,26 +1,52 @@
+-#ifndef _INCLUDE_KAISER_H
+-#define _INCLUDE_KAISER_H
++#ifndef _LINUX_KAISER_H
++#define _LINUX_KAISER_H
+
+ #ifdef CONFIG_KAISER
+ #include <asm/kaiser.h>
++
++static inline int kaiser_map_thread_stack(void *stack)
++{
++ /*
++ * Map that page of kernel stack on which we enter from user context.
++ */
++ return kaiser_add_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
++}
++
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++ /*
++ * Note: may be called even when kaiser_map_thread_stack() failed.
++ */
++ kaiser_remove_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
++}
+ #else
+
+ /*
+ * These stubs are used whenever CONFIG_KAISER is off, which
+- * includes architectures that support KAISER, but have it
+- * disabled.
++ * includes architectures that support KAISER, but have it disabled.
+ */
+
+ static inline void kaiser_init(void)
+ {
+ }
+-static inline void kaiser_remove_mapping(unsigned long start, unsigned long size)
++static inline int kaiser_add_mapping(unsigned long addr,
++ unsigned long size, unsigned long flags)
++{
++ return 0;
++}
++static inline void kaiser_remove_mapping(unsigned long start,
++ unsigned long size)
+ {
+ }
+-static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++static inline int kaiser_map_thread_stack(void *stack)
+ {
+ return 0;
+ }
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++}
+
+ #endif /* !CONFIG_KAISER */
+-#endif /* _INCLUDE_KAISER_H */
++#endif /* _LINUX_KAISER_H */
+--- a/init/main.c
++++ b/init/main.c
+@@ -80,15 +80,13 @@
+ #include <linux/integrity.h>
+ #include <linux/proc_ns.h>
+ #include <linux/io.h>
++#include <linux/kaiser.h>
+
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+ #include <asm/cacheflush.h>
+-#ifdef CONFIG_KAISER
+-#include <asm/kaiser.h>
+-#endif
+
+ static int kernel_init(void *);
+
+@@ -476,9 +474,7 @@ static void __init mm_init(void)
+ pgtable_init();
+ vmalloc_init();
+ ioremap_huge_init();
+-#ifdef CONFIG_KAISER
+ kaiser_init();
+-#endif
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -212,12 +212,9 @@ static unsigned long *alloc_thread_stack
+ #endif
+ }
+
+-extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
+ static inline void free_thread_stack(struct task_struct *tsk)
+ {
+-#ifdef CONFIG_KAISER
+- kaiser_remove_mapping((unsigned long)tsk->stack, THREAD_SIZE);
+-#endif
++ kaiser_unmap_thread_stack(tsk->stack);
+ #ifdef CONFIG_VMAP_STACK
+ if (task_stack_vm_area(tsk)) {
+ unsigned long flags;
+@@ -501,7 +498,7 @@ static struct task_struct *dup_task_stru
+ */
+ tsk->stack = stack;
+
+- err= kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
++ err= kaiser_map_thread_stack(tsk->stack);
+ if (err)
+ goto free_stack;
+ #ifdef CONFIG_VMAP_STACK
diff --git a/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch b/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch
new file mode 100644
index 0000000..2007d66
--- /dev/null
+++ b/queue/kaiser-tidied-up-asm-kaiser.h-somewhat.patch
@@ -0,0 +1,105 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:18:07 -0700
+Subject: kaiser: tidied up asm/kaiser.h somewhat
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Mainly deleting a surfeit of blank lines, and reflowing header comment.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h | 32 +++++++++++++-------------------
+ 1 file changed, 13 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -1,15 +1,17 @@
+ #ifndef _ASM_X86_KAISER_H
+ #define _ASM_X86_KAISER_H
+-
+-/* This file includes the definitions for the KAISER feature.
+- * KAISER is a counter measure against x86_64 side channel attacks on the kernel virtual memory.
+- * It has a shodow-pgd for every process. the shadow-pgd has a minimalistic kernel-set mapped,
+- * but includes the whole user memory. Within a kernel context switch, or when an interrupt is handled,
+- * the pgd is switched to the normal one. When the system switches to user mode, the shadow pgd is enabled.
+- * By this, the virtual memory chaches are freed, and the user may not attack the whole kernel memory.
++/*
++ * This file includes the definitions for the KAISER feature.
++ * KAISER is a counter measure against x86_64 side channel attacks on
++ * the kernel virtual memory. It has a shadow pgd for every process: the
++ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
++ * user memory. Within a kernel context switch, or when an interrupt is handled,
++ * the pgd is switched to the normal one. When the system switches to user mode,
++ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
++ * and the user may not attack the whole kernel memory.
+ *
+- * A minimalistic kernel mapping holds the parts needed to be mapped in user mode, as the entry/exit functions
+- * of the user space, or the stacks.
++ * A minimalistic kernel mapping holds the parts needed to be mapped in user
++ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_KAISER
+@@ -48,13 +50,10 @@ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+ .endm
+
+-
+ .macro SWITCH_USER_CR3_NO_STACK
+-
+ movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+ _SWITCH_TO_USER_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+-
+ .endm
+
+ #else /* CONFIG_KAISER */
+@@ -72,7 +71,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+
+ #else /* __ASSEMBLY__ */
+
+-
+ #ifdef CONFIG_KAISER
+ /*
+ * Upon kernel/user mode switch, it may happen that the address
+@@ -80,7 +78,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ * stored. To change the address space, another register is
+ * needed. A register therefore has to be stored/restored.
+ */
+-
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+ /**
+@@ -95,7 +92,6 @@ DECLARE_PER_CPU_USER_MAPPED(unsigned lon
+ */
+ extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+-
+ /**
+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+@@ -104,12 +100,12 @@ extern int kaiser_add_mapping(unsigned l
+ extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+ /**
+- * kaiser_initialize_mapping - Initalize the shadow mapping
++ * kaiser_init - Initialize the shadow mapping
+ *
+ * Most parts of the shadow mapping can be mapped upon boot
+ * time. Only per-process things like the thread stacks
+ * or a new LDT have to be mapped at runtime. These boot-
+- * time mappings are permanent and nevertunmapped.
++ * time mappings are permanent and never unmapped.
+ */
+ extern void kaiser_init(void);
+
+@@ -117,6 +113,4 @@ extern void kaiser_init(void);
+
+ #endif /* __ASSEMBLY */
+
+-
+-
+ #endif /* _ASM_X86_KAISER_H */
diff --git a/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch b/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
new file mode 100644
index 0000000..4bb0110
--- /dev/null
+++ b/queue/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
@@ -0,0 +1,50 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 3 Sep 2017 19:23:08 -0700
+Subject: kaiser: tidied up kaiser_add/remove_mapping slightly
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Yes, unmap_pud_range_nofree()'s declaration ought to be in a
+header file really, but I'm not sure we want to use it anyway:
+so for now just declare it inside kaiser_remove_mapping().
+And there doesn't seem to be such a thing as unmap_p4d_range(),
+even in a 5-level paging tree.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -285,8 +285,7 @@ void __init kaiser_init(void)
+ __PAGE_KERNEL);
+ }
+
+-extern void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end);
+-// add a mapping to the shadow-mapping, and synchronize the mappings
++/* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
+ return kaiser_add_user_map((const void *)addr, size, flags);
+@@ -294,15 +293,13 @@ int kaiser_add_mapping(unsigned long add
+
+ void kaiser_remove_mapping(unsigned long start, unsigned long size)
+ {
++ extern void unmap_pud_range_nofree(pgd_t *pgd,
++ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PGDIR_SIZE) {
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
+- /*
+- * unmap_p4d_range() handles > P4D_SIZE unmaps,
+- * so no need to trim 'end'.
+- */
+ unmap_pud_range_nofree(pgd, addr, end);
+ }
+ }
diff --git a/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch b/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
new file mode 100644
index 0000000..44bbb7a
--- /dev/null
+++ b/queue/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
@@ -0,0 +1,130 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 3 Oct 2017 20:49:04 -0700
+Subject: kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Now that we're playing the ALTERNATIVE game, use that more efficient
+method: instead of user-mapping an extra page, and reading an extra
+cacheline each time for x86_cr3_pcid_noflush.
+
+Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax)
+is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs;
+but the one line with $63 in looks clearer, so let's stick with that.
+
+Worried about what happens with an ALTERNATIVE between the jump and
+jump label in another ALTERNATIVE? I was, but have checked the
+combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64,
+and it does a good job.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 7 ++++---
+ arch/x86/include/asm/kaiser.h | 6 +++---
+ arch/x86/mm/kaiser.c | 11 +----------
+ 3 files changed, 8 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1084,7 +1084,8 @@ ENTRY(paranoid_entry)
+ jz 2f
+ orl $2, %ebx
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- orq x86_cr3_pcid_noflush, %rax
++ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3
+ 2:
+ #endif
+@@ -1344,7 +1345,7 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+@@ -1588,7 +1589,7 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,8 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq x86_cr3_pcid_noflush, \reg
++/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+ movq \reg, %cr3
+ .endm
+
+@@ -39,7 +40,7 @@ movq \reg, %cr3
+ movq %cr3, \reg
+ orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js 9f
+-/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+ movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+-extern unsigned long x86_cr3_pcid_noflush;
+ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -31,7 +31,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+-unsigned long x86_cr3_pcid_noflush __read_mostly;
+ DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ /*
+@@ -356,10 +355,6 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+-
+- kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
+- sizeof(x86_cr3_pcid_noflush),
+- __PAGE_KERNEL);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+@@ -433,18 +428,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+
+ void kaiser_setup_pcid(void)
+ {
+- unsigned long kern_cr3 = 0;
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+- if (this_cpu_has(X86_FEATURE_PCID)) {
+- kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ if (this_cpu_has(X86_FEATURE_PCID))
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+- }
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+- x86_cr3_pcid_noflush = kern_cr3;
+ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+
diff --git a/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch b/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch
new file mode 100644
index 0000000..f7c6026
--- /dev/null
+++ b/queue/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+
+From: Hugh Dickins <hughd@google.com>
+
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables. Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing. Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too). But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+["nr_overhead" should of course say "nr_kaisertable", if it needs
+to stay; but for the moment we are being coy, preferring that when
+Joe Blow notices a new line in his /proc/vmstat, he does not get
+too curious about what this "kaiser" stuff might be.]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 16 +++++++++++-----
+ include/linux/mmzone.h | 3 ++-
+ mm/vmstat.c | 1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ if (!new_pmd_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pud_none(*pud))
++ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pmd_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pmd_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ if (!new_pte_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pmd_none(*pmd))
++ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pte_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pte_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ pgd_t new_pgd;
+- pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++ pud_t *pud = pud_alloc_one(&init_mm,
++ PAGE_OFFSET + i * PGDIR_SIZE);
+ if (!pud) {
+ WARN_ON(1);
+ break;
+ }
++ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ /*
+ * Make sure not to stomp on some other pgd entry.
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -124,8 +124,9 @@ enum zone_stat_item {
+ NR_SLAB_UNRECLAIMABLE,
+ NR_PAGETABLE, /* used for pagetables */
+ NR_KERNEL_STACK_KB, /* measured in KiB */
+- /* Second 128 byte cacheline */
++ NR_KAISERTABLE,
+ NR_BOUNCE,
++ /* Second 128 byte cacheline */
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ NR_ZSPAGES, /* allocated in zsmalloc */
+ #endif
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -932,6 +932,7 @@ const char * const vmstat_text[] = {
+ "nr_slab_unreclaimable",
+ "nr_page_table_pages",
+ "nr_kernel_stack",
++ "nr_overhead",
+ "nr_bounce",
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ "nr_zspages",
diff --git a/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch b/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
new file mode 100644
index 0000000..2098b48
--- /dev/null
+++ b/queue/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
@@ -0,0 +1,141 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Hugh Dickins <hughd@google.com>
+Date: Sun, 27 Aug 2017 16:24:27 -0700
+Subject: kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user
+
+From: Hugh Dickins <hughd@google.com>
+
+
+Mostly this commit is just unshouting X86_CR3_PCID_KERN_VAR and
+X86_CR3_PCID_USER_VAR: we usually name variables in lower-case.
+
+But why does x86_cr3_pcid_noflush need to be __aligned(PAGE_SIZE)?
+Ah, it's a leftover from when kaiser_add_user_map() once complained
+about mapping the same page twice. Make it __read_mostly instead.
+(I'm a little uneasy about all the unrelated data which shares its
+page getting user-mapped too, but that was so before, and not a big
+deal: though we call it user-mapped, it's not mapped with _PAGE_USER.)
+
+And there is a little change around the two calls to do_nmi().
+Previously they set the NOFLUSH bit (if PCID supported) when
+forcing to kernel context before do_nmi(); now they also have the
+NOFLUSH bit set (if PCID supported) when restoring context after:
+nothing done in do_nmi() should require a TLB to be flushed here.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 8 ++++----
+ arch/x86/include/asm/kaiser.h | 11 +++++------
+ arch/x86/mm/kaiser.c | 13 +++++++------
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1316,11 +1316,11 @@ ENTRY(nmi)
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- /* Add back kernel PCID and "no flush" bit */
+- orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+ call do_nmi
+@@ -1560,11 +1560,11 @@ end_repeat_nmi:
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- /* Add back kernel PCID and "no flush" bit */
+- orq X86_CR3_PCID_KERN_VAR, %rax
+ movq %rax, %cr3
+ #endif
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,7 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq X86_CR3_PCID_KERN_VAR, \reg
++orq x86_cr3_pcid_noflush, \reg
+ movq \reg, %cr3
+ .endm
+
+@@ -37,11 +37,10 @@ movq \reg, %cr3
+ * not enabled): so that the one register can update both memory and cr3.
+ */
+ movq %cr3, \reg
+-andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
++orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js 9f
+ /* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
+-movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
++movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+ .endm
+@@ -94,8 +93,8 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+-extern unsigned long X86_CR3_PCID_KERN_VAR;
+-DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++extern unsigned long x86_cr3_pcid_noflush;
++DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -28,8 +28,8 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
+-DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
++unsigned long x86_cr3_pcid_noflush __read_mostly;
++DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ /*
+ * At runtime, the only things we map are some things for CPU
+@@ -303,7 +303,8 @@ void __init kaiser_init(void)
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+
+- kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
++ kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
++ sizeof(x86_cr3_pcid_noflush),
+ __PAGE_KERNEL);
+ }
+
+@@ -381,8 +382,8 @@ void kaiser_setup_pcid(void)
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+- X86_CR3_PCID_KERN_VAR = kern_cr3;
+- this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
++ x86_cr3_pcid_noflush = kern_cr3;
++ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+
+ /*
+@@ -392,7 +393,7 @@ void kaiser_setup_pcid(void)
+ */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+- this_cpu_write(X86_CR3_PCID_USER_VAR,
++ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
diff --git a/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch b/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch
new file mode 100644
index 0000000..144d30d
--- /dev/null
+++ b/queue/kbuild-add-fno-stack-check-to-kernel-build-options.patch
@@ -0,0 +1,49 @@
+From 3ce120b16cc548472f80cf8644f90eda958cf1b6 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Fri, 29 Dec 2017 17:34:43 -0800
+Subject: kbuild: add '-fno-stack-check' to kernel build options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 3ce120b16cc548472f80cf8644f90eda958cf1b6 upstream.
+
+It appears that hardened gentoo enables "-fstack-check" by default for
+gcc.
+
+That doesn't work _at_all_ for the kernel, because the kernel stack
+doesn't act like a user stack at all: it's much smaller, and it doesn't
+auto-expand on use. So the extra "probe one page below the stack" code
+generated by -fstack-check just breaks the kernel in horrible ways,
+causing infinite double faults etc.
+
+[ I have to say, that the particular code gcc generates looks very
+ stupid even for user space where it works, but that's a separate
+ issue. ]
+
+Reported-and-tested-by: Alexander Tsoy <alexander@tsoy.me>
+Reported-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/Makefile
++++ b/Makefile
+@@ -788,6 +788,9 @@ KBUILD_CFLAGS += $(call cc-disable-warni
+ # disable invalid "can't wrap" optimizations for signed / pointers
+ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
+
++# Make sure -fstack-check isn't enabled (like gentoo apparently did)
++KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
++
+ # conserve stack if available
+ KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
+
diff --git a/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch b/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch
new file mode 100644
index 0000000..a552f71
--- /dev/null
+++ b/queue/kprobes-x86-blacklist-indirect-thunk-functions-for-kprobes.patch
@@ -0,0 +1,40 @@
+From c1804a236894ecc942da7dc6c5abe209e56cba93 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:14:51 +0900
+Subject: kprobes/x86: Blacklist indirect thunk functions for kprobes
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit c1804a236894ecc942da7dc6c5abe209e56cba93 upstream.
+
+Mark __x86_indirect_thunk_* functions as blacklist for kprobes
+because those functions can be called from anywhere in the kernel
+including blacklist functions of kprobes.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/retpoline.S | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
++#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
++#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+ #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+ GENERATE_THUNK(_ASM_AX)
diff --git a/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch b/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch
new file mode 100644
index 0000000..27f0a7f
--- /dev/null
+++ b/queue/kprobes-x86-disable-optimizing-on-the-function-jumps-to-indirect-thunk.patch
@@ -0,0 +1,80 @@
+From c86a32c09f8ced67971a2310e3b0dda4d1749007 Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:15:20 +0900
+Subject: kprobes/x86: Disable optimizing on the function jumps to indirect thunk
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit c86a32c09f8ced67971a2310e3b0dda4d1749007 upstream.
+
+Since indirect jump instructions will be replaced by jump
+to __x86_indirect_thunk_*, those jmp instruction must be
+treated as an indirect jump. Since optprobe prohibits to
+optimize probes in the function which uses an indirect jump,
+it also needs to find out the function which jump to
+__x86_indirect_thunk_* and disable optimization.
+
+Add a check that the jump target address is between the
+__indirect_thunk_start/end when optimizing kprobe.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -37,6 +37,7 @@
+ #include <asm/alternative.h>
+ #include <asm/insn.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+
+ #include "common.h"
+
+@@ -192,7 +193,7 @@ static int copy_optimized_instructions(u
+ }
+
+ /* Check whether insn is indirect jump */
+-static int insn_is_indirect_jump(struct insn *insn)
++static int __insn_is_indirect_jump(struct insn *insn)
+ {
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+@@ -226,6 +227,26 @@ static int insn_jump_into_range(struct i
+ return (start <= target && target <= start + len);
+ }
+
++static int insn_is_indirect_jump(struct insn *insn)
++{
++ int ret = __insn_is_indirect_jump(insn);
++
++#ifdef CONFIG_RETPOLINE
++ /*
++ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
++ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
++ * older gcc may use indirect jump. So we add this check instead of
++ * replace indirect-jump check.
++ */
++ if (!ret)
++ ret = insn_jump_into_range(insn,
++ (unsigned long)__indirect_thunk_start,
++ (unsigned long)__indirect_thunk_end -
++ (unsigned long)__indirect_thunk_start);
++#endif
++ return ret;
++}
++
+ /* Decode whole function to ensure any instructions don't jump into target */
+ static int can_optimize(unsigned long paddr)
+ {
diff --git a/queue/kpti-rename-to-page_table_isolation.patch b/queue/kpti-rename-to-page_table_isolation.patch
new file mode 100644
index 0000000..f77732d
--- /dev/null
+++ b/queue/kpti-rename-to-page_table_isolation.patch
@@ -0,0 +1,329 @@
+From keescook@chromium.org Wed Jan 3 20:47:22 2018
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 3 Jan 2018 10:17:35 -0800
+Subject: KPTI: Rename to PAGE_TABLE_ISOLATION
+To: Greg KH <gregkh@linuxfoundation.org>
+Message-ID: <20180103181735.GA33341@beast>
+Content-Disposition: inline
+
+From: Kees Cook <keescook@chromium.org>
+
+This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/boot/compressed/misc.h | 2 +-
+ arch/x86/entry/entry_64.S | 12 ++++++------
+ arch/x86/events/intel/ds.c | 4 ++--
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/include/asm/kaiser.h | 12 ++++++------
+ arch/x86/include/asm/pgtable.h | 4 ++--
+ arch/x86/include/asm/pgtable_64.h | 4 ++--
+ arch/x86/include/asm/pgtable_types.h | 2 +-
+ arch/x86/include/asm/tlbflush.h | 2 +-
+ arch/x86/kernel/head_64.S | 2 +-
+ arch/x86/mm/Makefile | 2 +-
+ arch/x86/mm/kaslr.c | 2 +-
+ include/linux/kaiser.h | 6 +++---
+ include/linux/percpu-defs.h | 2 +-
+ security/Kconfig | 2 +-
+ tools/arch/x86/include/asm/cpufeatures.h | 2 +-
+ 16 files changed, 31 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,7 +9,7 @@
+ */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
+-#undef CONFIG_KAISER
++#undef CONFIG_PAGE_TABLE_ISOLATION
+ #undef CONFIG_KASAN
+
+ #include <linux/linkage.h>
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -1071,7 +1071,7 @@ ENTRY(paranoid_entry)
+ SWAPGS
+ xorl %ebx, %ebx
+ 1:
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+@@ -1111,7 +1111,7 @@ ENTRY(paranoid_exit)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF_DEBUG
+ TRACE_IRQS_IRETQ_DEBUG
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+@@ -1340,7 +1340,7 @@ ENTRY(nmi)
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1354,7 +1354,7 @@ ENTRY(nmi)
+ #endif
+ call do_nmi
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+@@ -1584,7 +1584,7 @@ end_repeat_nmi:
+ 1:
+ movq %rsp, %rdi
+ movq $-1, %rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1600,7 +1600,7 @@ end_repeat_nmi:
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ call do_nmi
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -274,7 +274,7 @@ static DEFINE_PER_CPU(void *, insn_buffe
+
+ static void *dsalloc(size_t size, gfp_t flags, int node)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ unsigned int order = get_order(size);
+ struct page *page;
+ unsigned long addr;
+@@ -295,7 +295,7 @@ static void *dsalloc(size_t size, gfp_t
+
+ static void dsfree(const void *buffer, size_t size)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!buffer)
+ return;
+ kaiser_remove_mapping((unsigned long)buffer, size);
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -199,7 +199,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -20,7 +20,7 @@
+ #define KAISER_SHADOW_PGD_OFFSET 0x1000
+
+ #ifdef __ASSEMBLY__
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+@@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 8:
+ .endm
+
+-#else /* CONFIG_KAISER */
++#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ .macro SWITCH_KERNEL_CR3
+ .endm
+@@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ #else /* __ASSEMBLY__ */
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+@@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime
+ #else
+ #define kaiser_enabled 0
+ static inline void __init kaiser_check_boottime_disable(void) {}
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ /*
+- * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -18,7 +18,7 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ #else
+ #define kaiser_enabled 0
+@@ -920,7 +920,7 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (kaiser_enabled) {
+ /* Clone the shadow pgd part as well */
+ memcpy(native_get_shadow_pgd(dst),
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_
+ native_set_pud(pud, native_make_pud(0));
+ }
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+@@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_p
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -144,7 +144,7 @@
+ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+ #define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+
+-#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+ /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+ #define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -136,7 +136,7 @@ static inline void cr4_set_bits_and_upda
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -405,7 +405,7 @@ GLOBAL(early_recursion_flag)
+ .balign PAGE_SIZE; \
+ GLOBAL(name)
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -38,4 +38,4 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulatio
+ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+-obj-$(CONFIG_KAISER) += kaiser.o
++obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o
+--- a/arch/x86/mm/kaslr.c
++++ b/arch/x86/mm/kaslr.c
+@@ -189,6 +189,6 @@ void __meminit init_trampoline(void)
+ *pud_tramp = *pud;
+ }
+
+- /* Avoid set_pgd(), in case it's complicated by CONFIG_KAISER */
++ /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */
+ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
+ }
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,7 +1,7 @@
+ #ifndef _LINUX_KAISER_H
+ #define _LINUX_KAISER_H
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #include <asm/kaiser.h>
+
+ static inline int kaiser_map_thread_stack(void *stack)
+@@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_s
+ #else
+
+ /*
+- * These stubs are used whenever CONFIG_KAISER is off, which
++ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which
+ * includes architectures that support KAISER, but have it disabled.
+ */
+
+@@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_s
+ {
+ }
+
+-#endif /* !CONFIG_KAISER */
++#endif /* !CONFIG_PAGE_TABLE_ISOLATION */
+ #endif /* _LINUX_KAISER_H */
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -35,7 +35,7 @@
+
+ #endif
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #define USER_MAPPED_SECTION "..user_mapped"
+ #else
+ #define USER_MAPPED_SECTION ""
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -31,7 +31,7 @@ config SECURITY
+
+ If you are unsure how to answer this question, answer N.
+
+-config KAISER
++config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on X86_64 && SMP
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -198,7 +198,7 @@
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_KAISER w/o nokaiser */
++#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/queue/kpti-report-when-enabled.patch b/queue/kpti-report-when-enabled.patch
new file mode 100644
index 0000000..dbd4233
--- /dev/null
+++ b/queue/kpti-report-when-enabled.patch
@@ -0,0 +1,48 @@
+From keescook@chromium.org Wed Jan 3 20:48:07 2018
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 3 Jan 2018 10:18:01 -0800
+Subject: KPTI: Report when enabled
+To: Greg KH <gregkh@linuxfoundation.org>
+Message-ID: <20180103181801.GA33383@beast>
+Content-Disposition: inline
+
+From: Kees Cook <keescook@chromium.org>
+
+Make sure dmesg reports when KPTI is enabled.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -10,6 +10,9 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+
++#undef pr_fmt
++#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
++
+ #include <asm/kaiser.h>
+ #include <asm/tlbflush.h> /* to verify its kaiser declarations */
+ #include <asm/pgtable.h>
+@@ -292,7 +295,7 @@ enable:
+ return;
+
+ disable:
+- pr_info("Kernel/User page tables isolation: disabled\n");
++ pr_info("disabled\n");
+
+ silent_disable:
+ kaiser_enabled = 0;
+@@ -352,6 +355,8 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
++
++ pr_info("enabled\n");
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
diff --git a/queue/kvm-vmx-make-indirect-call-speculation-safe.patch b/queue/kvm-vmx-make-indirect-call-speculation-safe.patch
new file mode 100644
index 0000000..a981c11
--- /dev/null
+++ b/queue/kvm-vmx-make-indirect-call-speculation-safe.patch
@@ -0,0 +1,57 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:14 +0100
+Subject: KVM: VMX: Make indirect call speculation safe
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+(cherry picked from commit c940a3fb1e2e9b7d03228ab28f375fb5a47ff699)
+
+Replace indirect call with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8676,14 +8676,14 @@ static void vmx_handle_external_intr(str
+ #endif
+ "pushf\n\t"
+ __ASM_SIZE(push) " $%c[cs]\n\t"
+- "call *%[entry]\n\t"
++ CALL_NOSPEC
+ :
+ #ifdef CONFIG_X86_64
+ [sp]"=&r"(tmp),
+ #endif
+ "+r"(__sp)
+ :
+- [entry]"r"(entry),
++ THUNK_TARGET(entry),
+ [ss]"i"(__KERNEL_DS),
+ [cs]"i"(__KERNEL_CS)
+ );
diff --git a/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch b/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch
new file mode 100644
index 0000000..86fb749
--- /dev/null
+++ b/queue/kvm-x86-make-indirect-calls-in-emulator-speculation-safe.patch
@@ -0,0 +1,78 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 25 Jan 2018 10:58:13 +0100
+Subject: KVM: x86: Make indirect calls in emulator speculation safe
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+(cherry picked from commit 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab)
+
+Replace the indirect calls with CALL_NOSPEC.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: rga@amazon.de
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org
+[dwmw2: Use ASM_CALL_CONSTRAINT like upstream, now we have it]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -25,6 +25,7 @@
+ #include <asm/kvm_emulate.h>
+ #include <linux/stringify.h>
+ #include <asm/debugreg.h>
++#include <asm/nospec-branch.h>
+
+ #include "x86.h"
+ #include "tss.h"
+@@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsign
+ void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+- asm("push %[flags]; popf; call *%[fastop]"
+- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
++ asm("push %[flags]; popf; " CALL_NOSPEC
++ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
+ return rc;
+ }
+
+@@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(s
+
+ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+ {
+- register void *__sp asm(_ASM_SP);
+ ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+
+ if (!(ctxt->d & ByteOp))
+ fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+
+- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
++ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
+ : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
+- [fastop]"+S"(fop), "+r"(__sp)
++ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
+ : "c"(ctxt->src2.val));
+
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/queue/map-the-vsyscall-page-with-_page_user.patch b/queue/map-the-vsyscall-page-with-_page_user.patch
new file mode 100644
index 0000000..1b85895
--- /dev/null
+++ b/queue/map-the-vsyscall-page-with-_page_user.patch
@@ -0,0 +1,143 @@
+From: Borislav Petkov <bp@suse.de>
+Date: Thu, 4 Jan 2018 17:42:45 +0100
+Subject: Map the vsyscall page with _PAGE_USER
+
+From: Borislav Petkov <bp@suse.de>
+
+This needs to happen early in kaiser_pagetable_walk(), before the
+hierarchy is established so that _PAGE_USER permission can be really
+set.
+
+A proper fix would be to teach kaiser_pagetable_walk() to update those
+permissions but the vsyscall page is the only exception here so ...
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 5 +++++
+ arch/x86/include/asm/vsyscall.h | 2 ++
+ arch/x86/mm/kaiser.c | 34 ++++++++++++++++++++++++++++++----
+ 3 files changed, 37 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -66,6 +66,11 @@ static int __init vsyscall_setup(char *s
+ }
+ early_param("vsyscall", vsyscall_setup);
+
++bool vsyscall_enabled(void)
++{
++ return vsyscall_mode != NONE;
++}
++
+ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
+ const char *message)
+ {
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -12,12 +12,14 @@ extern void map_vsyscall(void);
+ * Returns true if handled.
+ */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
++extern bool vsyscall_enabled(void);
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+ {
+ return false;
+ }
++static inline bool vsyscall_enabled(void) { return false; }
+ #endif
+
+ #endif /* _ASM_X86_VSYSCALL_H */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -19,6 +19,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #include <asm/cmdline.h>
++#include <asm/vsyscall.h>
+
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+@@ -110,12 +111,13 @@ static inline unsigned long get_pa_from_
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+-static pte_t *kaiser_pagetable_walk(unsigned long address)
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
+ {
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++ unsigned long prot = _KERNPG_TABLE;
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -123,6 +125,17 @@ static pte_t *kaiser_pagetable_walk(unsi
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
++ if (user) {
++ /*
++ * The vsyscall page is the only page that will have
++ * _PAGE_USER set. Catch everything else.
++ */
++ BUG_ON(address != VSYSCALL_ADDR);
++
++ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++ prot = _PAGE_TABLE;
++ }
++
+ pud = pud_offset(pgd, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+@@ -135,7 +148,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pud_none(*pud)) {
+- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++ set_pud(pud, __pud(prot | __pa(new_pmd_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pmd_page), NR_KAISERTABLE);
+ } else
+@@ -155,7 +168,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pmd_none(*pmd)) {
+- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++ set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pte_page), NR_KAISERTABLE);
+ } else
+@@ -191,7 +204,7 @@ static int kaiser_add_user_map(const voi
+ ret = -EIO;
+ break;
+ }
+- pte = kaiser_pagetable_walk(address);
++ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
+@@ -318,6 +331,19 @@ void __init kaiser_init(void)
+
+ kaiser_init_all_pgds();
+
++ /*
++ * Note that this sets _PAGE_USER and it needs to happen when the
++ * pagetable hierarchy gets created, i.e., early. Otherwise
++ * kaiser_pagetable_walk() will encounter initialized PTEs in the
++ * hierarchy and not set the proper permissions, leading to the
++ * pagefaults with page-protection violations when trying to read the
++ * vsyscall page. For example.
++ */
++ if (vsyscall_enabled())
++ kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
++ PAGE_SIZE,
++ __PAGE_KERNEL_VSYSCALL);
++
+ for_each_possible_cpu(cpu) {
+ void *percpu_vaddr = __per_cpu_user_mapped_start +
+ per_cpu_offset(cpu);
diff --git a/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch b/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch
new file mode 100644
index 0000000..df764bb
--- /dev/null
+++ b/queue/mm-vmstat-make-nr_tlb_remote_flush_received-available-even-on-up.patch
@@ -0,0 +1,39 @@
+From 5dd0b16cdaff9b94da06074d5888b03235c0bf17 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 5 Jun 2017 07:40:25 -0700
+Subject: mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 5dd0b16cdaff9b94da06074d5888b03235c0bf17 upstream.
+
+This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing
+further #ifdef soup. Caught by a Kbuild bot randconfig build.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code")
+Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/vm_event_item.h | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/include/linux/vm_event_item.h
++++ b/include/linux/vm_event_item.h
+@@ -89,10 +89,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
+ #endif
+ #endif
+ #ifdef CONFIG_DEBUG_TLBFLUSH
+-#ifdef CONFIG_SMP
+ NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
+ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
+-#endif /* CONFIG_SMP */
+ NR_TLB_LOCAL_FLUSH_ALL,
+ NR_TLB_LOCAL_FLUSH_ONE,
+ #endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/queue/module-add-retpoline-tag-to-vermagic.patch b/queue/module-add-retpoline-tag-to-vermagic.patch
new file mode 100644
index 0000000..7870bc4
--- /dev/null
+++ b/queue/module-add-retpoline-tag-to-vermagic.patch
@@ -0,0 +1,53 @@
+From 6cfb521ac0d5b97470883ff9b7facae264b7ab12 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Tue, 16 Jan 2018 12:52:28 -0800
+Subject: module: Add retpoline tag to VERMAGIC
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12 upstream.
+
+Add a marker for retpoline to the module VERMAGIC. This catches the case
+when a non RETPOLINE compiled module gets loaded into a retpoline kernel,
+making it insecure.
+
+It doesn't handle the case when retpoline has been runtime disabled. Even
+in this case the match of the retcompile status will be enforced. This
+implies that even with retpoline run time disabled all modules loaded need
+to be recompiled.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: rusty@rustcorp.com.au
+Cc: arjan.van.de.ven@intel.com
+Cc: jeyu@kernel.org
+Cc: torvalds@linux-foundation.org
+Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/vermagic.h | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/include/linux/vermagic.h
++++ b/include/linux/vermagic.h
+@@ -24,10 +24,16 @@
+ #ifndef MODULE_ARCH_VERMAGIC
+ #define MODULE_ARCH_VERMAGIC ""
+ #endif
++#ifdef RETPOLINE
++#define MODULE_VERMAGIC_RETPOLINE "retpoline "
++#else
++#define MODULE_VERMAGIC_RETPOLINE ""
++#endif
+
+ #define VERMAGIC_STRING \
+ UTS_RELEASE " " \
+ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
+ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
+- MODULE_ARCH_VERMAGIC
++ MODULE_ARCH_VERMAGIC \
++ MODULE_VERMAGIC_RETPOLINE
+
diff --git a/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch b/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch
new file mode 100644
index 0000000..23a00b6
--- /dev/null
+++ b/queue/module-retpoline-warn-about-missing-retpoline-in-module.patch
@@ -0,0 +1,149 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: Andi Kleen <ak@linux.intel.com>
+Date: Thu, 25 Jan 2018 15:50:28 -0800
+Subject: module/retpoline: Warn about missing retpoline in module
+
+From: Andi Kleen <ak@linux.intel.com>
+
+(cherry picked from commit caf7501a1b4ec964190f31f9c3f163de252273b8)
+
+There's a risk that a kernel which has full retpoline mitigations becomes
+vulnerable when a module gets loaded that hasn't been compiled with the
+right compiler or the right option.
+
+To enable detection of that mismatch at module load time, add a module info
+string "retpoline" at build time when the module was compiled with
+retpoline support. This only covers compiled C source, but assembler source
+or prebuilt object files are not checked.
+
+If a retpoline enabled kernel detects a non retpoline protected module at
+load time, print a warning and report it in the sysfs vulnerability file.
+
+[ tglx: Massaged changelog ]
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: jeyu@kernel.org
+Cc: arjan@linux.intel.com
+Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++-
+ include/linux/module.h | 9 +++++++++
+ kernel/module.c | 11 +++++++++++
+ scripts/mod/modpost.c | 9 +++++++++
+ 4 files changed, 45 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -10,6 +10,7 @@
+ #include <linux/init.h>
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
++#include <linux/module.h>
+
+ #include <asm/nospec-branch.h>
+ #include <asm/cmdline.h>
+@@ -92,6 +93,19 @@ static const char *spectre_v2_strings[]
+ #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
++static bool spectre_v2_bad_module;
++
++#ifdef RETPOLINE
++bool retpoline_module_ok(bool has_retpoline)
++{
++ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
++ return true;
++
++ pr_err("System may be vunerable to spectre v2\n");
++ spectre_v2_bad_module = true;
++ return false;
++}
++#endif
+
+ static void __init spec2_print_if_insecure(const char *reason)
+ {
+@@ -277,6 +291,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+- return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
++ return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++ spectre_v2_bad_module ? " - vulnerable module loaded" : "");
+ }
+ #endif
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -791,6 +791,15 @@ static inline void module_bug_finalize(c
+ static inline void module_bug_cleanup(struct module *mod) {}
+ #endif /* CONFIG_GENERIC_BUG */
+
++#ifdef RETPOLINE
++extern bool retpoline_module_ok(bool has_retpoline);
++#else
++static inline bool retpoline_module_ok(bool has_retpoline)
++{
++ return true;
++}
++#endif
++
+ #ifdef CONFIG_MODULE_SIG
+ static inline bool module_sig_ok(struct module *module)
+ {
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struc
+ }
+ #endif /* CONFIG_LIVEPATCH */
+
++static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
++{
++ if (retpoline_module_ok(get_modinfo(info, "retpoline")))
++ return;
++
++ pr_warn("%s: loading module not compiled with retpoline compiler.\n",
++ mod->name);
++}
++
+ /* Sets info->hdr and info->len. */
+ static int copy_module_from_user(const void __user *umod, unsigned long len,
+ struct load_info *info)
+@@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *
+ add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
+ }
+
++ check_modinfo_retpoline(mod, info);
++
+ if (get_modinfo(info, "staging")) {
+ add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
+ pr_warn("%s: module is from the staging directory, the quality "
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffe
+ buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
+ }
+
++/* Cannot check for assembler */
++static void add_retpoline(struct buffer *b)
++{
++ buf_printf(b, "\n#ifdef RETPOLINE\n");
++ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
++ buf_printf(b, "#endif\n");
++}
++
+ static void add_staging_flag(struct buffer *b, const char *name)
+ {
+ static const char *staging_dir = "drivers/staging";
+@@ -2474,6 +2482,7 @@ int main(int argc, char **argv)
+
+ add_header(&buf, mod);
+ add_intree_flag(&buf, !external_module);
++ add_retpoline(&buf);
+ add_staging_flag(&buf, mod->name);
+ err |= add_versions(&buf, mod);
+ add_depends(&buf, mod, modules);
diff --git a/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch b/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch
new file mode 100644
index 0000000..5de0872
--- /dev/null
+++ b/queue/nl80211-sanitize-array-index-in-parse_txq_params.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:03:15 -0800
+Subject: nl80211: Sanitize array index in parse_txq_params
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 259d8c1e984318497c84eef547bbb6b1d9f4eb05)
+
+Wireless drivers rely on parse_txq_params to validate that txq_params->ac
+is less than NL80211_NUM_ACS by the time the low-level driver's ->conf_tx()
+handler is called. Use a new helper, array_index_nospec(), to sanitize
+txq_params->ac with respect to speculation. I.e. ensure that any
+speculation into ->conf_tx() handlers is done with a value of
+txq_params->ac that is within the bounds of [0, NL80211_NUM_ACS).
+
+Reported-by: Christian Lamparter <chunkeey@gmail.com>
+Reported-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: linux-wireless@vger.kernel.org
+Cc: torvalds@linux-foundation.org
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727419584.33451.7700736761686184303.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/nl80211.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -16,6 +16,7 @@
+ #include <linux/nl80211.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/netlink.h>
++#include <linux/nospec.h>
+ #include <linux/etherdevice.h>
+ #include <net/net_namespace.h>
+ #include <net/genetlink.h>
+@@ -2014,20 +2015,22 @@ static const struct nla_policy txq_param
+ static int parse_txq_params(struct nlattr *tb[],
+ struct ieee80211_txq_params *txq_params)
+ {
++ u8 ac;
++
+ if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
+ !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
+ !tb[NL80211_TXQ_ATTR_AIFS])
+ return -EINVAL;
+
+- txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
++ ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
+ txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
+ txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
+ txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
+ txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
+
+- if (txq_params->ac >= NL80211_NUM_ACS)
++ if (ac >= NL80211_NUM_ACS)
+ return -EINVAL;
+-
++ txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
+ return 0;
+ }
+
diff --git a/queue/objtool-allow-alternatives-to-be-ignored.patch b/queue/objtool-allow-alternatives-to-be-ignored.patch
new file mode 100644
index 0000000..db02f25
--- /dev/null
+++ b/queue/objtool-allow-alternatives-to-be-ignored.patch
@@ -0,0 +1,163 @@
+From 258c76059cece01bebae098e81bacb1af2edad17 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 11 Jan 2018 21:46:24 +0000
+Subject: objtool: Allow alternatives to be ignored
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 258c76059cece01bebae098e81bacb1af2edad17 upstream.
+
+Getting objtool to understand retpolines is going to be a bit of a
+challenge. For now, take advantage of the fact that retpolines are
+patched in with alternatives. Just read the original (sane)
+non-alternative instruction, and ignore the patched-in retpoline.
+
+This allows objtool to understand the control flow *around* the
+retpoline, even if it can't yet follow what's inside. This means the
+ORC unwinder will fail to unwind from inside a retpoline, but will work
+fine otherwise.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk
+[dwmw2: Applies to tools/objtool/builtin-check.c not check.[ch]]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c | 64 +++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 57 insertions(+), 7 deletions(-)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -51,7 +51,7 @@ struct instruction {
+ unsigned int len, state;
+ unsigned char type;
+ unsigned long immediate;
+- bool alt_group, visited;
++ bool alt_group, visited, ignore_alts;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
+ struct list_head alts;
+@@ -353,6 +353,40 @@ static void add_ignores(struct objtool_f
+ }
+
+ /*
++ * FIXME: For now, just ignore any alternatives which add retpolines. This is
++ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
++ * But it at least allows objtool to understand the control flow *around* the
++ * retpoline.
++ */
++static int add_nospec_ignores(struct objtool_file *file)
++{
++ struct section *sec;
++ struct rela *rela;
++ struct instruction *insn;
++
++ sec = find_section_by_name(file->elf, ".rela.discard.nospec");
++ if (!sec)
++ return 0;
++
++ list_for_each_entry(rela, &sec->rela_list, list) {
++ if (rela->sym->type != STT_SECTION) {
++ WARN("unexpected relocation symbol type in %s", sec->name);
++ return -1;
++ }
++
++ insn = find_insn(file, rela->sym->sec, rela->addend);
++ if (!insn) {
++ WARN("bad .discard.nospec entry");
++ return -1;
++ }
++
++ insn->ignore_alts = true;
++ }
++
++ return 0;
++}
++
++/*
+ * Find the destination instructions for all jumps.
+ */
+ static int add_jump_destinations(struct objtool_file *file)
+@@ -435,11 +469,18 @@ static int add_call_destinations(struct
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->call_dest = find_symbol_by_offset(insn->sec,
+ dest_off);
++ /*
++ * FIXME: Thanks to retpolines, it's now considered
++ * normal for a function to call within itself. So
++ * disable this warning for now.
++ */
++#if 0
+ if (!insn->call_dest) {
+ WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+ insn->sec, insn->offset, dest_off);
+ return -1;
+ }
++#endif
+ } else if (rela->sym->type == STT_SECTION) {
+ insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ rela->addend+4);
+@@ -601,12 +642,6 @@ static int add_special_section_alts(stru
+ return ret;
+
+ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+- alt = malloc(sizeof(*alt));
+- if (!alt) {
+- WARN("malloc failed");
+- ret = -1;
+- goto out;
+- }
+
+ orig_insn = find_insn(file, special_alt->orig_sec,
+ special_alt->orig_off);
+@@ -617,6 +652,10 @@ static int add_special_section_alts(stru
+ goto out;
+ }
+
++ /* Ignore retpoline alternatives. */
++ if (orig_insn->ignore_alts)
++ continue;
++
+ new_insn = NULL;
+ if (!special_alt->group || special_alt->new_len) {
+ new_insn = find_insn(file, special_alt->new_sec,
+@@ -642,6 +681,13 @@ static int add_special_section_alts(stru
+ goto out;
+ }
+
++ alt = malloc(sizeof(*alt));
++ if (!alt) {
++ WARN("malloc failed");
++ ret = -1;
++ goto out;
++ }
++
+ alt->insn = new_insn;
+ list_add_tail(&alt->list, &orig_insn->alts);
+
+@@ -861,6 +907,10 @@ static int decode_sections(struct objtoo
+
+ add_ignores(file);
+
++ ret = add_nospec_ignores(file);
++ if (ret)
++ return ret;
++
+ ret = add_jump_destinations(file);
+ if (ret)
+ return ret;
diff --git a/queue/objtool-detect-jumps-to-retpoline-thunks.patch b/queue/objtool-detect-jumps-to-retpoline-thunks.patch
new file mode 100644
index 0000000..0079121
--- /dev/null
+++ b/queue/objtool-detect-jumps-to-retpoline-thunks.patch
@@ -0,0 +1,61 @@
+From 39b735332cb8b33a27c28592d969e4016c86c3ea Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 11 Jan 2018 21:46:23 +0000
+Subject: objtool: Detect jumps to retpoline thunks
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 39b735332cb8b33a27c28592d969e4016c86c3ea upstream.
+
+A direct jump to a retpoline thunk is really an indirect jump in
+disguise. Change the objtool instruction type accordingly.
+
+Objtool needs to know where indirect branches are so it can detect
+switch statement jump tables.
+
+This fixes a bunch of warnings with CONFIG_RETPOLINE like:
+
+ arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame
+ kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame
+ ...
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk
+[dwmw2: Applies to tools/objtool/builtin-check.c not check.c]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -382,6 +382,13 @@ static int add_jump_destinations(struct
+ } else if (rela->sym->sec->idx) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->sym->sym.st_value + rela->addend + 4;
++ } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
++ /*
++ * Retpoline jumps are really dynamic jumps in
++ * disguise, so convert them accordingly.
++ */
++ insn->type = INSN_JUMP_DYNAMIC;
++ continue;
+ } else {
+ /* sibling call */
+ insn->jump_dest = 0;
diff --git a/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch b/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch
new file mode 100644
index 0000000..a773f63
--- /dev/null
+++ b/queue/objtool-fix-retpoline-support-for-pre-orc-objtool.patch
@@ -0,0 +1,45 @@
+From jpoimboe@redhat.com Mon Jan 15 18:44:58 2018
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 15 Jan 2018 11:00:54 -0600
+Subject: objtool: Fix retpoline support for pre-ORC objtool
+To: David Woodhouse <dwmw2@infradead.org>
+Cc: gregkh@linuxfoundation.org, ak@linux.intel.com, dave.hansen@intel.com, gregkh@linux-foundation.org, jikos@kernel.org, keescook@google.com, luto@amacapital.net, peterz@infradead.org, pjt@google.com, riel@redhat.com, tglx@linutronix.de, tim.c.chen@linux.intel.com, torvalds@linux-foundation.org, stable@vger.kernel.org, stable-commits@vger.kernel.org
+Message-ID: <20180115170054.6baepkgihtla4nub@treble>
+Content-Disposition: inline
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+Objtool 1.0 (pre-ORC) produces the following warning when it encounters
+a retpoline:
+
+ arch/x86/crypto/camellia-aesni-avx2-asm_64.o: warning: objtool: .altinstr_replacement+0xf: return instruction outside of a callable function
+
+That warning is meant to catch GCC bugs and missing ENTRY/ENDPROC
+annotations, neither of which are applicable to alternatives. Silence
+the warning for alternative instructions, just like objtool 2.0 already
+does.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/objtool/builtin-check.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -1230,6 +1230,14 @@ static int validate_uncallable_instructi
+
+ for_each_insn(file, insn) {
+ if (!insn->visited && insn->type == INSN_RETURN) {
++
++ /*
++ * Don't warn about call instructions in unvisited
++ * retpoline alternatives.
++ */
++ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
++ continue;
++
+ WARN_FUNC("return instruction outside of a callable function",
+ insn->sec, insn->offset);
+ warnings++;
diff --git a/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch b/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch
new file mode 100644
index 0000000..ceb2b5f
--- /dev/null
+++ b/queue/objtool-modules-discard-objtool-annotation-sections-for-modules.patch
@@ -0,0 +1,84 @@
+From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 1 Mar 2017 12:04:44 -0600
+Subject: objtool, modules: Discard objtool annotation sections for modules
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit e390f9a9689a42f477a6073e2e7df530a4c1b740 upstream.
+
+The '__unreachable' and '__func_stack_frame_non_standard' sections are
+only used at compile time. They're discarded for vmlinux but they
+should also be discarded for modules.
+
+Since this is a recurring pattern, prefix the section names with
+".discard.". It's a nice convention and vmlinux.lds.h already discards
+such sections.
+
+Also remove the 'a' (allocatable) flag from the __unreachable section
+since it doesn't make sense for a discarded section.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jessica Yu <jeyu@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
+Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[dwmw2: Remove the unreachable part in backporting since it's not here yet]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.ku>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/frame.h | 2 +-
+ scripts/mod/modpost.c | 1 +
+ scripts/module-common.lds | 5 ++++-
+ tools/objtool/builtin-check.c | 2 +-
+ 4 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/include/linux/frame.h
++++ b/include/linux/frame.h
+@@ -11,7 +11,7 @@
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+ #define STACK_FRAME_NON_STANDARD(func) \
+- static void __used __section(__func_stack_frame_non_standard) \
++ static void __used __section(.discard.func_stack_frame_non_standard) \
+ *__func_stack_frame_non_standard_##func = func
+
+ #else /* !CONFIG_STACK_VALIDATION */
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -838,6 +838,7 @@ static const char *const section_white_l
+ ".cmem*", /* EZchip */
+ ".fmt_slot*", /* EZchip */
+ ".gnu.lto*",
++ ".discard.*",
+ NULL
+ };
+
+--- a/scripts/module-common.lds
++++ b/scripts/module-common.lds
+@@ -4,7 +4,10 @@
+ * combine them automatically.
+ */
+ SECTIONS {
+- /DISCARD/ : { *(.discard) }
++ /DISCARD/ : {
++ *(.discard)
++ *(.discard.*)
++ }
+
+ __ksymtab 0 : { *(SORT(___ksymtab+*)) }
+ __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) }
+--- a/tools/objtool/builtin-check.c
++++ b/tools/objtool/builtin-check.c
+@@ -1229,7 +1229,7 @@ int cmd_check(int argc, const char **arg
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+- file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
++ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+ file.rodata = find_section_by_name(file.elf, ".rodata");
+ file.ignore_unreachables = false;
+ file.c_file = find_section_by_name(file.elf, ".comment");
diff --git a/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch b/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch
new file mode 100644
index 0000000..3fdc39b
--- /dev/null
+++ b/queue/retpoline-introduce-start-end-markers-of-indirect-thunk.patch
@@ -0,0 +1,71 @@
+From 736e80a4213e9bbce40a7c050337047128b472ac Mon Sep 17 00:00:00 2001
+From: Masami Hiramatsu <mhiramat@kernel.org>
+Date: Fri, 19 Jan 2018 01:14:21 +0900
+Subject: retpoline: Introduce start/end markers of indirect thunk
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+commit 736e80a4213e9bbce40a7c050337047128b472ac upstream.
+
+Introduce start/end markers of __x86_indirect_thunk_* functions.
+To make it easy, consolidate .text.__x86.indirect_thunk.* sections
+to one .text.__x86.indirect_thunk section and put it in the
+end of kernel text section and adds __indirect_thunk_start/end
+so that other subsystem (e.g. kprobes) can identify it.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/nospec-branch.h | 3 +++
+ arch/x86/kernel/vmlinux.lds.S | 7 +++++++
+ arch/x86/lib/retpoline.S | 2 +-
+ 3 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -194,6 +194,9 @@ enum spectre_v2_mitigation {
+ SPECTRE_V2_IBRS,
+ };
+
++extern char __indirect_thunk_start[];
++extern char __indirect_thunk_end[];
++
+ /*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -105,6 +105,13 @@ SECTIONS
+ SOFTIRQENTRY_TEXT
+ *(.fixup)
+ *(.gnu.warning)
++
++#ifdef CONFIG_RETPOLINE
++ __indirect_thunk_start = .;
++ *(.text.__x86.indirect_thunk)
++ __indirect_thunk_end = .;
++#endif
++
+ /* End of text section */
+ _etext = .;
+ } :text = 0x9090
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -9,7 +9,7 @@
+ #include <asm/nospec-branch.h>
+
+ .macro THUNK reg
+- .section .text.__x86.indirect_thunk.\reg
++ .section .text.__x86.indirect_thunk
+
+ ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
diff --git a/queue/selftests-x86-add-test_vsyscall.patch b/queue/selftests-x86-add-test_vsyscall.patch
new file mode 100644
index 0000000..73478ec
--- /dev/null
+++ b/queue/selftests-x86-add-test_vsyscall.patch
@@ -0,0 +1,569 @@
+From 6fcf09dcfd33e93cfe1808fcb9474087dd40cc05 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 11 Jan 2018 17:16:51 -0800
+Subject: [PATCH] selftests/x86: Add test_vsyscall
+
+commit 352909b49ba0d74929b96af6dfbefc854ab6ebb5 upstream.
+
+This tests that the vsyscall entries do what they're expected to do.
+It also confirms that attempts to read the vsyscall page behave as
+expected.
+
+If changes are made to the vsyscall code or its memory map handling,
+running this test in all three of vsyscall=none, vsyscall=emulate,
+and vsyscall=native are helpful.
+
+(Because it's easy, this also compares the vsyscall results to their
+ vDSO equivalents.)
+
+Note to KAISER backporters: please test this under all three
+vsyscall modes. Also, in the emulate and native modes, make sure
+that test_vsyscall_64 agrees with the command line or config
+option as to which mode you're in. It's quite easy to mess up
+the kernel such that native mode accidentally emulates
+or vice versa.
+
+Greg, etc: please backport this to all your Meltdown-patched
+kernels. It'll help make sure the patches didn't regress
+vsyscalls.
+
+CSigned-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/2b9c5a174c1d60fd7774461d518aa75598b1d8fd.1515719552.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
+index 4f747ee07f10..38dbdf4bfd89 100644
+--- a/tools/testing/selftests/x86/Makefile
++++ b/tools/testing/selftests/x86/Makefile
+@@ -5,7 +5,7 @@ include ../lib.mk
+ .PHONY: all all_32 all_64 warn_32bit_failure clean
+
+ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
+- check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
++ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test test_vsyscall
+ TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+ test_FCMOV test_FCOMI test_FISTTP \
+ vdso_restorer
+diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
+new file mode 100644
+index 000000000000..6e0bd52ad53d
+--- /dev/null
++++ b/tools/testing/selftests/x86/test_vsyscall.c
+@@ -0,0 +1,500 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#define _GNU_SOURCE
++
++#include <stdio.h>
++#include <sys/time.h>
++#include <time.h>
++#include <stdlib.h>
++#include <sys/syscall.h>
++#include <unistd.h>
++#include <dlfcn.h>
++#include <string.h>
++#include <inttypes.h>
++#include <signal.h>
++#include <sys/ucontext.h>
++#include <errno.h>
++#include <err.h>
++#include <sched.h>
++#include <stdbool.h>
++#include <setjmp.h>
++
++#ifdef __x86_64__
++# define VSYS(x) (x)
++#else
++# define VSYS(x) 0
++#endif
++
++#ifndef SYS_getcpu
++# ifdef __x86_64__
++# define SYS_getcpu 309
++# else
++# define SYS_getcpu 318
++# endif
++#endif
++
++static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
++ int flags)
++{
++ struct sigaction sa;
++ memset(&sa, 0, sizeof(sa));
++ sa.sa_sigaction = handler;
++ sa.sa_flags = SA_SIGINFO | flags;
++ sigemptyset(&sa.sa_mask);
++ if (sigaction(sig, &sa, 0))
++ err(1, "sigaction");
++}
++
++/* vsyscalls and vDSO */
++bool should_read_vsyscall = false;
++
++typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
++gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
++gtod_t vdso_gtod;
++
++typedef int (*vgettime_t)(clockid_t, struct timespec *);
++vgettime_t vdso_gettime;
++
++typedef long (*time_func_t)(time_t *t);
++time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
++time_func_t vdso_time;
++
++typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
++getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
++getcpu_t vdso_getcpu;
++
++static void init_vdso(void)
++{
++ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
++ if (!vdso)
++ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
++ if (!vdso) {
++ printf("[WARN]\tfailed to find vDSO\n");
++ return;
++ }
++
++ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
++ if (!vdso_gtod)
++ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
++
++ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
++ if (!vdso_gettime)
++ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
++
++ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
++ if (!vdso_time)
++ printf("[WARN]\tfailed to find time in vDSO\n");
++
++ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
++ if (!vdso_getcpu) {
++ /* getcpu() was never wired up in the 32-bit vDSO. */
++ printf("[%s]\tfailed to find getcpu in vDSO\n",
++ sizeof(long) == 8 ? "WARN" : "NOTE");
++ }
++}
++
++static int init_vsys(void)
++{
++#ifdef __x86_64__
++ int nerrs = 0;
++ FILE *maps;
++ char line[128];
++ bool found = false;
++
++ maps = fopen("/proc/self/maps", "r");
++ if (!maps) {
++ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
++ should_read_vsyscall = true;
++ return 0;
++ }
++
++ while (fgets(line, sizeof(line), maps)) {
++ char r, x;
++ void *start, *end;
++ char name[128];
++ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
++ &start, &end, &r, &x, name) != 5)
++ continue;
++
++ if (strcmp(name, "[vsyscall]"))
++ continue;
++
++ printf("\tvsyscall map: %s", line);
++
++ if (start != (void *)0xffffffffff600000 ||
++ end != (void *)0xffffffffff601000) {
++ printf("[FAIL]\taddress range is nonsense\n");
++ nerrs++;
++ }
++
++ printf("\tvsyscall permissions are %c-%c\n", r, x);
++ should_read_vsyscall = (r == 'r');
++ if (x != 'x') {
++ vgtod = NULL;
++ vtime = NULL;
++ vgetcpu = NULL;
++ }
++
++ found = true;
++ break;
++ }
++
++ fclose(maps);
++
++ if (!found) {
++ printf("\tno vsyscall map in /proc/self/maps\n");
++ should_read_vsyscall = false;
++ vgtod = NULL;
++ vtime = NULL;
++ vgetcpu = NULL;
++ }
++
++ return nerrs;
++#else
++ return 0;
++#endif
++}
++
++/* syscalls */
++static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
++{
++ return syscall(SYS_gettimeofday, tv, tz);
++}
++
++static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
++{
++ return syscall(SYS_clock_gettime, id, ts);
++}
++
++static inline long sys_time(time_t *t)
++{
++ return syscall(SYS_time, t);
++}
++
++static inline long sys_getcpu(unsigned * cpu, unsigned * node,
++ void* cache)
++{
++ return syscall(SYS_getcpu, cpu, node, cache);
++}
++
++static jmp_buf jmpbuf;
++
++static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
++{
++ siglongjmp(jmpbuf, 1);
++}
++
++static double tv_diff(const struct timeval *a, const struct timeval *b)
++{
++ return (double)(a->tv_sec - b->tv_sec) +
++ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
++}
++
++static int check_gtod(const struct timeval *tv_sys1,
++ const struct timeval *tv_sys2,
++ const struct timezone *tz_sys,
++ const char *which,
++ const struct timeval *tv_other,
++ const struct timezone *tz_other)
++{
++ int nerrs = 0;
++ double d1, d2;
++
++ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
++ printf("[FAIL] %s tz mismatch\n", which);
++ nerrs++;
++ }
++
++ d1 = tv_diff(tv_other, tv_sys1);
++ d2 = tv_diff(tv_sys2, tv_other);
++ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
++
++ if (d1 < 0 || d2 < 0) {
++ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
++ nerrs++;
++ } else {
++ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
++ }
++
++ return nerrs;
++}
++
++static int test_gtod(void)
++{
++ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
++ struct timezone tz_sys, tz_vdso, tz_vsys;
++ long ret_vdso = -1;
++ long ret_vsys = -1;
++ int nerrs = 0;
++
++ printf("[RUN]\ttest gettimeofday()\n");
++
++ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
++ err(1, "syscall gettimeofday");
++ if (vdso_gtod)
++ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
++ if (vgtod)
++ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
++ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
++ err(1, "syscall gettimeofday");
++
++ if (vdso_gtod) {
++ if (ret_vdso == 0) {
++ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
++ } else {
++ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
++ nerrs++;
++ }
++ }
++
++ if (vgtod) {
++ if (ret_vsys == 0) {
++ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
++ } else {
++ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
++ nerrs++;
++ }
++ }
++
++ return nerrs;
++}
++
++static int test_time(void) {
++ int nerrs = 0;
++
++ printf("[RUN]\ttest time()\n");
++ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
++ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
++ t_sys1 = sys_time(&t2_sys1);
++ if (vdso_time)
++ t_vdso = vdso_time(&t2_vdso);
++ if (vtime)
++ t_vsys = vtime(&t2_vsys);
++ t_sys2 = sys_time(&t2_sys2);
++ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
++ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
++ nerrs++;
++ return nerrs;
++ }
++
++ if (vdso_time) {
++ if (t_vdso < 0 || t_vdso != t2_vdso) {
++ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
++ nerrs++;
++ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
++ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
++ nerrs++;
++ } else {
++ printf("[OK]\tvDSO time() is okay\n");
++ }
++ }
++
++ if (vtime) {
++ if (t_vsys < 0 || t_vsys != t2_vsys) {
++ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
++ nerrs++;
++ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
++ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
++ nerrs++;
++ } else {
++ printf("[OK]\tvsyscall time() is okay\n");
++ }
++ }
++
++ return nerrs;
++}
++
++static int test_getcpu(int cpu)
++{
++ int nerrs = 0;
++ long ret_sys, ret_vdso = -1, ret_vsys = -1;
++
++ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
++
++ cpu_set_t cpuset;
++ CPU_ZERO(&cpuset);
++ CPU_SET(cpu, &cpuset);
++ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
++ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
++ return nerrs;
++ }
++
++ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
++ unsigned node = 0;
++ bool have_node = false;
++ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
++ if (vdso_getcpu)
++ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
++ if (vgetcpu)
++ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
++
++ if (ret_sys == 0) {
++ if (cpu_sys != cpu) {
++ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
++ nerrs++;
++ }
++
++ have_node = true;
++ node = node_sys;
++ }
++
++ if (vdso_getcpu) {
++ if (ret_vdso) {
++ printf("[FAIL]\tvDSO getcpu() failed\n");
++ nerrs++;
++ } else {
++ if (!have_node) {
++ have_node = true;
++ node = node_vdso;
++ }
++
++ if (cpu_vdso != cpu) {
++ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
++ nerrs++;
++ } else {
++ printf("[OK]\tvDSO reported correct CPU\n");
++ }
++
++ if (node_vdso != node) {
++ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
++ nerrs++;
++ } else {
++ printf("[OK]\tvDSO reported correct node\n");
++ }
++ }
++ }
++
++ if (vgetcpu) {
++ if (ret_vsys) {
++ printf("[FAIL]\tvsyscall getcpu() failed\n");
++ nerrs++;
++ } else {
++ if (!have_node) {
++ have_node = true;
++ node = node_vsys;
++ }
++
++ if (cpu_vsys != cpu) {
++ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
++ nerrs++;
++ } else {
++ printf("[OK]\tvsyscall reported correct CPU\n");
++ }
++
++ if (node_vsys != node) {
++ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
++ nerrs++;
++ } else {
++ printf("[OK]\tvsyscall reported correct node\n");
++ }
++ }
++ }
++
++ return nerrs;
++}
++
++static int test_vsys_r(void)
++{
++#ifdef __x86_64__
++ printf("[RUN]\tChecking read access to the vsyscall page\n");
++ bool can_read;
++ if (sigsetjmp(jmpbuf, 1) == 0) {
++ *(volatile int *)0xffffffffff600000;
++ can_read = true;
++ } else {
++ can_read = false;
++ }
++
++ if (can_read && !should_read_vsyscall) {
++ printf("[FAIL]\tWe have read access, but we shouldn't\n");
++ return 1;
++ } else if (!can_read && should_read_vsyscall) {
++ printf("[FAIL]\tWe don't have read access, but we should\n");
++ return 1;
++ } else {
++ printf("[OK]\tgot expected result\n");
++ }
++#endif
++
++ return 0;
++}
++
++
++#ifdef __x86_64__
++#define X86_EFLAGS_TF (1UL << 8)
++static volatile sig_atomic_t num_vsyscall_traps;
++
++static unsigned long get_eflags(void)
++{
++ unsigned long eflags;
++ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
++ return eflags;
++}
++
++static void set_eflags(unsigned long eflags)
++{
++ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
++}
++
++static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
++{
++ ucontext_t *ctx = (ucontext_t *)ctx_void;
++ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
++
++ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
++ num_vsyscall_traps++;
++}
++
++static int test_native_vsyscall(void)
++{
++ time_t tmp;
++ bool is_native;
++
++ if (!vtime)
++ return 0;
++
++ printf("[RUN]\tchecking for native vsyscall\n");
++ sethandler(SIGTRAP, sigtrap, 0);
++ set_eflags(get_eflags() | X86_EFLAGS_TF);
++ vtime(&tmp);
++ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
++
++ /*
++ * If vsyscalls are emulated, we expect a single trap in the
++ * vsyscall page -- the call instruction will trap with RIP
++ * pointing to the entry point before emulation takes over.
++ * In native mode, we expect two traps, since whatever code
++ * the vsyscall page contains will be more than just a ret
++ * instruction.
++ */
++ is_native = (num_vsyscall_traps > 1);
++
++ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
++ (is_native ? "native" : "emulated"),
++ (int)num_vsyscall_traps);
++
++ return 0;
++}
++#endif
++
++int main(int argc, char **argv)
++{
++ int nerrs = 0;
++
++ init_vdso();
++ nerrs += init_vsys();
++
++ nerrs += test_gtod();
++ nerrs += test_time();
++ nerrs += test_getcpu(0);
++ nerrs += test_getcpu(1);
++
++ sethandler(SIGSEGV, sigsegv, 0);
++ nerrs += test_vsys_r();
++
++#ifdef __x86_64__
++ nerrs += test_native_vsyscall();
++#endif
++
++ return nerrs ? 1 : 0;
++}
+--
+2.15.0
+
diff --git a/queue/sysfs-cpu-add-vulnerability-folder.patch b/queue/sysfs-cpu-add-vulnerability-folder.patch
new file mode 100644
index 0000000..92522ac
--- /dev/null
+++ b/queue/sysfs-cpu-add-vulnerability-folder.patch
@@ -0,0 +1,149 @@
+From 87590ce6e373d1a5401f6539f0c59ef92dd924a9 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 7 Jan 2018 22:48:00 +0100
+Subject: sysfs/cpu: Add vulnerability folder
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 87590ce6e373d1a5401f6539f0c59ef92dd924a9 upstream.
+
+As the meltdown/spectre problem affects several CPU architectures, it makes
+sense to have common way to express whether a system is affected by a
+particular vulnerability or not. If affected the way to express the
+mitigation should be common as well.
+
+Create /sys/devices/system/cpu/vulnerabilities folder and files for
+meltdown, spectre_v1 and spectre_v2.
+
+Allow architectures to override the show function.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu | 16 +++++++
+ drivers/base/Kconfig | 3 +
+ drivers/base/cpu.c | 48 +++++++++++++++++++++
+ include/linux/cpu.h | 7 +++
+ 4 files changed, 74 insertions(+)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list <
+ Description: AArch64 CPU registers
+ 'identification' directory exposes the CPU ID registers for
+ identifying model and revision of the CPU.
++
++What: /sys/devices/system/cpu/vulnerabilities
++ /sys/devices/system/cpu/vulnerabilities/meltdown
++ /sys/devices/system/cpu/vulnerabilities/spectre_v1
++ /sys/devices/system/cpu/vulnerabilities/spectre_v2
++Date: Januar 2018
++Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
++Description: Information about CPU vulnerabilities
++
++ The files are named after the code names of CPU
++ vulnerabilities. The output of those files reflects the
++ state of the CPUs in the system. Possible output values:
++
++ "Not affected" CPU is not affected by the vulnerability
++ "Vulnerable" CPU is affected and no mitigation in effect
++ "Mitigation: $M" CPU is affetcted and mitigation $M is in effect
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES
+ config GENERIC_CPU_AUTOPROBE
+ bool
+
++config GENERIC_CPU_VULNERABILITIES
++ bool
++
+ config SOC_BUS
+ bool
+
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -499,10 +499,58 @@ static void __init cpu_dev_register_gene
+ #endif
+ }
+
++#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
++
++ssize_t __weak cpu_show_meltdown(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v1(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v2(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return sprintf(buf, "Not affected\n");
++}
++
++static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
++static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
++static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
++
++static struct attribute *cpu_root_vulnerabilities_attrs[] = {
++ &dev_attr_meltdown.attr,
++ &dev_attr_spectre_v1.attr,
++ &dev_attr_spectre_v2.attr,
++ NULL
++};
++
++static const struct attribute_group cpu_root_vulnerabilities_group = {
++ .name = "vulnerabilities",
++ .attrs = cpu_root_vulnerabilities_attrs,
++};
++
++static void __init cpu_register_vulnerabilities(void)
++{
++ if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
++ &cpu_root_vulnerabilities_group))
++ pr_err("Unable to register CPU vulnerabilities\n");
++}
++
++#else
++static inline void cpu_register_vulnerabilities(void) { }
++#endif
++
+ void __init cpu_dev_init(void)
+ {
+ if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
+ panic("Failed to register CPU subsystem");
+
+ cpu_dev_register_generic();
++ cpu_register_vulnerabilities();
+ }
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct d
+ extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
+ extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+
++extern ssize_t cpu_show_meltdown(struct device *dev,
++ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spectre_v1(struct device *dev,
++ struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_spectre_v2(struct device *dev,
++ struct device_attribute *attr, char *buf);
++
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
+ const struct attribute_group **groups,
diff --git a/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch b/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch
new file mode 100644
index 0000000..181d38d
--- /dev/null
+++ b/queue/sysfs-cpu-fix-typos-in-vulnerability-documentation.patch
@@ -0,0 +1,35 @@
+From 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 9 Jan 2018 15:02:51 +0000
+Subject: sysfs/cpu: Fix typos in vulnerability documentation
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 upstream.
+
+Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -355,7 +355,7 @@ What: /sys/devices/system/cpu/vulnerabi
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+-Date: Januar 2018
++Date: January 2018
+ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description: Information about CPU vulnerabilities
+
+@@ -365,4 +365,4 @@ Description: Information about CPU vulne
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+- "Mitigation: $M" CPU is affetcted and mitigation $M is in effect
++ "Mitigation: $M" CPU is affected and mitigation $M is in effect
diff --git a/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch b/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch
new file mode 100644
index 0000000..6ed1c28
--- /dev/null
+++ b/queue/vfs-fdtable-prevent-bounds-check-bypass-via-speculative-execution.patch
@@ -0,0 +1,54 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:03:05 -0800
+Subject: vfs, fdtable: Prevent bounds-check bypass via speculative execution
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 56c30ba7b348b90484969054d561f711ba196507)
+
+'fd' is a user controlled value that is used as a data dependency to
+read from the 'fdt->fd' array. In order to avoid potential leaks of
+kernel memory values, block speculative execution of the instruction
+stream that could issue reads based on an invalid 'file *' returned from
+__fcheck_files.
+
+Co-developed-by: Elena Reshetova <elena.reshetova@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fdtable.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fdtable.h
++++ b/include/linux/fdtable.h
+@@ -9,6 +9,7 @@
+ #include <linux/compiler.h>
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
++#include <linux/nospec.h>
+ #include <linux/types.h>
+ #include <linux/init.h>
+ #include <linux/fs.h>
+@@ -81,8 +82,10 @@ static inline struct file *__fcheck_file
+ {
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+- if (fd < fdt->max_fds)
++ if (fd < fdt->max_fds) {
++ fd = array_index_nospec(fd, fdt->max_fds);
+ return rcu_dereference_raw(fdt->fd[fd]);
++ }
+ return NULL;
+ }
+
diff --git a/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch b/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch
new file mode 100644
index 0000000..bce5e67
--- /dev/null
+++ b/queue/vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch
@@ -0,0 +1,72 @@
+From ben.hutchings@codethink.co.uk Fri Jan 26 17:35:59 2018
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 26 Jan 2018 16:23:02 +0000
+Subject: vsyscall: Fix permissions for emulate mode with KAISER/PTI
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Borislav Petkov <bp@suse.de>, Hugh Dickins <hughd@google.com>, stable@vger.kernel.org
+Message-ID: <20180126162302.ei4tmiltl73npmr6@xylophone.i.decadent.org.uk>
+
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+
+The backport of KAISER to 4.4 turned vsyscall emulate mode into native
+mode. Add a vsyscall_pgprot variable to hold the correct page
+protections, like Borislav and Hugh did for 3.2 and 3.18.
+
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ arch/x86/entry/vsyscall/vsyscall_64.c | 7 ++++---
+ arch/x86/include/asm/vsyscall.h | 1 +
+ arch/x86/mm/kaiser.c | 2 +-
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/vsyscall/vsyscall_64.c
++++ b/arch/x86/entry/vsyscall/vsyscall_64.c
+@@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vs
+ #else
+ EMULATE;
+ #endif
++unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
+
+ static int __init vsyscall_setup(char *str)
+ {
+@@ -336,11 +337,11 @@ void __init map_vsyscall(void)
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+
++ if (vsyscall_mode != NATIVE)
++ vsyscall_pgprot = __PAGE_KERNEL_VVAR;
+ if (vsyscall_mode != NONE)
+ __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+- vsyscall_mode == NATIVE
+- ? PAGE_KERNEL_VSYSCALL
+- : PAGE_KERNEL_VVAR);
++ __pgprot(vsyscall_pgprot));
+
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+ (unsigned long)VSYSCALL_ADDR);
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -13,6 +13,7 @@ extern void map_vsyscall(void);
+ */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+ extern bool vsyscall_enabled(void);
++extern unsigned long vsyscall_pgprot;
+ #else
+ static inline void map_vsyscall(void) {}
+ static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -344,7 +344,7 @@ void __init kaiser_init(void)
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
+ PAGE_SIZE,
+- __PAGE_KERNEL_VSYSCALL);
++ vsyscall_pgprot);
+
+ for_each_possible_cpu(cpu) {
+ void *percpu_vaddr = __per_cpu_user_mapped_start +
diff --git a/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch b/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch
new file mode 100644
index 0000000..483e564
--- /dev/null
+++ b/queue/x86-alternatives-add-missing-n-at-end-of-alternative-inline-asm.patch
@@ -0,0 +1,56 @@
+From b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 4 Jan 2018 14:37:05 +0000
+Subject: x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 upstream.
+
+Where an ALTERNATIVE is used in the middle of an inline asm block, this
+would otherwise lead to the following instruction being appended directly
+to the trailing ".popsection", and a failed compile.
+
+Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: ak@linux.intel.com
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/alternative.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -139,7 +139,7 @@ static inline int alternatives_text_rese
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+- ".popsection"
++ ".popsection\n"
+
+ #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+ OLDINSTR_2(oldinstr, 1, 2) \
+@@ -150,7 +150,7 @@ static inline int alternatives_text_rese
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+- ".popsection"
++ ".popsection\n"
+
+ /*
+ * Alternative instructions for different CPU types or capabilities.
diff --git a/queue/x86-alternatives-fix-optimize_nops-checking.patch b/queue/x86-alternatives-fix-optimize_nops-checking.patch
new file mode 100644
index 0000000..3574563
--- /dev/null
+++ b/queue/x86-alternatives-fix-optimize_nops-checking.patch
@@ -0,0 +1,53 @@
+From 612e8e9350fd19cae6900cf36ea0c6892d1a0dca Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Wed, 10 Jan 2018 12:28:16 +0100
+Subject: x86/alternatives: Fix optimize_nops() checking
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 612e8e9350fd19cae6900cf36ea0c6892d1a0dca upstream.
+
+The alternatives code checks only the first byte whether it is a NOP, but
+with NOPs in front of the payload and having actual instructions after it
+breaks the "optimized' test.
+
+Make sure to scan all bytes before deciding to optimize the NOPs in there.
+
+Reported-by: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Andrew Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -340,9 +340,12 @@ done:
+ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+ unsigned long flags;
++ int i;
+
+- if (instr[0] != 0x90)
+- return;
++ for (i = 0; i < a->padlen; i++) {
++ if (instr[i] != 0x90)
++ return;
++ }
+
+ local_irq_save(flags);
+ add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch b/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch
new file mode 100644
index 0000000..b22fc0a
--- /dev/null
+++ b/queue/x86-asm-move-status-from-thread_struct-to-thread_info.patch
@@ -0,0 +1,172 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:50 -0800
+Subject: x86/asm: Move 'status' from thread_struct to thread_info
+
+From: Andy Lutomirski <luto@kernel.org>
+
+
+(cherry picked from commit 37a8f7c38339b22b69876d6f5a0ab851565284e3)
+
+The TS_COMPAT bit is very hot and is accessed from code paths that mostly
+also touch thread_info::flags. Move it into struct thread_info to improve
+cache locality.
+
+The only reason it was in thread_struct is that there was a brief period
+during which arch-specific fields were not allowed in struct thread_info.
+
+Linus suggested further changing:
+
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+
+to:
+
+ if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED)))
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+
+on the theory that frequently dirtying the cacheline even in pure 64-bit
+code that never needs to modify status hurts performance. That could be a
+reasonable followup patch, but I suspect it matters less on top of this
+patch.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/common.c | 4 ++--
+ arch/x86/include/asm/processor.h | 2 --
+ arch/x86/include/asm/syscall.h | 6 +++---
+ arch/x86/include/asm/thread_info.h | 3 ++-
+ arch/x86/kernel/process_64.c | 4 ++--
+ arch/x86/kernel/ptrace.c | 2 +-
+ arch/x86/kernel/signal.c | 2 +-
+ 7 files changed, 11 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -201,7 +201,7 @@ __visible inline void prepare_exit_to_us
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
+ */
+- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
++ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ #endif
+
+ user_enter_irqoff();
+@@ -299,7 +299,7 @@ static __always_inline void do_syscall_3
+ unsigned int nr = (unsigned int)regs->orig_ax;
+
+ #ifdef CONFIG_IA32_EMULATION
+- current->thread.status |= TS_COMPAT;
++ ti->status |= TS_COMPAT;
+ #endif
+
+ if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -391,8 +391,6 @@ struct thread_struct {
+ unsigned short gsindex;
+ #endif
+
+- u32 status; /* thread synchronous flags */
+-
+ #ifdef CONFIG_X86_64
+ unsigned long fsbase;
+ unsigned long gsbase;
+--- a/arch/x86/include/asm/syscall.h
++++ b/arch/x86/include/asm/syscall.h
+@@ -60,7 +60,7 @@ static inline long syscall_get_error(str
+ * TS_COMPAT is set for 32-bit syscall entries and then
+ * remains set until we return to user mode.
+ */
+- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+@@ -116,7 +116,7 @@ static inline void syscall_get_arguments
+ unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+- if (task->thread.status & TS_COMPAT)
++ if (task->thread_info.status & TS_COMPAT)
+ switch (i) {
+ case 0:
+ if (!n--) break;
+@@ -177,7 +177,7 @@ static inline void syscall_set_arguments
+ const unsigned long *args)
+ {
+ # ifdef CONFIG_IA32_EMULATION
+- if (task->thread.status & TS_COMPAT)
++ if (task->thread_info.status & TS_COMPAT)
+ switch (i) {
+ case 0:
+ if (!n--) break;
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -54,6 +54,7 @@ struct task_struct;
+
+ struct thread_info {
+ unsigned long flags; /* low level flags */
++ u32 status; /* thread synchronous flags */
+ };
+
+ #define INIT_THREAD_INFO(tsk) \
+@@ -213,7 +214,7 @@ static inline int arch_within_stack_fram
+ #define in_ia32_syscall() true
+ #else
+ #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+- current->thread.status & TS_COMPAT)
++ current_thread_info()->status & TS_COMPAT)
+ #endif
+
+ /*
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -538,7 +538,7 @@ void set_personality_ia32(bool x32)
+ current->personality &= ~READ_IMPLIES_EXEC;
+ /* in_compat_syscall() uses the presence of the x32
+ syscall bit flag to determine compat status */
+- current->thread.status &= ~TS_COMPAT;
++ current_thread_info()->status &= ~TS_COMPAT;
+ } else {
+ set_thread_flag(TIF_IA32);
+ clear_thread_flag(TIF_X32);
+@@ -546,7 +546,7 @@ void set_personality_ia32(bool x32)
+ current->mm->context.ia32_compat = TIF_IA32;
+ current->personality |= force_personality32;
+ /* Prepare the first "return" to user space */
+- current->thread.status |= TS_COMPAT;
++ current_thread_info()->status |= TS_COMPAT;
+ }
+ }
+ EXPORT_SYMBOL_GPL(set_personality_ia32);
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *
+ */
+ regs->orig_ax = value;
+ if (syscall_get_nr(child, regs) >= 0)
+- child->thread.status |= TS_I386_REGS_POKED;
++ child->thread_info.status |= TS_I386_REGS_POKED;
+ break;
+
+ case offsetof(struct user32, regs.eflags):
+--- a/arch/x86/kernel/signal.c
++++ b/arch/x86/kernel/signal.c
+@@ -785,7 +785,7 @@ static inline unsigned long get_nr_resta
+ * than the tracee.
+ */
+ #ifdef CONFIG_IA32_EMULATION
+- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
++ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ return __NR_ia32_restart_syscall;
+ #endif
+ #ifdef CONFIG_X86_X32_ABI
diff --git a/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch b/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
new file mode 100644
index 0000000..d1036be
--- /dev/null
+++ b/queue/x86-asm-use-register-variable-to-get-stack-pointer-value.patch
@@ -0,0 +1,138 @@
+From 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc Mon Sep 17 00:00:00 2001
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Fri, 29 Sep 2017 17:15:36 +0300
+Subject: x86/asm: Use register variable to get stack pointer value
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream.
+
+Currently we use current_stack_pointer() function to get the value
+of the stack pointer register. Since commit:
+
+ f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang")
+
+... we have a stack register variable declared. It can be used instead of
+current_stack_pointer() function which allows to optimize away some
+excessive "mov %rsp, %<dst>" instructions:
+
+ -mov %rsp,%rdx
+ -sub %rdx,%rax
+ -cmp $0x3fff,%rax
+ -ja ffffffff810722fd <ist_begin_non_atomic+0x2d>
+
+ +sub %rsp,%rax
+ +cmp $0x3fff,%rax
+ +ja ffffffff810722fa <ist_begin_non_atomic+0x2a>
+
+Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer
+and use it instead of the removed function.
+
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@virtuozzo.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[dwmw2: We want ASM_CALL_CONSTRAINT for retpoline]
+Signed-off-by: David Woodhouse <dwmw@amazon.co.ku>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm.h | 11 +++++++++++
+ arch/x86/include/asm/thread_info.h | 11 -----------
+ arch/x86/kernel/irq_32.c | 6 +++---
+ arch/x86/kernel/traps.c | 2 +-
+ arch/x86/mm/tlb.c | 2 +-
+ 5 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/include/asm/asm.h
++++ b/arch/x86/include/asm/asm.h
+@@ -125,4 +125,15 @@
+ /* For C file, we already have NOKPROBE_SYMBOL macro */
+ #endif
+
++#ifndef __ASSEMBLY__
++/*
++ * This output constraint should be used for any inline asm which has a "call"
++ * instruction. Otherwise the asm may be inserted before the frame pointer
++ * gets set up by the containing function. If you forget to do this, objtool
++ * may print a "call without frame pointer save/setup" warning.
++ */
++register unsigned long current_stack_pointer asm(_ASM_SP);
++#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
++#endif
++
+ #endif /* _ASM_X86_ASM_H */
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -152,17 +152,6 @@ struct thread_info {
+ */
+ #ifndef __ASSEMBLY__
+
+-static inline unsigned long current_stack_pointer(void)
+-{
+- unsigned long sp;
+-#ifdef CONFIG_X86_64
+- asm("mov %%rsp,%0" : "=g" (sp));
+-#else
+- asm("mov %%esp,%0" : "=g" (sp));
+-#endif
+- return sp;
+-}
+-
+ /*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+--- a/arch/x86/kernel/irq_32.c
++++ b/arch/x86/kernel/irq_32.c
+@@ -64,7 +64,7 @@ static void call_on_stack(void *func, vo
+
+ static inline void *current_stack(void)
+ {
+- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
++ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+ }
+
+ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
+@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(i
+
+ /* Save the next esp at the bottom of the stack */
+ prev_esp = (u32 *)irqstk;
+- *prev_esp = current_stack_pointer();
++ *prev_esp = current_stack_pointer;
+
+ if (unlikely(overflow))
+ call_on_stack(print_stack_overflow, isp);
+@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
+
+ /* Push the previous esp onto the stack */
+ prev_esp = (u32 *)irqstk;
+- *prev_esp = current_stack_pointer();
++ *prev_esp = current_stack_pointer;
+
+ call_on_stack(__do_softirq, isp);
+ }
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs
+ * from double_fault.
+ */
+ BUG_ON((unsigned long)(current_top_of_stack() -
+- current_stack_pointer()) >= THREAD_SIZE);
++ current_stack_pointer) >= THREAD_SIZE);
+
+ preempt_enable_no_resched();
+ }
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
++ unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
+
+ pgd_t *pgd = next->pgd + stack_pgd_index;
+
diff --git a/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch b/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
new file mode 100644
index 0000000..bb7c7d3
--- /dev/null
+++ b/queue/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
@@ -0,0 +1,178 @@
+From e505371dd83963caae1a37ead9524e8d997341be Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 17 Jul 2017 16:10:33 -0500
+Subject: x86/boot: Add early cmdline parsing for options with arguments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit e505371dd83963caae1a37ead9524e8d997341be upstream.
+
+Add a cmdline_find_option() function to look for cmdline options that
+take arguments. The argument is returned in a supplied buffer and the
+argument length (regardless of whether it fits in the supplied buffer)
+is returned, with -1 indicating not found.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Larry Woodman <lwoodman@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matt Fleming <matt@codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Toshimitsu Kani <toshi.kani@hpe.com>
+Cc: kasan-dev@googlegroups.com
+Cc: kvm@vger.kernel.org
+Cc: linux-arch@vger.kernel.org
+Cc: linux-doc@vger.kernel.org
+Cc: linux-efi@vger.kernel.org
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cmdline.h | 2
+ arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 107 insertions(+)
+
+--- a/arch/x86/include/asm/cmdline.h
++++ b/arch/x86/include/asm/cmdline.h
+@@ -2,5 +2,7 @@
+ #define _ASM_X86_CMDLINE_H
+
+ int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
++int cmdline_find_option(const char *cmdline_ptr, const char *option,
++ char *buffer, int bufsize);
+
+ #endif /* _ASM_X86_CMDLINE_H */
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *c
+ return 0; /* Buffer overrun */
+ }
+
++/*
++ * Find a non-boolean option (i.e. option=argument). In accordance with
++ * standard Linux practice, if this option is repeated, this returns the
++ * last instance on the command line.
++ *
++ * @cmdline: the cmdline string
++ * @max_cmdline_size: the maximum size of cmdline
++ * @option: option string to look for
++ * @buffer: memory buffer to return the option argument
++ * @bufsize: size of the supplied memory buffer
++ *
++ * Returns the length of the argument (regardless of if it was
++ * truncated to fit in the buffer), or -1 on not found.
++ */
++static int
++__cmdline_find_option(const char *cmdline, int max_cmdline_size,
++ const char *option, char *buffer, int bufsize)
++{
++ char c;
++ int pos = 0, len = -1;
++ const char *opptr = NULL;
++ char *bufptr = buffer;
++ enum {
++ st_wordstart = 0, /* Start of word/after whitespace */
++ st_wordcmp, /* Comparing this word */
++ st_wordskip, /* Miscompare, skip */
++ st_bufcpy, /* Copying this to buffer */
++ } state = st_wordstart;
++
++ if (!cmdline)
++ return -1; /* No command line */
++
++ /*
++ * This 'pos' check ensures we do not overrun
++ * a non-NULL-terminated 'cmdline'
++ */
++ while (pos++ < max_cmdline_size) {
++ c = *(char *)cmdline++;
++ if (!c)
++ break;
++
++ switch (state) {
++ case st_wordstart:
++ if (myisspace(c))
++ break;
++
++ state = st_wordcmp;
++ opptr = option;
++ /* fall through */
++
++ case st_wordcmp:
++ if ((c == '=') && !*opptr) {
++ /*
++ * We matched all the way to the end of the
++ * option we were looking for, prepare to
++ * copy the argument.
++ */
++ len = 0;
++ bufptr = buffer;
++ state = st_bufcpy;
++ break;
++ } else if (c == *opptr++) {
++ /*
++ * We are currently matching, so continue
++ * to the next character on the cmdline.
++ */
++ break;
++ }
++ state = st_wordskip;
++ /* fall through */
++
++ case st_wordskip:
++ if (myisspace(c))
++ state = st_wordstart;
++ break;
++
++ case st_bufcpy:
++ if (myisspace(c)) {
++ state = st_wordstart;
++ } else {
++ /*
++ * Increment len, but don't overrun the
++ * supplied buffer and leave room for the
++ * NULL terminator.
++ */
++ if (++len < bufsize)
++ *bufptr++ = c;
++ }
++ break;
++ }
++ }
++
++ if (bufsize)
++ *bufptr = '\0';
++
++ return len;
++}
++
+ int cmdline_find_option_bool(const char *cmdline, const char *option)
+ {
+ return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+ }
++
++int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
++ int bufsize)
++{
++ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
++ buffer, bufsize);
++}
diff --git a/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch b/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch
new file mode 100644
index 0000000..d9380c1
--- /dev/null
+++ b/queue/x86-bugs-drop-one-mitigation-from-dmesg.patch
@@ -0,0 +1,52 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 26 Jan 2018 13:11:39 +0100
+Subject: x86/bugs: Drop one "mitigation" from dmesg
+
+From: Borislav Petkov <bp@suse.de>
+
+(cherry picked from commit 55fa19d3e51f33d9cd4056d25836d93abf9438db)
+
+Make
+
+[ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline
+
+into
+
+[ 0.031118] Spectre V2: Mitigation: Full generic retpoline
+
+to reduce the mitigation mitigations strings.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: riel@redhat.com
+Cc: ak@linux.intel.com
+Cc: peterz@infradead.org
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: jikos@kernel.org
+Cc: luto@amacapital.net
+Cc: dave.hansen@intel.com
+Cc: torvalds@linux-foundation.org
+Cc: keescook@google.com
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: tim.c.chen@linux.intel.com
+Cc: pjt@google.com
+Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -90,7 +90,7 @@ static const char *spectre_v2_strings[]
+ };
+
+ #undef pr_fmt
+-#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
++#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+ static bool spectre_v2_bad_module;
diff --git a/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch b/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
new file mode 100644
index 0000000..db8ec7a
--- /dev/null
+++ b/queue/x86-cpu-amd-make-lfence-a-serializing-instruction.patch
@@ -0,0 +1,66 @@
+From e4d0e84e490790798691aaa0f2e598637f1867ec Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:21 -0600
+Subject: x86/cpu/AMD: Make LFENCE a serializing instruction
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit e4d0e84e490790798691aaa0f2e598637f1867ec upstream.
+
+To aid in speculation control, make LFENCE a serializing instruction
+since it has less overhead than MFENCE. This is done by setting bit 1
+of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not
+have this MSR. For these families, the LFENCE instruction is already
+serializing.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/msr-index.h | 2 ++
+ arch/x86/kernel/cpu/amd.c | 10 ++++++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -330,6 +330,8 @@
+ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+ #define FAM10H_MMIO_CONF_BASE_SHIFT 20
+ #define MSR_FAM10H_NODE_ID 0xc001100c
++#define MSR_F10H_DECFG 0xc0011029
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,16 @@ static void init_amd(struct cpuinfo_x86
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
++ /*
++ * A serializing LFENCE has less overhead than MFENCE, so
++ * use it for execution serialization. On families which
++ * don't have that MSR, LFENCE is already serializing.
++ * msr_set_bit() uses the safe accessors, too, even if the MSR
++ * is not present.
++ */
++ msr_set_bit(MSR_F10H_DECFG,
++ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
++
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
diff --git a/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch b/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
new file mode 100644
index 0000000..8c8a0ce
--- /dev/null
+++ b/queue/x86-cpu-amd-use-lfence_rdtsc-in-preference-to-mfence_rdtsc.patch
@@ -0,0 +1,81 @@
+From 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Mon, 8 Jan 2018 16:09:32 -0600
+Subject: x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f upstream.
+
+With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference
+to MFENCE_RDTSC. However, since the kernel could be running under a
+hypervisor that does not support writing that MSR, read the MSR back and
+verify that the bit has been set successfully. If the MSR can be read
+and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the
+MFENCE_RDTSC feature.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -332,6 +332,7 @@
+ #define MSR_FAM10H_NODE_ID 0xc001100c
+ #define MSR_F10H_DECFG 0xc0011029
+ #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
++#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+ /* K8 MSRs */
+ #define MSR_K8_TOP_MEM1 0xc001001a
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -782,6 +782,9 @@ static void init_amd(struct cpuinfo_x86
+ set_cpu_cap(c, X86_FEATURE_K8);
+
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
++ unsigned long long val;
++ int ret;
++
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+@@ -792,8 +795,19 @@ static void init_amd(struct cpuinfo_x86
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+- /* MFENCE stops RDTSC speculation */
+- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++ /*
++ * Verify that the MSR write was successful (could be running
++ * under a hypervisor) and only then assume that LFENCE is
++ * serializing.
++ */
++ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
++ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
++ /* A serializing LFENCE stops RDTSC speculation */
++ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
++ } else {
++ /* MFENCE stops RDTSC speculation */
++ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
++ }
+ }
+
+ /*
diff --git a/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch b/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch
new file mode 100644
index 0000000..f8a1204
--- /dev/null
+++ b/queue/x86-cpu-bugs-make-retpoline-module-warning-conditional.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 27 Jan 2018 15:45:14 +0100
+Subject: x86/cpu/bugs: Make retpoline module warning conditional
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+(cherry picked from commit e383095c7fe8d218e00ec0f83e4b95ed4e627b02)
+
+If sysfs is disabled and RETPOLINE not defined:
+
+arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used
+[-Wunused-variable]
+ static bool spectre_v2_bad_module;
+
+Hide it.
+
+Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module")
+Reported-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -93,9 +93,10 @@ static const char *spectre_v2_strings[]
+ #define pr_fmt(fmt) "Spectre V2 : " fmt
+
+ static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+-static bool spectre_v2_bad_module;
+
+ #ifdef RETPOLINE
++static bool spectre_v2_bad_module;
++
+ bool retpoline_module_ok(bool has_retpoline)
+ {
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+@@ -105,6 +106,13 @@ bool retpoline_module_ok(bool has_retpol
+ spectre_v2_bad_module = true;
+ return false;
+ }
++
++static inline const char *spectre_v2_module_string(void)
++{
++ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
++}
++#else
++static inline const char *spectre_v2_module_string(void) { return ""; }
+ #endif
+
+ static void __init spec2_print_if_insecure(const char *reason)
+@@ -299,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+- boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "",
+- spectre_v2_bad_module ? " - vulnerable module loaded" : "");
++ boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
++ spectre_v2_module_string());
+ }
+ #endif
diff --git a/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch b/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch
new file mode 100644
index 0000000..8c79c8f
--- /dev/null
+++ b/queue/x86-cpu-factor-out-application-of-forced-cpu-caps.patch
@@ -0,0 +1,79 @@
+From 8bf1ebca215c262e48c15a4a15f175991776f57f Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 18 Jan 2017 11:15:38 -0800
+Subject: x86/cpu: Factor out application of forced CPU caps
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 8bf1ebca215c262e48c15a4a15f175991776f57f upstream.
+
+There are multiple call sites that apply forced CPU caps. Factor
+them into a helper.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Fenghua Yu <fenghua.yu@intel.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Whitehead <tedheadster@gmail.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
+Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/common.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
+ }
+ }
+
++static void apply_forced_caps(struct cpuinfo_x86 *c)
++{
++ int i;
++
++ for (i = 0; i < NCAPINTS; i++) {
++ c->x86_capability[i] &= ~cpu_caps_cleared[i];
++ c->x86_capability[i] |= cpu_caps_set[i];
++ }
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ u32 eax, ebx, ecx, edx;
+@@ -1086,10 +1096,7 @@ static void identify_cpu(struct cpuinfo_
+ this_cpu->c_identify(c);
+
+ /* Clear/Set all flags overridden by options, after probe */
+- for (i = 0; i < NCAPINTS; i++) {
+- c->x86_capability[i] &= ~cpu_caps_cleared[i];
+- c->x86_capability[i] |= cpu_caps_set[i];
+- }
++ apply_forced_caps(c);
+
+ #ifdef CONFIG_X86_64
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+@@ -1151,10 +1158,7 @@ static void identify_cpu(struct cpuinfo_
+ * Clear/Set all flags overridden by options, need do it
+ * before following smp all cpus cap AND.
+ */
+- for (i = 0; i < NCAPINTS; i++) {
+- c->x86_capability[i] &= ~cpu_caps_cleared[i];
+- c->x86_capability[i] |= cpu_caps_set[i];
+- }
++ apply_forced_caps(c);
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
diff --git a/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch b/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch
new file mode 100644
index 0000000..3d30088
--- /dev/null
+++ b/queue/x86-cpu-implement-cpu-vulnerabilites-sysfs-functions.patch
@@ -0,0 +1,82 @@
+From 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 7 Jan 2018 22:48:01 +0100
+Subject: x86/cpu: Implement CPU vulnerabilites sysfs functions
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 upstream.
+
+Implement the CPU vulnerabilty show functions for meltdown, spectre_v1 and
+spectre_v2.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Kconfig | 1 +
+ arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++
+ 2 files changed, 30 insertions(+)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -64,6 +64,7 @@ config X86
+ select GENERIC_CLOCKEVENTS_MIN_ADJUST
+ select GENERIC_CMOS_UPDATE
+ select GENERIC_CPU_AUTOPROBE
++ select GENERIC_CPU_VULNERABILITIES
+ select GENERIC_EARLY_IOREMAP
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_IOMAP
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -9,6 +9,7 @@
+ */
+ #include <linux/init.h>
+ #include <linux/utsname.h>
++#include <linux/cpu.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+ #include <asm/processor-flags.h>
+@@ -67,3 +68,31 @@ void __init check_bugs(void)
+ set_memory_4k((unsigned long)__va(0), 1);
+ #endif
+ }
++
++#ifdef CONFIG_SYSFS
++ssize_t cpu_show_meltdown(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++ return sprintf(buf, "Not affected\n");
++ if (boot_cpu_has(X86_FEATURE_KAISER))
++ return sprintf(buf, "Mitigation: PTI\n");
++ return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v1(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
++ return sprintf(buf, "Not affected\n");
++ return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v2(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++ return sprintf(buf, "Not affected\n");
++ return sprintf(buf, "Vulnerable\n");
++}
++#endif
diff --git a/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch b/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch
new file mode 100644
index 0000000..cceebca
--- /dev/null
+++ b/queue/x86-cpu-merge-bugs.c-and-bugs_64.c.patch
@@ -0,0 +1,136 @@
+From 62a67e123e058a67db58bc6a14354dd037bafd0a Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 24 Oct 2016 19:38:43 +0200
+Subject: x86/cpu: Merge bugs.c and bugs_64.c
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 62a67e123e058a67db58bc6a14354dd037bafd0a upstream.
+
+Should be easier when following boot paths. It probably is a left over
+from the x86 unification eons ago.
+
+No functionality change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/20161024173844.23038-3-bp@alien8.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/Makefile | 4 +---
+ arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++----
+ arch/x86/kernel/cpu/bugs_64.c | 33 ---------------------------------
+ 3 files changed, 23 insertions(+), 40 deletions(-)
+ delete mode 100644 arch/x86/kernel/cpu/bugs_64.c
+
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o
+ obj-y += common.o
+ obj-y += rdrand.o
+ obj-y += match.o
++obj-y += bugs.o
+
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+
+-obj-$(CONFIG_X86_32) += bugs.o
+-obj-$(CONFIG_X86_64) += bugs_64.o
+-
+ obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
+ obj-$(CONFIG_CPU_SUP_AMD) += amd.o
+ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -16,6 +16,8 @@
+ #include <asm/msr.h>
+ #include <asm/paravirt.h>
+ #include <asm/alternative.h>
++#include <asm/pgtable.h>
++#include <asm/cacheflush.h>
+
+ void __init check_bugs(void)
+ {
+@@ -28,11 +30,13 @@ void __init check_bugs(void)
+ #endif
+
+ identify_boot_cpu();
+-#ifndef CONFIG_SMP
+- pr_info("CPU: ");
+- print_cpu_info(&boot_cpu_data);
+-#endif
+
++ if (!IS_ENABLED(CONFIG_SMP)) {
++ pr_info("CPU: ");
++ print_cpu_info(&boot_cpu_data);
++ }
++
++#ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+ *
+@@ -48,4 +52,18 @@ void __init check_bugs(void)
+ alternative_instructions();
+
+ fpu__init_check_bugs();
++#else /* CONFIG_X86_64 */
++ alternative_instructions();
++
++ /*
++ * Make sure the first 2MB area is not mapped by huge pages
++ * There are typically fixed size MTRRs in there and overlapping
++ * MTRRs into large pages causes slow downs.
++ *
++ * Right now we don't do that with gbpages because there seems
++ * very little benefit for that case.
++ */
++ if (!direct_gbpages)
++ set_memory_4k((unsigned long)__va(0), 1);
++#endif
+ }
+--- a/arch/x86/kernel/cpu/bugs_64.c
++++ /dev/null
+@@ -1,33 +0,0 @@
+-/*
+- * Copyright (C) 1994 Linus Torvalds
+- * Copyright (C) 2000 SuSE
+- */
+-
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <asm/alternative.h>
+-#include <asm/bugs.h>
+-#include <asm/processor.h>
+-#include <asm/mtrr.h>
+-#include <asm/cacheflush.h>
+-
+-void __init check_bugs(void)
+-{
+- identify_boot_cpu();
+-#if !defined(CONFIG_SMP)
+- pr_info("CPU: ");
+- print_cpu_info(&boot_cpu_data);
+-#endif
+- alternative_instructions();
+-
+- /*
+- * Make sure the first 2MB area is not mapped by huge pages
+- * There are typically fixed size MTRRs in there and overlapping
+- * MTRRs into large pages causes slow downs.
+- *
+- * Right now we don't do that with gbpages because there seems
+- * very little benefit for that case.
+- */
+- if (!direct_gbpages)
+- set_memory_4k((unsigned long)__va(0), 1);
+-}
diff --git a/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch b/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch
new file mode 100644
index 0000000..7a7cbec
--- /dev/null
+++ b/queue/x86-cpu-x86-pti-do-not-enable-pti-on-amd-processors.patch
@@ -0,0 +1,46 @@
+From 694d99d40972f12e59a3696effee8a376b79d7c8 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Tue, 26 Dec 2017 23:43:54 -0600
+Subject: x86/cpu, x86/pti: Do not enable PTI on AMD processors
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 694d99d40972f12e59a3696effee8a376b79d7c8 upstream.
+
+AMD processors are not subject to the types of attacks that the kernel
+page table isolation feature protects against. The AMD microarchitecture
+does not allow memory references, including speculative references, that
+access higher privileged data when running in a lesser privileged mode
+when that access would result in a page fault.
+
+Disable page table isolation by default on AMD processors by not setting
+the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI
+is set.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net
+Cc: Nick Lowe <nick.lowe@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -883,8 +883,8 @@ static void __init early_identify_cpu(st
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+- /* Assume for now that ALL x86 CPUs are insecure */
+- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++ if (c->x86_vendor != X86_VENDOR_AMD)
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
diff --git a/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch b/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch
new file mode 100644
index 0000000..6397ee8
--- /dev/null
+++ b/queue/x86-cpufeature-blacklist-spec_ctrl-pred_cmd-on-early-spectre-v2-microcodes.patch
@@ -0,0 +1,167 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:14 +0000
+Subject: x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit a5b2966364538a0e68c9fa29bc0a3a1651799035)
+
+This doesn't refuse to load the affected microcodes; it just refuses to
+use the Spectre v2 mitigation features if they're detected, by clearing
+the appropriate feature bits.
+
+The AMD CPUID bits are handled here too, because hypervisors *may* have
+been exposing those bits even on Intel chips, for fine-grained control
+of what's available.
+
+It is non-trivial to use x86_match_cpu() for this table because that
+doesn't handle steppings. And the approach taken in commit bd9240a18
+almost made me lose my lunch.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/intel-family.h | 7 ++-
+ arch/x86/kernel/cpu/intel.c | 66 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 71 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -12,6 +12,7 @@
+ */
+
+ #define INTEL_FAM6_CORE_YONAH 0x0E
++
+ #define INTEL_FAM6_CORE2_MEROM 0x0F
+ #define INTEL_FAM6_CORE2_MEROM_L 0x16
+ #define INTEL_FAM6_CORE2_PENRYN 0x17
+@@ -21,6 +22,7 @@
+ #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
+ #define INTEL_FAM6_NEHALEM_EP 0x1A
+ #define INTEL_FAM6_NEHALEM_EX 0x2E
++
+ #define INTEL_FAM6_WESTMERE 0x25
+ #define INTEL_FAM6_WESTMERE_EP 0x2C
+ #define INTEL_FAM6_WESTMERE_EX 0x2F
+@@ -36,9 +38,9 @@
+ #define INTEL_FAM6_HASWELL_GT3E 0x46
+
+ #define INTEL_FAM6_BROADWELL_CORE 0x3D
+-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+ #define INTEL_FAM6_BROADWELL_GT3E 0x47
+ #define INTEL_FAM6_BROADWELL_X 0x4F
++#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+
+ #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
+ #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+@@ -57,9 +59,10 @@
+ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
+ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
+ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
+-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
++#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
+ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C
+ #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
++#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+
+ /* Xeon Phi */
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x8
+ }
+ }
+
++/*
++ * Early microcode releases for the Spectre v2 mitigation were broken.
++ * Information taken from;
++ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
++ * - https://kb.vmware.com/s/article/52345
++ * - Microcode revisions observed in the wild
++ * - Release note from 20180108 microcode release
++ */
++struct sku_microcode {
++ u8 model;
++ u8 stepping;
++ u32 microcode;
++};
++static const struct sku_microcode spectre_bad_microcodes[] = {
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
++ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
++ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
++ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
++ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
++ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
++ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
++ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
++ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
++ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
++ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
++ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
++ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
++ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
++ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
++ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
++ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
++ /* Updated in the 20180108 release; blacklist until we know otherwise */
++ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
++ /* Observed in the wild */
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
++ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
++};
++
++static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
++ if (c->x86_model == spectre_bad_microcodes[i].model &&
++ c->x86_mask == spectre_bad_microcodes[i].stepping)
++ return (c->microcode <= spectre_bad_microcodes[i].microcode);
++ }
++ return false;
++}
++
+ static void early_init_intel(struct cpuinfo_x86 *c)
+ {
+ u64 misc_enable;
+@@ -87,6 +140,19 @@ static void early_init_intel(struct cpui
+ rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ }
+
++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++ cpu_has(c, X86_FEATURE_STIBP) ||
++ cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
++ cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
++ cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
++ pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
++ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++ clear_cpu_cap(c, X86_FEATURE_STIBP);
++ clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
++ clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
++ clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
++ }
++
+ /*
+ * Atom erratum AAE44/AAF40/AAG38/AAH41:
+ *
diff --git a/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch b/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch
new file mode 100644
index 0000000..37baee4
--- /dev/null
+++ b/queue/x86-cpufeature-move-processor-tracing-out-of-scattered-features.patch
@@ -0,0 +1,68 @@
+From 4fdec2034b7540dda461c6ba33325dfcff345c64 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 16 Jan 2018 16:42:25 +0100
+Subject: x86/cpufeature: Move processor tracing out of scattered features
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 4fdec2034b7540dda461c6ba33325dfcff345c64 upstream.
+
+Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX),
+so do not duplicate it in the scattered features word.
+
+Besides being more tidy, this will be useful for KVM when it presents
+processor tracing to the guests. KVM selects host features that are
+supported by both the host kernel (depending on command line options,
+CPU errata, or whatever) and KVM. Whenever a full feature word exists,
+KVM's code is written in the expectation that the CPUID bit number
+matches the X86_FEATURE_* bit number, but this is not the case for
+X86_FEATURE_INTEL_PT.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Luwei Kang <luwei.kang@intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Radim Krčmář <rkrcmar@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm@vger.kernel.org
+Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/kernel/cpu/scattered.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -197,7 +197,6 @@
+ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+
+-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+@@ -236,6 +235,7 @@
+ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
++#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -31,7 +31,6 @@ void init_scattered_cpuid_features(struc
+ const struct cpuid_bit *cb;
+
+ static const struct cpuid_bit cpuid_bits[] = {
+- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
+ { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
diff --git a/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch b/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch
new file mode 100644
index 0000000..7b48105
--- /dev/null
+++ b/queue/x86-cpufeatures-add-amd-feature-bits-for-speculation-control.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:11 +0000
+Subject: x86/cpufeatures: Add AMD feature bits for Speculation Control
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 5d10cbc91d9eb5537998b65608441b592eec65e7)
+
+AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel.
+See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -258,6 +258,9 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
++#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
++#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
++#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
diff --git a/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch b/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch
new file mode 100644
index 0000000..a47b816
--- /dev/null
+++ b/queue/x86-cpufeatures-add-cpuid_7_edx-cpuid-leaf.patch
@@ -0,0 +1,149 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:09 +0000
+Subject: x86/cpufeatures: Add CPUID_7_EDX CPUID leaf
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f)
+
+This is a pure feature bits leaf. There are two AVX512 feature bits in it
+already which were handled as scattered bits, and three more from this leaf
+are going to be added for speculation control features.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeature.h | 7 +++++--
+ arch/x86/include/asm/cpufeatures.h | 10 ++++++----
+ arch/x86/include/asm/disabled-features.h | 3 ++-
+ arch/x86/include/asm/required-features.h | 3 ++-
+ arch/x86/kernel/cpu/common.c | 1 +
+ arch/x86/kernel/cpu/scattered.c | 2 --
+ 6 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -28,6 +28,7 @@ enum cpuid_leafs
+ CPUID_8000_000A_EDX,
+ CPUID_7_ECX,
+ CPUID_8000_0007_EBX,
++ CPUID_7_EDX,
+ };
+
+ #ifdef CONFIG_X86_FEATURE_NAMES
+@@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
+ REQUIRED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define DISABLED_MASK_BIT_SET(feature_bit) \
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
+@@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
+ DISABLED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -12,7 +12,7 @@
+ /*
+ * Defines x86 CPU feature bits
+ */
+-#define NCAPINTS 18 /* N 32-bit words worth of info */
++#define NCAPINTS 19 /* N 32-bit words worth of info */
+ #define NBUGINTS 1 /* N 32-bit bug flags */
+
+ /*
+@@ -197,9 +197,7 @@
+ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+
+-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+@@ -295,6 +293,10 @@
+ #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+ #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
++/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
++#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
++#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++
+ /*
+ * BUG word(s)
+ */
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -59,6 +59,7 @@
+ #define DISABLED_MASK15 0
+ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+ #define DISABLED_MASK17 0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define DISABLED_MASK18 0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -100,6 +100,7 @@
+ #define REQUIRED_MASK15 0
+ #define REQUIRED_MASK16 0
+ #define REQUIRED_MASK17 0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
++#define REQUIRED_MASK18 0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -737,6 +737,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_7_0_EBX] = ebx;
+ c->x86_capability[CPUID_7_ECX] = ecx;
++ c->x86_capability[CPUID_7_EDX] = edx;
+ }
+
+ /* Extended state features: level 0x0000000d */
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struc
+ const struct cpuid_bit *cb;
+
+ static const struct cpuid_bit cpuid_bits[] = {
+- { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
+- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
+ { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch b/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch
new file mode 100644
index 0000000..568f3bf
--- /dev/null
+++ b/queue/x86-cpufeatures-add-intel-feature-bits-for-speculation-control.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:10 +0000
+Subject: x86/cpufeatures: Add Intel feature bits for Speculation Control
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit fc67dd70adb711a45d2ef34e12d1a8be75edde61)
+
+Add three feature bits exposed by new microcode on Intel CPUs for
+speculation control.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -296,6 +296,9 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
+ /*
+ * BUG word(s)
diff --git a/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch b/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
new file mode 100644
index 0000000..0f6fc2c
--- /dev/null
+++ b/queue/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
@@ -0,0 +1,73 @@
+From a89f040fa34ec9cd682aed98b8f04e3c47d998bd Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:33 +0100
+Subject: x86/cpufeatures: Add X86_BUG_CPU_INSECURE
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream.
+
+Many x86 CPUs leak information to user space due to missing isolation of
+user space and kernel space page tables. There are many well documented
+ways to exploit that.
+
+The upcoming software migitation of isolating the user and kernel space
+page tables needs a misfeature flag so code can be made runtime
+conditional.
+
+Add the BUG bits which indicates that the CPU is affected and add a feature
+bit which indicates that the software migitation is enabled.
+
+Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be
+made later.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/common.c | 4 ++++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,5 +316,6 @@
+ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
++#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -882,6 +882,10 @@ static void __init early_identify_cpu(st
+ }
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
++
++ /* Assume for now that ALL x86 CPUs are insecure */
++ setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++
+ fpu__init_system(c);
+ }
+
diff --git a/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch b/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch
new file mode 100644
index 0000000..e59cfe5
--- /dev/null
+++ b/queue/x86-cpufeatures-add-x86_bug_spectre_v.patch
@@ -0,0 +1,58 @@
+From 99c6fa2511d8a683e61468be91b83f85452115fa Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 6 Jan 2018 11:49:23 +0000
+Subject: x86/cpufeatures: Add X86_BUG_SPECTRE_V[12]
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 99c6fa2511d8a683e61468be91b83f85452115fa upstream.
+
+Add the bug bits for spectre v1/2 and force them unconditionally for all
+cpus.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeatures.h | 2 ++
+ arch/x86/kernel/cpu/common.c | 3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -317,5 +317,7 @@
+ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
++#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
++#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -886,6 +886,9 @@ static void __init early_identify_cpu(st
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++
+ fpu__init_system(c);
+ }
+
diff --git a/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch b/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch
new file mode 100644
index 0000000..96900c0
--- /dev/null
+++ b/queue/x86-cpufeatures-clean-up-spectre-v2-related-cpuid-flags.patch
@@ -0,0 +1,171 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Sat, 27 Jan 2018 16:24:32 +0000
+Subject: x86/cpufeatures: Clean up Spectre v2 related CPUID flags
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2)
+
+We want to expose the hardware features simply in /proc/cpuinfo as "ibrs",
+"ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them
+as the user-visible bits.
+
+When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB
+capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP
+bit is set, set the AMD STIBP that's used for the generic hardware
+capability.
+
+Hide the rest from /proc/cpuinfo by putting "" in the comments. Including
+RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are
+patches to make the sysfs vulnerabilities information non-readable by
+non-root, and the same should apply to all information about which
+mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo.
+
+The feature bit for whether IBPB is actually used, which is needed for
+ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB.
+
+Originally-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: ak@linux.intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 18 +++++++++---------
+ arch/x86/include/asm/nospec-branch.h | 2 +-
+ arch/x86/kernel/cpu/bugs.c | 7 +++----
+ arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++++----------
+ 4 files changed, 34 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -194,15 +194,15 @@
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+-#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
+-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
+-#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/
++#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+@@ -260,9 +260,9 @@
+ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
+-#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */
+-#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */
+-#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */
++#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
++#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
++#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+
+ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+@@ -301,8 +301,8 @@
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+-#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
++#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
++#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
+ /*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -225,7 +225,7 @@ static inline void indirect_branch_predi
+ "movl %[val], %%eax\n\t"
+ "movl $0, %%edx\n\t"
+ "wrmsr",
+- X86_FEATURE_IBPB)
++ X86_FEATURE_USE_IBPB)
+ : : [msr] "i" (MSR_IA32_PRED_CMD),
+ [val] "i" (PRED_CMD_IBPB)
+ : "eax", "ecx", "edx", "memory");
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -272,9 +272,8 @@ retpoline_auto:
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+- if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+- boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
+- setup_force_cpu_cap(X86_FEATURE_IBPB);
++ if (boot_cpu_has(X86_FEATURE_IBPB)) {
++ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ }
+ }
+@@ -307,7 +306,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+- boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "",
++ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ spectre_v2_module_string());
+ }
+ #endif
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -140,17 +140,28 @@ static void early_init_intel(struct cpui
+ rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ }
+
+- if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+- cpu_has(c, X86_FEATURE_STIBP) ||
+- cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) ||
+- cpu_has(c, X86_FEATURE_AMD_PRED_CMD) ||
+- cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) {
+- pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n");
+- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++ /*
++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++ * and they also have a different bit for STIBP support. Also,
++ * a hypervisor might have set the individual AMD bits even on
++ * Intel CPUs, for finer-grained selection of what's available.
++ */
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++ set_cpu_cap(c, X86_FEATURE_IBRS);
++ set_cpu_cap(c, X86_FEATURE_IBPB);
++ }
++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++ set_cpu_cap(c, X86_FEATURE_STIBP);
++
++ /* Now if any of them are set, check the blacklist and clear the lot */
++ if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
++ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
++ clear_cpu_cap(c, X86_FEATURE_IBRS);
++ clear_cpu_cap(c, X86_FEATURE_IBPB);
+ clear_cpu_cap(c, X86_FEATURE_STIBP);
+- clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL);
+- clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);
+- clear_cpu_cap(c, X86_FEATURE_AMD_STIBP);
++ clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
++ clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+ }
+
+ /*
diff --git a/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch b/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch
new file mode 100644
index 0000000..aa4b9e0
--- /dev/null
+++ b/queue/x86-cpufeatures-make-cpu-bugs-sticky.patch
@@ -0,0 +1,96 @@
+From 6cbd2171e89b13377261d15e64384df60ecb530e Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:32 +0100
+Subject: x86/cpufeatures: Make CPU bugs sticky
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 6cbd2171e89b13377261d15e64384df60ecb530e upstream.
+
+There is currently no way to force CPU bug bits like CPU feature bits. That
+makes it impossible to set a bug bit once at boot and have it stick for all
+upcoming CPUs.
+
+Extend the force set/clear arrays to handle bug bits as well.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpufeature.h | 2 ++
+ arch/x86/include/asm/processor.h | 4 ++--
+ arch/x86/kernel/cpu/common.c | 6 +++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[
+ set_bit(bit, (unsigned long *)cpu_caps_set); \
+ } while (0)
+
++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
++
+ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+ /*
+ * Static testing of CPU features. Used the same as boot_cpu_has().
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
+ extern struct cpuinfo_x86 new_cpu_data;
+
+ extern struct tss_struct doublefault_tss;
+-extern __u32 cpu_caps_cleared[NCAPINTS];
+-extern __u32 cpu_caps_set[NCAPINTS];
++extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -480,8 +480,8 @@ static const char *table_lookup_model(st
+ return NULL; /* Not found */
+ }
+
+-__u32 cpu_caps_cleared[NCAPINTS];
+-__u32 cpu_caps_set[NCAPINTS];
++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+
+ void load_percpu_segment(int cpu)
+ {
+@@ -710,7 +710,7 @@ static void apply_forced_caps(struct cpu
+ {
+ int i;
+
+- for (i = 0; i < NCAPINTS; i++) {
++ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
diff --git a/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch b/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch
new file mode 100644
index 0000000..7e942e5
--- /dev/null
+++ b/queue/x86-cpuid-fix-up-virtual-ibrs-ibpb-stibp-feature-bits-on-intel.patch
@@ -0,0 +1,122 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Tue, 30 Jan 2018 14:30:23 +0000
+Subject: x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+
+(cherry picked from commit 7fcae1118f5fd44a862aa5c3525248e35ee67c3b)
+
+Despite the fact that all the other code there seems to be doing it, just
+using set_cpu_cap() in early_intel_init() doesn't actually work.
+
+For CPUs with PKU support, setup_pku() calls get_cpu_cap() after
+c->c_init() has set those feature bits. That resets those bits back to what
+was queried from the hardware.
+
+Turning the bits off for bad microcode is easy to fix. That can just use
+setup_clear_cpu_cap() to force them off for all CPUs.
+
+I was less keen on forcing the feature bits *on* that way, just in case
+of inconsistencies. I appreciate that the kernel is going to get this
+utterly wrong if CPU features are not consistent, because it has already
+applied alternatives by the time secondary CPUs are brought up.
+
+But at least if setup_force_cpu_cap() isn't being used, we might have a
+chance of *detecting* the lack of the corresponding bit and either
+panicking or refusing to bring the offending CPU online.
+
+So ensure that the appropriate feature bits are set within get_cpu_cap()
+regardless of how many extra times it's called.
+
+Fixes: 2961298e ("x86/cpufeatures: Clean up Spectre v2 related CPUID flags")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: karahmed@amazon.de
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Link: https://lkml.kernel.org/r/1517322623-15261-1-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++
+ arch/x86/kernel/cpu/intel.c | 27 ++++++++-------------------
+ 2 files changed, 29 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -718,6 +718,26 @@ static void apply_forced_caps(struct cpu
+ }
+ }
+
++static void init_speculation_control(struct cpuinfo_x86 *c)
++{
++ /*
++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
++ * and they also have a different bit for STIBP support. Also,
++ * a hypervisor might have set the individual AMD bits even on
++ * Intel CPUs, for finer-grained selection of what's available.
++ *
++ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
++ * features, which are visible in /proc/cpuinfo and used by the
++ * kernel. So set those accordingly from the Intel bits.
++ */
++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
++ set_cpu_cap(c, X86_FEATURE_IBRS);
++ set_cpu_cap(c, X86_FEATURE_IBPB);
++ }
++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
++ set_cpu_cap(c, X86_FEATURE_STIBP);
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ u32 eax, ebx, ecx, edx;
+@@ -812,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
+ init_scattered_cpuid_features(c);
++ init_speculation_control(c);
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -140,28 +140,17 @@ static void early_init_intel(struct cpui
+ rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+ }
+
+- /*
+- * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+- * and they also have a different bit for STIBP support. Also,
+- * a hypervisor might have set the individual AMD bits even on
+- * Intel CPUs, for finer-grained selection of what's available.
+- */
+- if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+- set_cpu_cap(c, X86_FEATURE_IBRS);
+- set_cpu_cap(c, X86_FEATURE_IBPB);
+- }
+- if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+- set_cpu_cap(c, X86_FEATURE_STIBP);
+-
+ /* Now if any of them are set, check the blacklist and clear the lot */
+- if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
++ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
++ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
++ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+- clear_cpu_cap(c, X86_FEATURE_IBRS);
+- clear_cpu_cap(c, X86_FEATURE_IBPB);
+- clear_cpu_cap(c, X86_FEATURE_STIBP);
+- clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+- clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
++ setup_clear_cpu_cap(X86_FEATURE_IBRS);
++ setup_clear_cpu_cap(X86_FEATURE_IBPB);
++ setup_clear_cpu_cap(X86_FEATURE_STIBP);
++ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
++ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ }
+
+ /*
diff --git a/queue/x86-documentation-add-pti-description.patch b/queue/x86-documentation-add-pti-description.patch
new file mode 100644
index 0000000..78b4ebd
--- /dev/null
+++ b/queue/x86-documentation-add-pti-description.patch
@@ -0,0 +1,261 @@
+From 01c9b17bf673b05bb401b76ec763e9730ccf1376 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Fri, 5 Jan 2018 09:44:36 -0800
+Subject: x86/Documentation: Add PTI description
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 01c9b17bf673b05bb401b76ec763e9730ccf1376 upstream.
+
+Add some details about how PTI works, what some of the downsides
+are, and how to debug it when things go wrong.
+
+Also document the kernel parameter: 'pti/nopti'.
+
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner@student.tugraz.at>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Andi Lutomirsky <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/kernel-parameters.txt | 21 ++--
+ Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++++++++++++++
+ 2 files changed, 200 insertions(+), 7 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,8 +2763,6 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+- nopti [X86-64] Disable KAISER isolation of kernel from user.
+-
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+@@ -3327,11 +3325,20 @@ bytes respectively. Such letter suffixes
+ pt. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
+- pti= [X86_64]
+- Control KAISER user/kernel address space isolation:
+- on - enable
+- off - disable
+- auto - default setting
++ pti= [X86_64] Control Page Table Isolation of user and
++ kernel address spaces. Disabling this feature
++ removes hardening, but improves performance of
++ system calls and interrupts.
++
++ on - unconditionally enable
++ off - unconditionally disable
++ auto - kernel detects whether your CPU model is
++ vulnerable to issues that PTI mitigates
++
++ Not specifying this option is equivalent to pti=auto.
++
++ nopti [X86_64]
++ Equivalent to pti=off
+
+ pty.legacy_count=
+ [KNL] Number of legacy pty's. Overwrites compiled-in
+--- /dev/null
++++ b/Documentation/x86/pti.txt
+@@ -0,0 +1,186 @@
++Overview
++========
++
++Page Table Isolation (pti, previously known as KAISER[1]) is a
++countermeasure against attacks on the shared user/kernel address
++space such as the "Meltdown" approach[2].
++
++To mitigate this class of attacks, we create an independent set of
++page tables for use only when running userspace applications. When
++the kernel is entered via syscalls, interrupts or exceptions, the
++page tables are switched to the full "kernel" copy. When the system
++switches back to user mode, the user copy is used again.
++
++The userspace page tables contain only a minimal amount of kernel
++data: only what is needed to enter/exit the kernel such as the
++entry/exit functions themselves and the interrupt descriptor table
++(IDT). There are a few strictly unnecessary things that get mapped
++such as the first C function when entering an interrupt (see
++comments in pti.c).
++
++This approach helps to ensure that side-channel attacks leveraging
++the paging structures do not function when PTI is enabled. It can be
++enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
++Once enabled at compile-time, it can be disabled at boot with the
++'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
++
++Page Table Management
++=====================
++
++When PTI is enabled, the kernel manages two sets of page tables.
++The first set is very similar to the single set which is present in
++kernels without PTI. This includes a complete mapping of userspace
++that the kernel can use for things like copy_to_user().
++
++Although _complete_, the user portion of the kernel page tables is
++crippled by setting the NX bit in the top level. This ensures
++that any missed kernel->user CR3 switch will immediately crash
++userspace upon executing its first instruction.
++
++The userspace page tables map only the kernel data needed to enter
++and exit the kernel. This data is entirely contained in the 'struct
++cpu_entry_area' structure which is placed in the fixmap which gives
++each CPU's copy of the area a compile-time-fixed virtual address.
++
++For new userspace mappings, the kernel makes the entries in its
++page tables like normal. The only difference is when the kernel
++makes entries in the top (PGD) level. In addition to setting the
++entry in the main kernel PGD, a copy of the entry is made in the
++userspace page tables' PGD.
++
++This sharing at the PGD level also inherently shares all the lower
++layers of the page tables. This leaves a single, shared set of
++userspace page tables to manage. One PTE to lock, one set of
++accessed bits, dirty bits, etc...
++
++Overhead
++========
++
++Protection against side-channel attacks is important. But,
++this protection comes at a cost:
++
++1. Increased Memory Use
++ a. Each process now needs an order-1 PGD instead of order-0.
++ (Consumes an additional 4k per process).
++ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
++ aligned so that it can be mapped by setting a single PMD
++ entry. This consumes nearly 2MB of RAM once the kernel
++ is decompressed, but no space in the kernel image itself.
++
++2. Runtime Cost
++ a. CR3 manipulation to switch between the page table copies
++ must be done at interrupt, syscall, and exception entry
++ and exit (it can be skipped when the kernel is interrupted,
++ though.) Moves to CR3 are on the order of a hundred
++ cycles, and are required at every entry and exit.
++ b. A "trampoline" must be used for SYSCALL entry. This
++ trampoline depends on a smaller set of resources than the
++ non-PTI SYSCALL entry code, so requires mapping fewer
++ things into the userspace page tables. The downside is
++ that stacks must be switched at entry time.
++ d. Global pages are disabled for all kernel structures not
++ mapped into both kernel and userspace page tables. This
++ feature of the MMU allows different processes to share TLB
++ entries mapping the kernel. Losing the feature means more
++ TLB misses after a context switch. The actual loss of
++ performance is very small, however, never exceeding 1%.
++ d. Process Context IDentifiers (PCID) is a CPU feature that
++ allows us to skip flushing the entire TLB when switching page
++ tables by setting a special bit in CR3 when the page tables
++ are changed. This makes switching the page tables (at context
++ switch, or kernel entry/exit) cheaper. But, on systems with
++ PCID support, the context switch code must flush both the user
++ and kernel entries out of the TLB. The user PCID TLB flush is
++ deferred until the exit to userspace, minimizing the cost.
++ See intel.com/sdm for the gory PCID/INVPCID details.
++ e. The userspace page tables must be populated for each new
++ process. Even without PTI, the shared kernel mappings
++ are created by copying top-level (PGD) entries into each
++ new process. But, with PTI, there are now *two* kernel
++ mappings: one in the kernel page tables that maps everything
++ and one for the entry/exit structures. At fork(), we need to
++ copy both.
++ f. In addition to the fork()-time copying, there must also
++ be an update to the userspace PGD any time a set_pgd() is done
++ on a PGD used to map userspace. This ensures that the kernel
++ and userspace copies always map the same userspace
++ memory.
++ g. On systems without PCID support, each CR3 write flushes
++ the entire TLB. That means that each syscall, interrupt
++ or exception flushes the TLB.
++ h. INVPCID is a TLB-flushing instruction which allows flushing
++ of TLB entries for non-current PCIDs. Some systems support
++ PCIDs, but do not support INVPCID. On these systems, addresses
++ can only be flushed from the TLB for the current PCID. When
++ flushing a kernel address, we need to flush all PCIDs, so a
++ single kernel address flush will require a TLB-flushing CR3
++ write upon the next use of every PCID.
++
++Possible Future Work
++====================
++1. We can be more careful about not actually writing to CR3
++ unless its value is actually changed.
++2. Allow PTI to be enabled/disabled at runtime in addition to the
++ boot-time switching.
++
++Testing
++========
++
++To test stability of PTI, the following test procedure is recommended,
++ideally doing all of these in parallel:
++
++1. Set CONFIG_DEBUG_ENTRY=y
++2. Run several copies of all of the tools/testing/selftests/x86/ tests
++ (excluding MPX and protection_keys) in a loop on multiple CPUs for
++ several minutes. These tests frequently uncover corner cases in the
++ kernel entry code. In general, old kernels might cause these tests
++ themselves to crash, but they should never crash the kernel.
++3. Run the 'perf' tool in a mode (top or record) that generates many
++ frequent performance monitoring non-maskable interrupts (see "NMI"
++ in /proc/interrupts). This exercises the NMI entry/exit code which
++ is known to trigger bugs in code paths that did not expect to be
++ interrupted, including nested NMIs. Using "-c" boosts the rate of
++ NMIs, and using two -c with separate counters encourages nested NMIs
++ and less deterministic behavior.
++
++ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
++
++4. Launch a KVM virtual machine.
++5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
++ This has been a lightly-tested code path and needs extra scrutiny.
++
++Debugging
++=========
++
++Bugs in PTI cause a few different signatures of crashes
++that are worth noting here.
++
++ * Failures of the selftests/x86 code. Usually a bug in one of the
++ more obscure corners of entry_64.S
++ * Crashes in early boot, especially around CPU bringup. Bugs
++ in the trampoline code or mappings cause these.
++ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
++ like screwing up a page table switch. Also caused by
++ incorrectly mapping the IRQ handler entry code.
++ * Crashes at the first NMI. The NMI code is separate from main
++ interrupt handlers and can have bugs that do not affect
++ normal interrupts. Also caused by incorrectly mapping NMI
++ code. NMIs that interrupt the entry code must be very
++ careful and can be the cause of crashes that show up when
++ running perf.
++ * Kernel crashes at the first exit to userspace. entry_64.S
++ bugs, or failing to map some of the exit code.
++ * Crashes at first interrupt that interrupts userspace. The paths
++ in entry_64.S that return to userspace are sometimes separate
++ from the ones that return to the kernel.
++ * Double faults: overflowing the kernel stack because of page
++ faults upon page faults. Caused by touching non-pti-mapped
++ data in the entry code, or forgetting to switch to kernel
++ CR3 before calling into C functions which are not pti-mapped.
++ * Userspace segfaults early in boot, sometimes manifesting
++ as mount(8) failing to mount the rootfs. These have
++ tended to be TLB invalidation issues. Usually invalidating
++ the wrong PCID, or otherwise missing an invalidation.
++
++1. https://gruss.cc/files/kaiser.pdf
++2. https://meltdownattack.com/meltdown.pdf
diff --git a/queue/x86-entry-64-push-extra-regs-right-away.patch b/queue/x86-entry-64-push-extra-regs-right-away.patch
new file mode 100644
index 0000000..e80f751
--- /dev/null
+++ b/queue/x86-entry-64-push-extra-regs-right-away.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:49 -0800
+Subject: x86/entry/64: Push extra regs right away
+
+From: Andy Lutomirski <luto@kernel.org>
+
+(cherry picked from commit d1f7732009e0549eedf8ea1db948dc37be77fd46)
+
+With the fast path removed there is no point in splitting the push of the
+normal and the extra register set. Just push the extra regs right away.
+
+[ tglx: Split out from 'x86/entry/64: Remove the SYSCALL64 fast path' ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -177,10 +177,14 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
++ pushq %rbx /* pt_regs->rbx */
++ pushq %rbp /* pt_regs->rbp */
++ pushq %r12 /* pt_regs->r12 */
++ pushq %r13 /* pt_regs->r13 */
++ pushq %r14 /* pt_regs->r14 */
++ pushq %r15 /* pt_regs->r15 */
+
+ /* IRQs are off. */
+- SAVE_EXTRA_REGS
+ movq %rsp, %rdi
+ call do_syscall_64 /* returns with IRQs disabled */
+
diff --git a/queue/x86-entry-64-remove-the-syscall64-fast-path.patch b/queue/x86-entry-64-remove-the-syscall64-fast-path.patch
new file mode 100644
index 0000000..daa62bd
--- /dev/null
+++ b/queue/x86-entry-64-remove-the-syscall64-fast-path.patch
@@ -0,0 +1,202 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 Jan 2018 10:38:49 -0800
+Subject: x86/entry/64: Remove the SYSCALL64 fast path
+
+From: Andy Lutomirski <luto@kernel.org>
+
+(cherry picked from commit 21d375b6b34ff511a507de27bf316b3dde6938d9)
+
+The SYCALLL64 fast path was a nice, if small, optimization back in the good
+old days when syscalls were actually reasonably fast. Now there is PTI to
+slow everything down, and indirect branches are verboten, making everything
+messier. The retpoline code in the fast path is particularly nasty.
+
+Just get rid of the fast path. The slow path is barely slower.
+
+[ tglx: Split out the 'push all extra regs' part ]
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
+Link: https://lkml.kernel.org/r/462dff8d4d64dfbfc851fbf3130641809d980ecd.1517164461.git.luto@kernel.org
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 123 --------------------------------------------
+ arch/x86/entry/syscall_64.c | 7 --
+ 2 files changed, 3 insertions(+), 127 deletions(-)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -179,94 +179,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
+ pushq %r11 /* pt_regs->r11 */
+ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+
+- /*
+- * If we need to do entry work or if we guess we'll need to do
+- * exit work, go straight to the slow path.
+- */
+- movq PER_CPU_VAR(current_task), %r11
+- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+- jnz entry_SYSCALL64_slow_path
+-
+-entry_SYSCALL_64_fastpath:
+- /*
+- * Easy case: enable interrupts and issue the syscall. If the syscall
+- * needs pt_regs, we'll call a stub that disables interrupts again
+- * and jumps to the slow path.
+- */
+- TRACE_IRQS_ON
+- ENABLE_INTERRUPTS(CLBR_NONE)
+-#if __SYSCALL_MASK == ~0
+- cmpq $__NR_syscall_max, %rax
+-#else
+- andl $__SYSCALL_MASK, %eax
+- cmpl $__NR_syscall_max, %eax
+-#endif
+- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
+- movq %r10, %rcx
+-
+- /*
+- * This call instruction is handled specially in stub_ptregs_64.
+- * It might end up jumping to the slow path. If it jumps, RAX
+- * and all argument registers are clobbered.
+- */
+-#ifdef CONFIG_RETPOLINE
+- movq sys_call_table(, %rax, 8), %rax
+- call __x86_indirect_thunk_rax
+-#else
+- call *sys_call_table(, %rax, 8)
+-#endif
+-.Lentry_SYSCALL_64_after_fastpath_call:
+-
+- movq %rax, RAX(%rsp)
+-1:
+-
+- /*
+- * If we get here, then we know that pt_regs is clean for SYSRET64.
+- * If we see that no exit work is required (which we are required
+- * to check with IRQs off), then we can go straight to SYSRET64.
+- */
+- DISABLE_INTERRUPTS(CLBR_NONE)
+- TRACE_IRQS_OFF
+- movq PER_CPU_VAR(current_task), %r11
+- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
+- jnz 1f
+-
+- LOCKDEP_SYS_EXIT
+- TRACE_IRQS_ON /* user mode is traced as IRQs on */
+- movq RIP(%rsp), %rcx
+- movq EFLAGS(%rsp), %r11
+- RESTORE_C_REGS_EXCEPT_RCX_R11
+- /*
+- * This opens a window where we have a user CR3, but are
+- * running in the kernel. This makes using the CS
+- * register useless for telling whether or not we need to
+- * switch CR3 in NMIs. Normal interrupts are OK because
+- * they are off here.
+- */
+- SWITCH_USER_CR3
+- movq RSP(%rsp), %rsp
+- USERGS_SYSRET64
+-
+-1:
+- /*
+- * The fast path looked good when we started, but something changed
+- * along the way and we need to switch to the slow path. Calling
+- * raise(3) will trigger this, for example. IRQs are off.
+- */
+- TRACE_IRQS_ON
+- ENABLE_INTERRUPTS(CLBR_NONE)
+- SAVE_EXTRA_REGS
+- movq %rsp, %rdi
+- call syscall_return_slowpath /* returns with IRQs disabled */
+- jmp return_from_SYSCALL_64
+-
+-entry_SYSCALL64_slow_path:
+ /* IRQs are off. */
+ SAVE_EXTRA_REGS
+ movq %rsp, %rdi
+ call do_syscall_64 /* returns with IRQs disabled */
+
+-return_from_SYSCALL_64:
+ RESTORE_EXTRA_REGS
+ TRACE_IRQS_IRETQ /* we're about to change IF */
+
+@@ -339,6 +256,7 @@ return_from_SYSCALL_64:
+ syscall_return_via_sysret:
+ /* rcx and r11 are already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_RCX_R11
++
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+@@ -363,45 +281,6 @@ opportunistic_sysret_failed:
+ jmp restore_c_regs_and_iret
+ END(entry_SYSCALL_64)
+
+-ENTRY(stub_ptregs_64)
+- /*
+- * Syscalls marked as needing ptregs land here.
+- * If we are on the fast path, we need to save the extra regs,
+- * which we achieve by trying again on the slow path. If we are on
+- * the slow path, the extra regs are already saved.
+- *
+- * RAX stores a pointer to the C function implementing the syscall.
+- * IRQs are on.
+- */
+- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+- jne 1f
+-
+- /*
+- * Called from fast path -- disable IRQs again, pop return address
+- * and jump to slow path
+- */
+- DISABLE_INTERRUPTS(CLBR_NONE)
+- TRACE_IRQS_OFF
+- popq %rax
+- jmp entry_SYSCALL64_slow_path
+-
+-1:
+- JMP_NOSPEC %rax /* Called from C */
+-END(stub_ptregs_64)
+-
+-.macro ptregs_stub func
+-ENTRY(ptregs_\func)
+- leaq \func(%rip), %rax
+- jmp stub_ptregs_64
+-END(ptregs_\func)
+-.endm
+-
+-/* Instantiate ptregs_stub for each ptregs-using syscall */
+-#define __SYSCALL_64_QUAL_(sym)
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+-#include <asm/syscalls_64.h>
+-
+ /*
+ * %rdi: prev task
+ * %rsi: next task
+--- a/arch/x86/entry/syscall_64.c
++++ b/arch/x86/entry/syscall_64.c
+@@ -6,14 +6,11 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/syscall.h>
+
+-#define __SYSCALL_64_QUAL_(sym) sym
+-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+-
+-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
++#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+ #include <asm/syscalls_64.h>
+ #undef __SYSCALL_64
+
+-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
++#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+
+ extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+
diff --git a/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch b/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch
new file mode 100644
index 0000000..e81910d
--- /dev/null
+++ b/queue/x86-get_user-use-pointer-masking-to-limit-speculation.patch
@@ -0,0 +1,98 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:54 -0800
+Subject: x86/get_user: Use pointer masking to limit speculation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit c7f631cb07e7da06ac1d231ca178452339e32a94)
+
+Quoting Linus:
+
+ I do think that it would be a good idea to very expressly document
+ the fact that it's not that the user access itself is unsafe. I do
+ agree that things like "get_user()" want to be protected, but not
+ because of any direct bugs or problems with get_user() and friends,
+ but simply because get_user() is an excellent source of a pointer
+ that is obviously controlled from a potentially attacking user
+ space. So it's a prime candidate for then finding _subsequent_
+ accesses that can then be used to perturb the cache.
+
+Unlike the __get_user() case get_user() includes the address limit check
+near the pointer de-reference. With that locality the speculation can be
+mitigated with pointer narrowing rather than a barrier, i.e.
+array_index_nospec(). Where the narrowing is performed by:
+
+ cmp %limit, %ptr
+ sbb %mask, %mask
+ and %mask, %ptr
+
+With respect to speculation the value of %ptr is either less than %limit
+or NULL.
+
+Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727417469.33451.11804043010080838495.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/getuser.S | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/lib/getuser.S
++++ b/arch/x86/lib/getuser.S
+@@ -39,6 +39,8 @@ ENTRY(__get_user_1)
+ mov PER_CPU_VAR(current_task), %_ASM_DX
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
++ and %_ASM_DX, %_ASM_AX
+ ASM_STAC
+ 1: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+@@ -53,6 +55,8 @@ ENTRY(__get_user_2)
+ mov PER_CPU_VAR(current_task), %_ASM_DX
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
++ and %_ASM_DX, %_ASM_AX
+ ASM_STAC
+ 2: movzwl -1(%_ASM_AX),%edx
+ xor %eax,%eax
+@@ -67,6 +71,8 @@ ENTRY(__get_user_4)
+ mov PER_CPU_VAR(current_task), %_ASM_DX
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
++ and %_ASM_DX, %_ASM_AX
+ ASM_STAC
+ 3: movl -3(%_ASM_AX),%edx
+ xor %eax,%eax
+@@ -82,6 +88,8 @@ ENTRY(__get_user_8)
+ mov PER_CPU_VAR(current_task), %_ASM_DX
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
++ and %_ASM_DX, %_ASM_AX
+ ASM_STAC
+ 4: movq -7(%_ASM_AX),%rdx
+ xor %eax,%eax
+@@ -93,6 +101,8 @@ ENTRY(__get_user_8)
+ mov PER_CPU_VAR(current_task), %_ASM_DX
+ cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user_8
++ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
++ and %_ASM_DX, %_ASM_AX
+ ASM_STAC
+ 4: movl -7(%_ASM_AX),%edx
+ 5: movl -3(%_ASM_AX),%ecx
diff --git a/queue/x86-implement-array_index_mask_nospec.patch b/queue/x86-implement-array_index_mask_nospec.patch
new file mode 100644
index 0000000..23b2cba
--- /dev/null
+++ b/queue/x86-implement-array_index_mask_nospec.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:28 -0800
+Subject: x86: Implement array_index_mask_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit babdde2698d482b6c0de1eab4f697cf5856c5859)
+
+array_index_nospec() uses a mask to sanitize user controllable array
+indexes, i.e. generate a 0 mask if 'index' >= 'size', and a ~0 mask
+otherwise. While the default array_index_mask_nospec() handles the
+carry-bit from the (index - size) result in software.
+
+The x86 array_index_mask_nospec() does the same, but the carry-bit is
+handled in the processor CF flag without conditional instructions in the
+control flow.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727414808.33451.1873237130672785331.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/barrier.h | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -23,6 +23,30 @@
+ #define wmb() asm volatile("sfence" ::: "memory")
+ #endif
+
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ * bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ * 0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++ unsigned long size)
++{
++ unsigned long mask;
++
++ asm ("cmp %1,%2; sbb %0,%0;"
++ :"=r" (mask)
++ :"r"(size),"r" (index)
++ :"cc");
++ return mask;
++}
++
++/* Override the default implementation from linux/nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb() rmb()
+ #else
diff --git a/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch b/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
new file mode 100644
index 0000000..885922d
--- /dev/null
+++ b/queue/x86-introduce-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
@@ -0,0 +1,80 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:39 -0800
+Subject: x86: Introduce __uaccess_begin_nospec() and uaccess_try_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit b3bbfb3fb5d25776b8e3f361d2eedaabb0b496cd)
+
+For __get_user() paths, do not allow the kernel to speculate on the value
+of a user controlled pointer. In addition to the 'stac' instruction for
+Supervisor Mode Access Protection (SMAP), a barrier_nospec() causes the
+access_ok() result to resolve in the pipeline before the CPU might take any
+speculative action on the pointer value. Given the cost of 'stac' the
+speculation barrier is placed after 'stac' to hopefully overlap the cost of
+disabling SMAP with the cost of flushing the instruction pipeline.
+
+Since __get_user is a major kernel interface that deals with user
+controlled pointers, the __uaccess_begin_nospec() mechanism will prevent
+speculative execution past an access_ok() permission check. While
+speculative execution past access_ok() is not enough to lead to a kernel
+memory leak, it is a necessary precondition.
+
+To be clear, __uaccess_begin_nospec() is addressing a class of potential
+problems near __get_user() usages.
+
+Note, that while the barrier_nospec() in __uaccess_begin_nospec() is used
+to protect __get_user(), pointer masking similar to array_index_nospec()
+will be used for get_user() since it incorporates a bounds check near the
+usage.
+
+uaccess_try_nospec provides the same mechanism for get_user_try.
+
+No functional changes.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Andi Kleen <ak@linux.intel.com>
+Suggested-by: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727415922.33451.5796614273104346583.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/uaccess.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -123,6 +123,11 @@ extern int __get_user_bad(void);
+
+ #define __uaccess_begin() stac()
+ #define __uaccess_end() clac()
++#define __uaccess_begin_nospec() \
++({ \
++ stac(); \
++ barrier_nospec(); \
++})
+
+ /*
+ * This is a type: either unsigned long, if the argument fits into
+@@ -474,6 +479,10 @@ struct __large_struct { unsigned long bu
+ __uaccess_begin(); \
+ barrier();
+
++#define uaccess_try_nospec do { \
++ current->thread.uaccess_err = 0; \
++ __uaccess_begin_nospec(); \
++
+ #define uaccess_catch(err) \
+ __uaccess_end(); \
+ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
diff --git a/queue/x86-introduce-barrier_nospec.patch b/queue/x86-introduce-barrier_nospec.patch
new file mode 100644
index 0000000..ae6aa42
--- /dev/null
+++ b/queue/x86-introduce-barrier_nospec.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:33 -0800
+Subject: x86: Introduce barrier_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit b3d7ad85b80bbc404635dca80f5b129f6242bc7a)
+
+Rename the open coded form of this instruction sequence from
+rdtsc_ordered() into a generic barrier primitive, barrier_nospec().
+
+One of the mitigations for Spectre variant1 vulnerabilities is to fence
+speculative execution after successfully validating a bounds check. I.e.
+force the result of a bounds check to resolve in the instruction pipeline
+to ensure speculative execution honors that result before potentially
+operating on out-of-bounds data.
+
+No functional changes.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Andi Kleen <ak@linux.intel.com>
+Suggested-by: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727415361.33451.9049453007262764675.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/barrier.h | 4 ++++
+ arch/x86/include/asm/msr.h | 3 +--
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -47,6 +47,10 @@ static inline unsigned long array_index_
+ /* Override the default implementation from linux/nospec.h. */
+ #define array_index_mask_nospec array_index_mask_nospec
+
++/* Prevent speculative execution past this barrier. */
++#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
++ "lfence", X86_FEATURE_LFENCE_RDTSC)
++
+ #ifdef CONFIG_X86_PPRO_FENCE
+ #define dma_rmb() rmb()
+ #else
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -188,8 +188,7 @@ static __always_inline unsigned long lon
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ */
+- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+- "lfence", X86_FEATURE_LFENCE_RDTSC);
++ barrier_nospec();
+ return rdtsc();
+ }
+
diff --git a/queue/x86-kaiser-check-boottime-cmdline-params.patch b/queue/x86-kaiser-check-boottime-cmdline-params.patch
new file mode 100644
index 0000000..3476e16
--- /dev/null
+++ b/queue/x86-kaiser-check-boottime-cmdline-params.patch
@@ -0,0 +1,123 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Check boottime cmdline params
+
+From: Borislav Petkov <bp@suse.de>
+
+
+AMD (and possibly other vendors) are not affected by the leak
+KAISER is protecting against.
+
+Keep the "nopti" for traditional reasons and add pti=<on|off|auto>
+like upstream.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt | 6 +++
+ arch/x86/mm/kaiser.c | 59 +++++++++++++++++++++++++-----------
+ 2 files changed, 47 insertions(+), 18 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -3327,6 +3327,12 @@ bytes respectively. Such letter suffixes
+ pt. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
++ pti= [X86_64]
++ Control KAISER user/kernel address space isolation:
++ on - enable
++ off - disable
++ auto - default setting
++
+ pty.legacy_count=
+ [KNL] Number of legacy pty's. Overwrites compiled-in
+ default number.
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -15,6 +15,7 @@
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++#include <asm/cmdline.h>
+
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+@@ -263,6 +264,43 @@ static void __init kaiser_init_all_pgds(
+ WARN_ON(__ret); \
+ } while (0)
+
++void __init kaiser_check_boottime_disable(void)
++{
++ bool enable = true;
++ char arg[5];
++ int ret;
++
++ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
++ if (ret > 0) {
++ if (!strncmp(arg, "on", 2))
++ goto enable;
++
++ if (!strncmp(arg, "off", 3))
++ goto disable;
++
++ if (!strncmp(arg, "auto", 4))
++ goto skip;
++ }
++
++ if (cmdline_find_option_bool(boot_command_line, "nopti"))
++ goto disable;
++
++skip:
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++ goto disable;
++
++enable:
++ if (enable)
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
++ return;
++
++disable:
++ pr_info("Kernel/User page tables isolation: disabled\n");
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++}
++
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -274,12 +312,10 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- if (!kaiser_enabled) {
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- return;
+- }
++ kaiser_check_boottime_disable();
+
+- setup_force_cpu_cap(X86_FEATURE_KAISER);
++ if (!kaiser_enabled)
++ return;
+
+ kaiser_init_all_pgds();
+
+@@ -423,16 +459,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nopti doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-
+- kaiser_enabled = 0;
+- pr_info("Kernel/User page tables isolation: disabled\n");
+-
+- return 0;
+-}
+-early_param("nopti", x86_nokaiser_setup);
diff --git a/queue/x86-kaiser-move-feature-detection-up.patch b/queue/x86-kaiser-move-feature-detection-up.patch
new file mode 100644
index 0000000..5d61f21
--- /dev/null
+++ b/queue/x86-kaiser-move-feature-detection-up.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Mon, 25 Dec 2017 13:57:16 +0100
+Subject: x86/kaiser: Move feature detection up
+
+From: Borislav Petkov <bp@suse.de>
+
+
+... before the first use of kaiser_enabled as otherwise funky
+things happen:
+
+ about to get started...
+ (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000]
+ (XEN) Pagetable walk from ffff88022a449090:
+ (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e
+ (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff
+ (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08
+ entry.o#create_bounce_frame+0x135/0x14d
+ (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
+ (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]----
+ (XEN) CPU: 0
+ (XEN) RIP: e033:[<ffffffff81007460>]
+ (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0)
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kaiser.h | 2 ++
+ arch/x86/kernel/setup.c | 7 +++++++
+ arch/x86/mm/kaiser.c | 2 --
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+ extern int kaiser_enabled;
++extern void __init kaiser_check_boottime_disable(void);
+ #else
+ #define kaiser_enabled 0
++static inline void __init kaiser_check_boottime_disable(void) {}
+ #endif /* CONFIG_KAISER */
+
+ /*
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -114,6 +114,7 @@
+ #include <asm/microcode.h>
+ #include <asm/mmu_context.h>
+ #include <asm/kaslr.h>
++#include <asm/kaiser.h>
+
+ /*
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+@@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p)
+ */
+ init_hypervisor_platform();
+
++ /*
++ * This needs to happen right after XENPV is set on xen and
++ * kaiser_enabled is checked below in cleanup_highmap().
++ */
++ kaiser_check_boottime_disable();
++
+ x86_init.resources.probe_roms();
+
+ /* after parse_early_param, so could debug it */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -310,8 +310,6 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- kaiser_check_boottime_disable();
+-
+ if (!kaiser_enabled)
+ return;
+
diff --git a/queue/x86-kaiser-reenable-paravirt.patch b/queue/x86-kaiser-reenable-paravirt.patch
new file mode 100644
index 0000000..d081b61
--- /dev/null
+++ b/queue/x86-kaiser-reenable-paravirt.patch
@@ -0,0 +1,28 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: x86/kaiser: Reenable PARAVIRT
+
+From: Borislav Petkov <bp@suse.de>
+
+
+Now that the required bits have been addressed, reenable
+PARAVIRT.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -34,7 +34,7 @@ config SECURITY
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
+ default y
+- depends on X86_64 && SMP && !PARAVIRT
++ depends on X86_64 && SMP
+ help
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
diff --git a/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch b/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
new file mode 100644
index 0000000..804ffac
--- /dev/null
+++ b/queue/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
@@ -0,0 +1,97 @@
+From foo@baz Wed Jan 3 20:37:21 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling
+
+From: Borislav Petkov <bp@suse.de>
+
+
+Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti".
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt | 2 +-
+ arch/x86/kernel/cpu/common.c | 18 ------------------
+ arch/x86/mm/kaiser.c | 20 +++++++++++++++++++-
+ 3 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2763,7 +2763,7 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+- nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++ nopti [X86-64] Disable KAISER isolation of kernel from user.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nokaiser doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-#ifdef CONFIG_KAISER
+- kaiser_enabled = 0;
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- pr_info("nokaiser: KAISER feature disabled\n");
+-#endif
+- return 0;
+-}
+-early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -813,10 +799,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
+ init_scattered_cpuid_features(c);
+-#ifdef CONFIG_KAISER
+- if (kaiser_enabled)
+- set_cpu_cap(c, X86_FEATURE_KAISER);
+-#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -274,8 +274,13 @@ void __init kaiser_init(void)
+ {
+ int cpu;
+
+- if (!kaiser_enabled)
++ if (!kaiser_enabled) {
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -418,3 +423,16 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nopti doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++
++ kaiser_enabled = 0;
++ pr_info("Kernel/User page tables isolation: disabled\n");
++
++ return 0;
++}
++early_param("nopti", x86_nokaiser_setup);
diff --git a/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch b/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
new file mode 100644
index 0000000..f7b9b05
--- /dev/null
+++ b/queue/x86-mm-32-move-setup_clear_cpu_cap-x86_feature_pcid-earlier.patch
@@ -0,0 +1,62 @@
+From b8b7abaed7a49b350f8ba659ddc264b04931d581 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 17 Sep 2017 09:03:50 -0700
+Subject: x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID) earlier
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit b8b7abaed7a49b350f8ba659ddc264b04931d581 upstream.
+
+Otherwise we might have the PCID feature bit set during cpu_init().
+
+This is just for robustness. I haven't seen any actual bugs here.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels")
+Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 8 --------
+ arch/x86/kernel/cpu/common.c | 8 ++++++++
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -22,14 +22,6 @@
+
+ void __init check_bugs(void)
+ {
+-#ifdef CONFIG_X86_32
+- /*
+- * Regardless of whether PCID is enumerated, the SDM says
+- * that it can't be enabled in 32-bit mode.
+- */
+- setup_clear_cpu_cap(X86_FEATURE_PCID);
+-#endif
+-
+ identify_boot_cpu();
+
+ if (!IS_ENABLED(CONFIG_SMP)) {
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -890,6 +890,14 @@ static void __init early_identify_cpu(st
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+ fpu__init_system(c);
++
++#ifdef CONFIG_X86_32
++ /*
++ * Regardless of whether PCID is enumerated, the SDM says
++ * that it can't be enabled in 32-bit mode.
++ */
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++#endif
+ }
+
+ void __init early_cpu_init(void)
diff --git a/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch b/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
new file mode 100644
index 0000000..d110708
--- /dev/null
+++ b/queue/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
@@ -0,0 +1,43 @@
+From 924c6b900cfdf376b07bccfd80e62b21914f8a5a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 8 Oct 2017 21:53:05 -0700
+Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 924c6b900cfdf376b07bccfd80e62b21914f8a5a upstream.
+
+Trying to reboot via real mode fails with PCID on: long mode cannot
+be exited while CR4.PCIDE is set. (No, I have no idea why, but the
+SDM and actual CPUs are in agreement here.) The result is a GPF and
+a hang instead of a reboot.
+
+I didn't catch this in testing because neither my computer nor my VM
+reboots this way. I can trigger it with reboot=bios, though.
+
+Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
+Reported-and-tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/reboot.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -106,6 +106,10 @@ void __noreturn machine_real_restart(uns
+ load_cr3(initial_page_table);
+ #else
+ write_cr3(real_mode_header->trampoline_pgd);
++
++ /* Exiting long mode will fail if CR4.PCIDE is set. */
++ if (static_cpu_has(X86_FEATURE_PCID))
++ cr4_clear_bits(X86_CR4_PCIDE);
+ #endif
+
+ /* Jump to the identity-mapped low memory code */
diff --git a/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch b/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
new file mode 100644
index 0000000..4bbaccd
--- /dev/null
+++ b/queue/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
@@ -0,0 +1,74 @@
+From 0790c9aad84901ca1bdc14746175549c8b5da215 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:20 -0700
+Subject: x86/mm: Add the 'nopcid' boot option to turn off PCID
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream.
+
+The parameter is only present on x86_64 systems to save a few bytes,
+as PCID is always disabled on x86_32.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2795,6 +2795,8 @@ bytes respectively. Such letter suffixes
+ nopat [X86] Disable PAT (page attribute table extension of
+ pagetables) support.
+
++ nopcid [X86-64] Disable the PCID cpu feature.
++
+ norandmaps Don't use address space randomization. Equivalent to
+ echo 0 > /proc/sys/kernel/randomize_va_space
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -163,6 +163,24 @@ static int __init x86_mpx_setup(char *s)
+ }
+ __setup("nompx", x86_mpx_setup);
+
++#ifdef CONFIG_X86_64
++static int __init x86_pcid_setup(char *s)
++{
++ /* require an exact match without trailing characters */
++ if (strlen(s))
++ return 0;
++
++ /* do not emit a message if the feature is not present */
++ if (!boot_cpu_has(X86_FEATURE_PCID))
++ return 1;
++
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++ pr_info("nopcid: PCID feature disabled\n");
++ return 1;
++}
++__setup("nopcid", x86_pcid_setup);
++#endif
++
+ static int __init x86_noinvpcid_setup(char *s)
+ {
+ /* noinvpcid doesn't accept parameters */
diff --git a/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch b/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch
new file mode 100644
index 0000000..58abb0a
--- /dev/null
+++ b/queue/x86-mm-disable-pcid-on-32-bit-kernels.patch
@@ -0,0 +1,78 @@
+From cba4671af7550e008f7a7835f06df0763825bf3e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:19 -0700
+Subject: x86/mm: Disable PCID on 32-bit kernels
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit cba4671af7550e008f7a7835f06df0763825bf3e upstream.
+
+32-bit kernels on new hardware will see PCID in CPUID, but PCID can
+only be used in 64-bit mode. Rather than making all PCID code
+conditional, just disable the feature on 32-bit builds.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/2e391769192a4d31b808410c383c6bf0734bc6ea.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/disabled-features.h | 4 +++-
+ arch/x86/kernel/cpu/bugs.c | 8 ++++++++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -21,11 +21,13 @@
+ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
+ # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
+ # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
++# define DISABLE_PCID 0
+ #else
+ # define DISABLE_VME 0
+ # define DISABLE_K6_MTRR 0
+ # define DISABLE_CYRIX_ARR 0
+ # define DISABLE_CENTAUR_MCR 0
++# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
+ #endif /* CONFIG_X86_64 */
+
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+@@ -43,7 +45,7 @@
+ #define DISABLED_MASK1 0
+ #define DISABLED_MASK2 0
+ #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
+-#define DISABLED_MASK4 0
++#define DISABLED_MASK4 (DISABLE_PCID)
+ #define DISABLED_MASK5 0
+ #define DISABLED_MASK6 0
+ #define DISABLED_MASK7 0
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -19,6 +19,14 @@
+
+ void __init check_bugs(void)
+ {
++#ifdef CONFIG_X86_32
++ /*
++ * Regardless of whether PCID is enumerated, the SDM says
++ * that it can't be enabled in 32-bit mode.
++ */
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++#endif
++
+ identify_boot_cpu();
+ #ifndef CONFIG_SMP
+ pr_info("CPU: ");
diff --git a/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch b/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch
new file mode 100644
index 0000000..8722495
--- /dev/null
+++ b/queue/x86-mm-enable-cr4.pcide-on-supported-systems.patch
@@ -0,0 +1,108 @@
+From 660da7c9228f685b2ebe664f9fd69aaddcc420b5 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 29 Jun 2017 08:53:21 -0700
+Subject: x86/mm: Enable CR4.PCIDE on supported systems
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream.
+
+We can use PCID if the CPU has PCID and PGE and we're not on Xen.
+
+By itself, this has no effect. A followup patch will start using PCID.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Juergen Gross <jgross@suse.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 8 ++++++++
+ arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
+ arch/x86/xen/enlighten.c | 6 ++++++
+ 3 files changed, 36 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -191,6 +191,14 @@ static inline void __flush_tlb_all(void)
+ __flush_tlb_global();
+ else
+ __flush_tlb();
++
++ /*
++ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
++ * we'd end up flushing kernel translations for the current ASID but
++ * we might fail to flush kernel translations for other cached ASIDs.
++ *
++ * To avoid this issue, we force PCID off if PGE is off.
++ */
+ }
+
+ static inline void __flush_tlb_one(unsigned long addr)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -324,6 +324,25 @@ static __always_inline void setup_smap(s
+ }
+ }
+
++static void setup_pcid(struct cpuinfo_x86 *c)
++{
++ if (cpu_has(c, X86_FEATURE_PCID)) {
++ if (cpu_has(c, X86_FEATURE_PGE)) {
++ cr4_set_bits(X86_CR4_PCIDE);
++ } else {
++ /*
++ * flush_tlb_all(), as currently implemented, won't
++ * work if PCID is on but PGE is not. Since that
++ * combination doesn't exist on real hardware, there's
++ * no reason to try to fully support it, but it's
++ * polite to avoid corrupting data if we're on
++ * an improperly configured VM.
++ */
++ clear_cpu_cap(c, X86_FEATURE_PCID);
++ }
++ }
++}
++
+ /*
+ * Protection Keys are not available in 32-bit mode.
+ */
+@@ -1082,6 +1101,9 @@ static void identify_cpu(struct cpuinfo_
+ setup_smep(c);
+ setup_smap(c);
+
++ /* Set up PCID */
++ setup_pcid(c);
++
+ /*
+ * The vendor-specific functions might have changed features.
+ * Now we do "generic changes."
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -444,6 +444,12 @@ static void __init xen_init_cpuid_mask(v
+ ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
+ (1 << X86_FEATURE_ACC)); /* thermal monitoring */
+
++ /*
++ * Xen PV would need some work to support PCID: CR3 handling as well
++ * as xen_flush_tlb_others() would need updating.
++ */
++ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */
++
+ if (!xen_initial_domain())
+ cpuid_leaf1_edx_mask &=
+ ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
diff --git a/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch b/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
new file mode 100644
index 0000000..4f7f58e
--- /dev/null
+++ b/queue/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
@@ -0,0 +1,81 @@
+From ce27374fabf553153c3f53efcaa9bfab9216bd8c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:21 -0700
+Subject: x86/mm: Make flush_tlb_mm_range() more predictable
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream.
+
+I'm about to rewrite the function almost completely, but first I
+want to get a functional change out of the way. Currently, if
+flush_tlb_mm_range() does not flush the local TLB at all, it will
+never do individual page flushes on remote CPUs. This seems to be
+an accident, and preserving it will be awkward. Let's change it
+first so that any regressions in the rewrite will be easier to
+bisect and so that the rewrite can attempt to change no visible
+behavior at all.
+
+The fix is simple: we can simply avoid short-circuiting the
+calculation of base_pages_to_flush.
+
+As a side effect, this also eliminates a potential corner case: if
+tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range()
+could have ended up flushing the entire address space one page at a
+time.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -307,6 +307,12 @@ void flush_tlb_mm_range(struct mm_struct
+ unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+
+ preempt_disable();
++
++ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
++ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
++ if (base_pages_to_flush > tlb_single_page_flush_ceiling)
++ base_pages_to_flush = TLB_FLUSH_ALL;
++
+ if (current->active_mm != mm) {
+ /* Synchronize with switch_mm. */
+ smp_mb();
+@@ -323,15 +329,11 @@ void flush_tlb_mm_range(struct mm_struct
+ goto out;
+ }
+
+- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+-
+ /*
+ * Both branches below are implicit full barriers (MOV to CR or
+ * INVLPG) that synchronize with switch_mm.
+ */
+- if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+- base_pages_to_flush = TLB_FLUSH_ALL;
++ if (base_pages_to_flush == TLB_FLUSH_ALL) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
diff --git a/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
new file mode 100644
index 0000000..2153f10
--- /dev/null
+++ b/queue/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
@@ -0,0 +1,105 @@
+From d698c90a07e8c70354dad23e61434edf7de2c91c Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Mon, 22 May 2017 15:30:01 -0700
+Subject: [PATCH] x86/mm: Reimplement flush_tlb_page() using
+ flush_tlb_mm_range()
+
+commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream.
+
+flush_tlb_page() was very similar to flush_tlb_mm_range() except that
+it had a couple of issues:
+
+ - It was missing an smp_mb() in the case where
+ current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit)
+
+ - It was missing tracepoints and vm counter updates.
+
+The only reason that I can see for keeping it at as a separate
+function is that it could avoid a few branches that
+flush_tlb_mm_range() needs to decide to flush just one page. This
+hardly seems worthwhile. If we decide we want to get rid of those
+branches again, a better way would be to introduce an
+__flush_tlb_mm_range() helper and make both flush_tlb_page() and
+flush_tlb_mm_range() use it.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 8e7ae9e6c59a..abcd615ea27e 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -297,11 +297,15 @@ static inline void flush_tlb_kernel_range(unsigned long start,
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
++{
++ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
++}
++
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 9db9260a5e9f..38f6e37959af 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -356,33 +356,6 @@ out:
+ preempt_enable();
+ }
+
+-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+-{
+- struct mm_struct *mm = vma->vm_mm;
+-
+- preempt_disable();
+-
+- if (current->active_mm == mm) {
+- if (current->mm) {
+- /*
+- * Implicit full barrier (INVLPG) that synchronizes
+- * with switch_mm.
+- */
+- __flush_tlb_one(start);
+- } else {
+- leave_mm(smp_processor_id());
+-
+- /* Synchronize with switch_mm. */
+- smp_mb();
+- }
+- }
+-
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
+-
+- preempt_enable();
+-}
+-
+ static void do_flush_tlb_all(void *info)
+ {
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+--
+2.15.0
+
diff --git a/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch b/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
new file mode 100644
index 0000000..1f63ed5
--- /dev/null
+++ b/queue/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
@@ -0,0 +1,101 @@
+From 29961b59a51f8c6838a26a45e871a7ed6771809b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:20 -0700
+Subject: x86/mm: Remove flush_tlb() and flush_tlb_current_task()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream.
+
+I was trying to figure out what how flush_tlb_current_task() would
+possibly work correctly if current->mm != current->active_mm, but I
+realized I could spare myself the effort: it has no callers except
+the unused flush_tlb() macro.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/tlbflush.h | 9 ---------
+ arch/x86/mm/tlb.c | 17 -----------------
+ 2 files changed, 26 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -205,7 +205,6 @@ static inline void __flush_tlb_one(unsig
+ /*
+ * TLB flushing:
+ *
+- * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+@@ -237,11 +236,6 @@ static inline void flush_tlb_all(void)
+ __flush_tlb_all();
+ }
+
+-static inline void flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+ static inline void local_flush_tlb(void)
+ {
+ __flush_tlb_up();
+@@ -303,14 +297,11 @@ static inline void flush_tlb_kernel_rang
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_current_task(void);
+ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+-#define flush_tlb() flush_tlb_current_task()
+-
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -287,23 +287,6 @@ void native_flush_tlb_others(const struc
+ smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ }
+
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+-
+- preempt_disable();
+-
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-
+- /* This is an implicit full barrier that synchronizes with switch_mm. */
+- local_flush_tlb();
+-
+- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+- preempt_enable();
+-}
+-
+ /*
+ * See Documentation/x86/tlb.txt for details. We choose 33
+ * because it is large enough to cover the vast majority (at
diff --git a/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch b/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
new file mode 100644
index 0000000..649fa38
--- /dev/null
+++ b/queue/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
@@ -0,0 +1,305 @@
+From fd56dcc62b454fbbc7d9d6822b55953e5e945976 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 28 May 2017 10:00:14 -0700
+Subject: [PATCH] x86/mm: Remove the UP asm/tlbflush.h code, always use the
+ (formerly) SMP code
+
+commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
+
+The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
+Aside from that, it's fallen quite a bit behind the SMP code:
+
+ - flush_tlb_mm_range() didn't flush individual pages if the range
+ was small.
+
+ - The lazy TLB code was much weaker. This usually wouldn't matter,
+ but, if a kernel thread flushed its lazy "active_mm" more than
+ once (due to reclaim or similar), it wouldn't be unlazied and
+ would instead pointlessly flush repeatedly.
+
+ - Tracepoints were missing.
+
+Aside from that, simply having the UP code around was a maintanence
+burden, since it means that any change to the TLB flush code had to
+make sure not to break it.
+
+Simplify everything by deleting the UP code.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 951dc26b1a5e..63e83fe8987c 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -45,7 +45,7 @@ config X86
+ select ARCH_USE_CMPXCHG_LOCKREF if X86_64
+ select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS
+- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
++ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_IPC_PARSE_VERSION if X86_32
+diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
+index 59405a248fc2..9b76cd331990 100644
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -22,8 +22,8 @@ typedef struct {
+ #ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+- unsigned int irq_tlb_count;
+ #endif
++ unsigned int irq_tlb_count;
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+ unsigned int irq_thermal_count;
+ #endif
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 1ea0baef1175..6deccb456060 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -25,12 +25,6 @@ typedef struct {
+ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
+ } mm_context_t;
+
+-#ifdef CONFIG_SMP
+ void leave_mm(int cpu);
+-#else
+-static inline void leave_mm(int cpu)
+-{
+-}
+-#endif
+
+ #endif /* _ASM_X86_MMU_H */
+diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
+index d8abfcf524d1..d15f740111c9 100644
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -98,10 +98,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
+
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-#ifdef CONFIG_SMP
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+-#endif
+ }
+
+ static inline int init_new_context(struct task_struct *tsk,
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index abcd615ea27e..12dedd6c9e42 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -7,6 +7,7 @@
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+ #include <asm/special_insns.h>
++#include <asm/smp.h>
+
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+@@ -65,10 +66,8 @@ static inline void invpcid_flush_all_nonglobals(void)
+ #endif
+
+ struct tlb_state {
+-#ifdef CONFIG_SMP
+ struct mm_struct *active_mm;
+ int state;
+-#endif
+
+ /*
+ * Access to this CR4 shadow and to H/W CR4 is protected by
+@@ -216,79 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr)
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+-#ifndef CONFIG_SMP
+-
+-/* "_up" is for UniProcessor.
+- *
+- * This is a helper for other header functions. *Not* intended to be called
+- * directly. All global TLB flushes need to either call this, or to bump the
+- * vm statistics themselves.
+- */
+-static inline void __flush_tlb_up(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb();
+-}
+-
+-static inline void flush_tlb_all(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb_all();
+-}
+-
+-static inline void local_flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm(struct mm_struct *mm)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_page(struct vm_area_struct *vma,
+- unsigned long addr)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_one(addr);
+-}
+-
+-static inline void flush_tlb_range(struct vm_area_struct *vma,
+- unsigned long start, unsigned long end)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm_range(struct mm_struct *mm,
+- unsigned long start, unsigned long end, unsigned long vmflag)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+- struct mm_struct *mm,
+- unsigned long start,
+- unsigned long end)
+-{
+-}
+-
+-static inline void reset_lazy_tlbstate(void)
+-{
+-}
+-
+-static inline void flush_tlb_kernel_range(unsigned long start,
+- unsigned long end)
+-{
+- flush_tlb_all();
+-}
+-
+-#else /* SMP */
+-
+-#include <asm/smp.h>
+-
+ #define local_flush_tlb() __flush_tlb()
+
+ #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+@@ -319,8 +245,6 @@ static inline void reset_lazy_tlbstate(void)
+ this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ }
+
+-#endif /* SMP */
+-
+ #ifndef CONFIG_PARAVIRT
+ #define flush_tlb_others(mask, mm, start, end) \
+ native_flush_tlb_others(mask, mm, start, end)
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 8a427715f541..0381e949de17 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -762,10 +762,8 @@ void __init zone_sizes_init(void)
+ }
+
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+-#ifdef CONFIG_SMP
+ .active_mm = &init_mm,
+ .state = 0,
+-#endif
+ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
+ };
+ EXPORT_SYMBOL_GPL(cpu_tlbstate);
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 38f6e37959af..7882e4e3c113 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -15,7 +15,7 @@
+ #include <linux/debugfs.h>
+
+ /*
+- * Smarter SMP flushing macros.
++ * TLB flushing, formerly SMP-only
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+@@ -28,8 +28,6 @@
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
+-#ifdef CONFIG_SMP
+-
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -59,8 +57,6 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+
+-#endif /* CONFIG_SMP */
+-
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
+@@ -91,10 +87,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+ }
+
+-#ifdef CONFIG_SMP
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+@@ -152,9 +146,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+ #endif
+- }
+-#ifdef CONFIG_SMP
+- else {
++ } else {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+@@ -181,11 +173,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ load_mm_ldt(next);
+ }
+ }
+-#endif
+ }
+
+-#ifdef CONFIG_SMP
+-
+ /*
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+@@ -440,5 +429,3 @@ static int __init create_tlb_single_page_flush_ceiling(void)
+ return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
+-
+-#endif /* CONFIG_SMP */
+--
+2.15.0
+
diff --git a/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch b/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch
new file mode 100644
index 0000000..884d448
--- /dev/null
+++ b/queue/x86-msr-add-definitions-for-new-speculation-control-msrs.patch
@@ -0,0 +1,63 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:12 +0000
+Subject: x86/msr: Add definitions for new speculation control MSRs
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410)
+
+Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES.
+
+See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -37,6 +37,13 @@
+ #define EFER_FFXSR (1<<_EFER_FFXSR)
+
+ /* Intel MSRs. Some also available on other CPUs */
++#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
++#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
++#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
++
++#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
++#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
++
+ #define MSR_IA32_PERFCTR0 0x000000c1
+ #define MSR_IA32_PERFCTR1 0x000000c2
+ #define MSR_FSB_FREQ 0x000000cd
+@@ -50,6 +57,11 @@
+ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+ #define MSR_MTRRcap 0x000000fe
++
++#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
++#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
++#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
++
+ #define MSR_IA32_BBL_CR_CTL 0x00000119
+ #define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
diff --git a/queue/x86-nospec-fix-header-guards-names.patch b/queue/x86-nospec-fix-header-guards-names.patch
new file mode 100644
index 0000000..a9bcc71
--- /dev/null
+++ b/queue/x86-nospec-fix-header-guards-names.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Borislav Petkov <bp@suse.de>
+Date: Fri, 26 Jan 2018 13:11:37 +0100
+Subject: x86/nospec: Fix header guards names
+
+From: Borislav Petkov <bp@suse.de>
+
+(cherry picked from commit 7a32fc51ca938e67974cbb9db31e1a43f98345a9)
+
+... to adhere to the _ASM_X86_ naming scheme.
+
+No functional change.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: riel@redhat.com
+Cc: ak@linux.intel.com
+Cc: peterz@infradead.org
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: jikos@kernel.org
+Cc: luto@amacapital.net
+Cc: dave.hansen@intel.com
+Cc: torvalds@linux-foundation.org
+Cc: keescook@google.com
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Cc: pjt@google.com
+Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -1,7 +1,7 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+
+-#ifndef __NOSPEC_BRANCH_H__
+-#define __NOSPEC_BRANCH_H__
++#ifndef _ASM_X86_NOSPEC_BRANCH_H_
++#define _ASM_X86_NOSPEC_BRANCH_H_
+
+ #include <asm/alternative.h>
+ #include <asm/alternative-asm.h>
+@@ -232,4 +232,4 @@ static inline void indirect_branch_predi
+ }
+
+ #endif /* __ASSEMBLY__ */
+-#endif /* __NOSPEC_BRANCH_H__ */
++#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/queue/x86-paravirt-dont-patch-flush_tlb_single.patch b/queue/x86-paravirt-dont-patch-flush_tlb_single.patch
new file mode 100644
index 0000000..4783222
--- /dev/null
+++ b/queue/x86-paravirt-dont-patch-flush_tlb_single.patch
@@ -0,0 +1,68 @@
+From 7f6999b379b7f1c378345e436be46df760668145 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: [PATCH] x86/paravirt: Dont patch flush_tlb_single
+
+commit a035795499ca1c2bd1928808d1a156eda1420383 upstream
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires to have more code in
+there than INVLPG.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Borislav Petkov <bpetkov@suse.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David Laight <David.Laight@aculab.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: Eduardo Valentin <eduval@amazon.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: aliguori@amazon.com
+Cc: daniel.gruss@iaik.tugraz.at
+Cc: hughd@google.com
+Cc: keescook@google.com
+Cc: linux-mm@kvack.org
+Cc: michael.schwarz@iaik.tugraz.at
+Cc: moritz.lipp@iaik.tugraz.at
+Cc: richard.fellner@student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
+index e70087a04cc8..68f8273b152e 100644
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, clts, "clts");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+
+@@ -59,7 +58,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ PATCH_SITE(pv_mmu_ops, read_cr3);
+ PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(pv_cpu_ops, clts);
+- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+ PATCH_SITE(pv_cpu_ops, wbinvd);
+ #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+--
+2.15.0
+
diff --git a/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch b/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch
new file mode 100644
index 0000000..a670a21
--- /dev/null
+++ b/queue/x86-paravirt-remove-noreplace-paravirt-cmdline-option.patch
@@ -0,0 +1,91 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Tue, 30 Jan 2018 22:13:33 -0600
+Subject: x86/paravirt: Remove 'noreplace-paravirt' cmdline option
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+
+(cherry picked from commit 12c69f1e94c89d40696e83804dd2f0965b5250cd)
+
+The 'noreplace-paravirt' option disables paravirt patching, leaving the
+original pv indirect calls in place.
+
+That's highly incompatible with retpolines, unless we want to uglify
+paravirt even further and convert the paravirt calls to retpolines.
+
+As far as I can tell, the option doesn't seem to be useful for much
+other than introducing surprising corner cases and making the kernel
+vulnerable to Spectre v2. It was probably a debug option from the early
+paravirt days. So just remove it.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Cc: Greg KH <gregkh@linuxfoundation.org>
+Cc: Jun Nakajima <jun.nakajima@intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Asit Mallick <asit.k.mallick@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Alok Kataria <akataria@vmware.com>
+Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
+Cc: David Woodhouse <dwmw2@infradead.org>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Link: https://lkml.kernel.org/r/20180131041333.2x6blhxirc2kclrq@treble
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt | 2 --
+ arch/x86/kernel/alternative.c | 14 --------------
+ 2 files changed, 16 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes
+ norandmaps Don't use address space randomization. Equivalent to
+ echo 0 > /proc/sys/kernel/randomize_va_space
+
+- noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops
+-
+ noreplace-smp [X86-32,SMP] Don't replace SMP instructions
+ with UP alternatives
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(ch
+ }
+ __setup("noreplace-smp", setup_noreplace_smp);
+
+-#ifdef CONFIG_PARAVIRT
+-static int __initdata_or_module noreplace_paravirt = 0;
+-
+-static int __init setup_noreplace_paravirt(char *str)
+-{
+- noreplace_paravirt = 1;
+- return 1;
+-}
+-__setup("noreplace-paravirt", setup_noreplace_paravirt);
+-#endif
+-
+ #define DPRINTK(fmt, args...) \
+ do { \
+ if (debug_alternative) \
+@@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(str
+ struct paravirt_patch_site *p;
+ char insnbuf[MAX_PATCH_LEN];
+
+- if (noreplace_paravirt)
+- return;
+-
+ for (p = start; p < end; p++) {
+ unsigned int used;
+
diff --git a/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch b/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch
new file mode 100644
index 0000000..ffbcbc7
--- /dev/null
+++ b/queue/x86-pti-do-not-enable-pti-on-cpus-which-are-not-vulnerable-to-meltdown.patch
@@ -0,0 +1,112 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:13 +0000
+Subject: x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit fec9434a12f38d3aeafeb75711b71d8a1fdef621)
+
+Also, for CPUs which don't speculate at all, don't report that they're
+vulnerable to the Spectre variants either.
+
+Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it
+for now, even though that could be done with a simple comparison, on the
+assumption that we'll have more to add.
+
+Based on suggestions from Dave Hansen and Alan Cox.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 43 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -44,6 +44,8 @@
+ #include <asm/pat.h>
+ #include <asm/microcode.h>
+ #include <asm/microcode_intel.h>
++#include <asm/intel-family.h>
++#include <asm/cpu_device_id.h>
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ #include <asm/uv/uv.h>
+@@ -838,6 +840,41 @@ static void identify_cpu_without_cpuid(s
+ #endif
+ }
+
++static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
++ { X86_VENDOR_CENTAUR, 5 },
++ { X86_VENDOR_INTEL, 5 },
++ { X86_VENDOR_NSC, 5 },
++ { X86_VENDOR_ANY, 4 },
++ {}
++};
++
++static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
++ { X86_VENDOR_AMD },
++ {}
++};
++
++static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
++{
++ u64 ia32_cap = 0;
++
++ if (x86_match_cpu(cpu_no_meltdown))
++ return false;
++
++ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++ /* Rogue Data Cache Load? No! */
++ if (ia32_cap & ARCH_CAP_RDCL_NO)
++ return false;
++
++ return true;
++}
++
+ /*
+ * Do minimum CPU detection early.
+ * Fields really needed: vendor, cpuid_level, family, model, mask,
+@@ -884,11 +921,12 @@ static void __init early_identify_cpu(st
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+- if (c->x86_vendor != X86_VENDOR_AMD)
+- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+-
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++ if (!x86_match_cpu(cpu_no_speculation)) {
++ if (cpu_vulnerable_to_meltdown(c))
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++ }
+
+ fpu__init_system(c);
+
diff --git a/queue/x86-pti-document-fix-wrong-index.patch b/queue/x86-pti-document-fix-wrong-index.patch
new file mode 100644
index 0000000..cb7b93a
--- /dev/null
+++ b/queue/x86-pti-document-fix-wrong-index.patch
@@ -0,0 +1,32 @@
+From 98f0fceec7f84d80bc053e49e596088573086421 Mon Sep 17 00:00:00 2001
+From: "zhenwei.pi" <zhenwei.pi@youruncloud.com>
+Date: Thu, 18 Jan 2018 09:04:52 +0800
+Subject: x86/pti: Document fix wrong index
+
+From: zhenwei.pi <zhenwei.pi@youruncloud.com>
+
+commit 98f0fceec7f84d80bc053e49e596088573086421 upstream.
+
+In section <2. Runtime Cost>, fix wrong index.
+
+Signed-off-by: zhenwei.pi <zhenwei.pi@youruncloud.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: dave.hansen@linux.intel.com
+Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/x86/pti.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Documentation/x86/pti.txt
++++ b/Documentation/x86/pti.txt
+@@ -78,7 +78,7 @@ this protection comes at a cost:
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+- d. Global pages are disabled for all kernel structures not
++ c. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
diff --git a/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch b/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch
new file mode 100644
index 0000000..b38321d
--- /dev/null
+++ b/queue/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch
@@ -0,0 +1,76 @@
+From pasha.tatashin@oracle.com Mon Jan 15 18:48:49 2018
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+Date: Mon, 15 Jan 2018 11:44:14 -0500
+Subject: x86/pti/efi: broken conversion from efi to kernel page table
+To: steven.sistare@oracle.com, linux-kernel@vger.kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, x86@kernel.org, gregkh@linuxfoundation.org, jkosina@suse.cz, hughd@google.com, dave.hansen@linux.intel.com, luto@kernel.org, stable@vger.kernel.org
+Message-ID: <20180115164414.19778-1-pasha.tatashin@oracle.com>
+
+From: Pavel Tatashin <pasha.tatashin@oracle.com>
+
+The page table order must be increased for EFI table in order to avoid a
+bug where NMI tries to change the page table to kernel page table, while
+efi page table is active.
+
+For more disccussion about this bug, see this thread:
+http://lkml.iu.edu/hypermail/linux/kernel/1801.1/00951.html
+
+Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
+Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgalloc.h | 11 +++++++++++
+ arch/x86/mm/pgtable.c | 7 -------
+ arch/x86/platform/efi/efi_64.c | 2 +-
+ 3 files changed, 12 insertions(+), 8 deletions(-)
+
+Changelog:
+ v1 - v2: Fixed compiling warning
+
+--- a/arch/x86/include/asm/pgalloc.h
++++ b/arch/x86/include/asm/pgalloc.h
+@@ -27,6 +27,17 @@ static inline void paravirt_release_pud(
+ */
+ extern gfp_t __userpte_alloc_gfp;
+
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
++ * both 8k in size and 8k-aligned. That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
++#else
++#define PGD_ALLOCATION_ORDER 0
++#endif
++
+ /*
+ * Allocate and free page tables.
+ */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
+ }
+ #else
+
+-/*
+- * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+- * both 8k in size and 8k-aligned. That lets us just flip bit 12
+- * in a pointer to swap between the two 4k halves.
+- */
+-#define PGD_ALLOCATION_ORDER kaiser_enabled
+-
+ static inline pgd_t *_pgd_alloc(void)
+ {
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void)
+ return 0;
+
+ gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
+- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
++ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+ if (!efi_pgd)
+ return -ENOMEM;
+
diff --git a/queue/x86-pti-mark-constant-arrays-as-__initconst.patch b/queue/x86-pti-mark-constant-arrays-as-__initconst.patch
new file mode 100644
index 0000000..d9843e5
--- /dev/null
+++ b/queue/x86-pti-mark-constant-arrays-as-__initconst.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Feb 8 03:33:09 CET 2018
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Fri, 2 Feb 2018 22:39:23 +0100
+Subject: x86/pti: Mark constant arrays as __initconst
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+
+(cherry picked from commit 4bf5d56d429cbc96c23d809a08f63cd29e1a702e)
+
+I'm seeing build failures from the two newly introduced arrays that
+are marked 'const' and '__initdata', which are mutually exclusive:
+
+arch/x86/kernel/cpu/common.c:882:43: error: 'cpu_no_speculation' causes a section type conflict with 'e820_table_firmware_init'
+arch/x86/kernel/cpu/common.c:895:43: error: 'cpu_no_meltdown' causes a section type conflict with 'e820_table_firmware_init'
+
+The correct annotation is __initconst.
+
+Fixes: fec9434a12f3 ("x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Thomas Garnier <thgarnie@google.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180202213959.611210-1-arnd@arndb.de
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -861,7 +861,7 @@ static void identify_cpu_without_cpuid(s
+ #endif
+ }
+
+-static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
++static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
+@@ -874,7 +874,7 @@ static const __initdata struct x86_cpu_i
+ {}
+ };
+
+-static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
++static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+ { X86_VENDOR_AMD },
+ {}
+ };
diff --git a/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch b/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch
new file mode 100644
index 0000000..5dce2f6
--- /dev/null
+++ b/queue/x86-pti-rename-bug_cpu_insecure-to-bug_cpu_meltdown.patch
@@ -0,0 +1,56 @@
+From de791821c295cc61419a06fe5562288417d1bc58 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 5 Jan 2018 15:27:34 +0100
+Subject: x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit de791821c295cc61419a06fe5562288417d1bc58 upstream.
+
+Use the name associated with the particular attack which needs page table
+isolation for mitigation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Jiri Koshina <jikos@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Andi Lutomirski <luto@amacapital.net>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Paul Turner <pjt@google.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Greg KH <gregkh@linux-foundation.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos
+Signed-off-by: Razvan Ghitulete <rga@amazon.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/kernel/cpu/common.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -316,6 +316,6 @@
+ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+-#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
++#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -884,7 +884,7 @@ static void __init early_identify_cpu(st
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+- setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ fpu__init_system(c);
+ }
diff --git a/queue/x86-retpoline-add-initial-retpoline-support.patch b/queue/x86-retpoline-add-initial-retpoline-support.patch
new file mode 100644
index 0000000..7ea928b
--- /dev/null
+++ b/queue/x86-retpoline-add-initial-retpoline-support.patch
@@ -0,0 +1,359 @@
+From 76b043848fd22dbf7f8bf3a1452f8c70d557b860 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:25 +0000
+Subject: x86/retpoline: Add initial retpoline support
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 76b043848fd22dbf7f8bf3a1452f8c70d557b860 upstream.
+
+Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
+the corresponding thunks. Provide assembler macros for invoking the thunks
+in the same way that GCC does, from native and inline assembler.
+
+This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In
+some circumstances, IBRS microcode features may be used instead, and the
+retpoline can be disabled.
+
+On AMD CPUs if lfence is serialising, the retpoline can be dramatically
+simplified to a simple "lfence; jmp *\reg". A future patch, after it has
+been verified that lfence really is serialising in all circumstances, can
+enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition
+to X86_FEATURE_RETPOLINE.
+
+Do not align the retpoline in the altinstr section, because there is no
+guarantee that it stays aligned when it's copied over the oldinstr during
+alternative patching.
+
+[ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]
+[ tglx: Put actual function CALL/JMP in front of the macros, convert to
+ symbolic labels ]
+[ dwmw2: Convert back to numeric labels, merge objtool fixes ]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig | 13 +++
+ arch/x86/Makefile | 10 ++
+ arch/x86/include/asm/asm-prototypes.h | 25 ++++++
+ arch/x86/include/asm/cpufeatures.h | 3
+ arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c | 4 +
+ arch/x86/lib/Makefile | 1
+ arch/x86/lib/retpoline.S | 48 ++++++++++++
+ 8 files changed, 232 insertions(+)
+ create mode 100644 arch/x86/include/asm/nospec-branch.h
+ create mode 100644 arch/x86/lib/retpoline.S
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -408,6 +408,19 @@ config GOLDFISH
+ def_bool y
+ depends on X86_GOLDFISH
+
++config RETPOLINE
++ bool "Avoid speculative indirect branches in kernel"
++ default y
++ ---help---
++ Compile kernel with the retpoline compiler options to guard against
++ kernel-to-user data leaks by avoiding speculative indirect
++ branches. Requires a compiler with -mindirect-branch=thunk-extern
++ support for full protection. The kernel may run slower.
++
++ Without compiler support, at least indirect branches in assembler
++ code are eliminated. Since this includes the syscall entry path,
++ it is not entirely pointless.
++
+ if X86_32
+ config X86_EXTENDED_PLATFORM
+ bool "Support for extended (non-PC) x86 platforms"
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -182,6 +182,16 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin
+ KBUILD_CFLAGS += $(mflags-y)
+ KBUILD_AFLAGS += $(mflags-y)
+
++# Avoid indirect branches in kernel to deal with Spectre
++ifdef CONFIG_RETPOLINE
++ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
++ ifneq ($(RETPOLINE_CFLAGS),)
++ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
++ else
++ $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
++ endif
++endif
++
+ archscripts: scripts_basic
+ $(Q)$(MAKE) $(build)=arch/x86/tools relocs
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -10,7 +10,32 @@
+ #include <asm/pgtable.h>
+ #include <asm/special_insns.h>
+ #include <asm/preempt.h>
++#include <asm/asm.h>
+
+ #ifndef CONFIG_X86_CMPXCHG64
+ extern void cmpxchg8b_emu(void);
+ #endif
++
++#ifdef CONFIG_RETPOLINE
++#ifdef CONFIG_X86_32
++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
++#else
++#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
++INDIRECT_THUNK(8)
++INDIRECT_THUNK(9)
++INDIRECT_THUNK(10)
++INDIRECT_THUNK(11)
++INDIRECT_THUNK(12)
++INDIRECT_THUNK(13)
++INDIRECT_THUNK(14)
++INDIRECT_THUNK(15)
++#endif
++INDIRECT_THUNK(ax)
++INDIRECT_THUNK(bx)
++INDIRECT_THUNK(cx)
++INDIRECT_THUNK(dx)
++INDIRECT_THUNK(si)
++INDIRECT_THUNK(di)
++INDIRECT_THUNK(bp)
++INDIRECT_THUNK(sp)
++#endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -194,6 +194,9 @@
+ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
++#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
++#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
++
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+--- /dev/null
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -0,0 +1,128 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#ifndef __NOSPEC_BRANCH_H__
++#define __NOSPEC_BRANCH_H__
++
++#include <asm/alternative.h>
++#include <asm/alternative-asm.h>
++#include <asm/cpufeatures.h>
++
++#ifdef __ASSEMBLY__
++
++/*
++ * This should be used immediately before a retpoline alternative. It tells
++ * objtool where the retpolines are so that it can make sense of the control
++ * flow by just reading the original instruction(s) and ignoring the
++ * alternatives.
++ */
++.macro ANNOTATE_NOSPEC_ALTERNATIVE
++ .Lannotate_\@:
++ .pushsection .discard.nospec
++ .long .Lannotate_\@ - .
++ .popsection
++.endm
++
++/*
++ * These are the bare retpoline primitives for indirect jmp and call.
++ * Do not use these directly; they only exist to make the ALTERNATIVE
++ * invocation below less ugly.
++ */
++.macro RETPOLINE_JMP reg:req
++ call .Ldo_rop_\@
++.Lspec_trap_\@:
++ pause
++ jmp .Lspec_trap_\@
++.Ldo_rop_\@:
++ mov \reg, (%_ASM_SP)
++ ret
++.endm
++
++/*
++ * This is a wrapper around RETPOLINE_JMP so the called function in reg
++ * returns to the instruction after the macro.
++ */
++.macro RETPOLINE_CALL reg:req
++ jmp .Ldo_call_\@
++.Ldo_retpoline_jmp_\@:
++ RETPOLINE_JMP \reg
++.Ldo_call_\@:
++ call .Ldo_retpoline_jmp_\@
++.endm
++
++/*
++ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
++ * indirect jmp/call which may be susceptible to the Spectre variant 2
++ * attack.
++ */
++.macro JMP_NOSPEC reg:req
++#ifdef CONFIG_RETPOLINE
++ ANNOTATE_NOSPEC_ALTERNATIVE
++ ALTERNATIVE_2 __stringify(jmp *\reg), \
++ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
++ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
++#else
++ jmp *\reg
++#endif
++.endm
++
++.macro CALL_NOSPEC reg:req
++#ifdef CONFIG_RETPOLINE
++ ANNOTATE_NOSPEC_ALTERNATIVE
++ ALTERNATIVE_2 __stringify(call *\reg), \
++ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
++ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
++#else
++ call *\reg
++#endif
++.endm
++
++#else /* __ASSEMBLY__ */
++
++#define ANNOTATE_NOSPEC_ALTERNATIVE \
++ "999:\n\t" \
++ ".pushsection .discard.nospec\n\t" \
++ ".long 999b - .\n\t" \
++ ".popsection\n\t"
++
++#if defined(CONFIG_X86_64) && defined(RETPOLINE)
++
++/*
++ * Since the inline asm uses the %V modifier which is only in newer GCC,
++ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
++ */
++# define CALL_NOSPEC \
++ ANNOTATE_NOSPEC_ALTERNATIVE \
++ ALTERNATIVE( \
++ "call *%[thunk_target]\n", \
++ "call __x86_indirect_thunk_%V[thunk_target]\n", \
++ X86_FEATURE_RETPOLINE)
++# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
++
++#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
++/*
++ * For i386 we use the original ret-equivalent retpoline, because
++ * otherwise we'll run out of registers. We don't care about CET
++ * here, anyway.
++ */
++# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
++ " jmp 904f;\n" \
++ " .align 16\n" \
++ "901: call 903f;\n" \
++ "902: pause;\n" \
++ " jmp 902b;\n" \
++ " .align 16\n" \
++ "903: addl $4, %%esp;\n" \
++ " pushl %[thunk_target];\n" \
++ " ret;\n" \
++ " .align 16\n" \
++ "904: call 901b;\n", \
++ X86_FEATURE_RETPOLINE)
++
++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#else /* No retpoline */
++# define CALL_NOSPEC "call *%[thunk_target]\n"
++# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
++#endif
++
++#endif /* __ASSEMBLY__ */
++#endif /* __NOSPEC_BRANCH_H__ */
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -889,6 +889,10 @@ static void __init early_identify_cpu(st
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
++#ifdef CONFIG_RETPOLINE
++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++#endif
++
+ fpu__init_system(c);
+
+ #ifdef CONFIG_X86_32
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o
+ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
++lib-$(CONFIG_RETPOLINE) += retpoline.o
+
+ obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+
+--- /dev/null
++++ b/arch/x86/lib/retpoline.S
+@@ -0,0 +1,48 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#include <linux/stringify.h>
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/cpufeatures.h>
++#include <asm/alternative-asm.h>
++#include <asm/export.h>
++#include <asm/nospec-branch.h>
++
++.macro THUNK reg
++ .section .text.__x86.indirect_thunk.\reg
++
++ENTRY(__x86_indirect_thunk_\reg)
++ CFI_STARTPROC
++ JMP_NOSPEC %\reg
++ CFI_ENDPROC
++ENDPROC(__x86_indirect_thunk_\reg)
++.endm
++
++/*
++ * Despite being an assembler file we can't just use .irp here
++ * because __KSYM_DEPS__ only uses the C preprocessor and would
++ * only see one instance of "__x86_indirect_thunk_\reg" rather
++ * than one per register with the correct names. So we do it
++ * the simple and nasty way...
++ */
++#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
++#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
++
++GENERATE_THUNK(_ASM_AX)
++GENERATE_THUNK(_ASM_BX)
++GENERATE_THUNK(_ASM_CX)
++GENERATE_THUNK(_ASM_DX)
++GENERATE_THUNK(_ASM_SI)
++GENERATE_THUNK(_ASM_DI)
++GENERATE_THUNK(_ASM_BP)
++GENERATE_THUNK(_ASM_SP)
++#ifdef CONFIG_64BIT
++GENERATE_THUNK(r8)
++GENERATE_THUNK(r9)
++GENERATE_THUNK(r10)
++GENERATE_THUNK(r11)
++GENERATE_THUNK(r12)
++GENERATE_THUNK(r13)
++GENERATE_THUNK(r14)
++GENERATE_THUNK(r15)
++#endif
diff --git a/queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch b/queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch
new file mode 100644
index 0000000..0f624d5
--- /dev/null
+++ b/queue/x86-retpoline-add-lfence-to-the-retpoline-rsb-filling-rsb-macros.patch
@@ -0,0 +1,91 @@
+From 28d437d550e1e39f805d99f9f8ac399c778827b7 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Sat, 13 Jan 2018 17:27:30 -0600
+Subject: x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit 28d437d550e1e39f805d99f9f8ac399c778827b7 upstream.
+
+The PAUSE instruction is currently used in the retpoline and RSB filling
+macros as a speculation trap. The use of PAUSE was originally suggested
+because it showed a very, very small difference in the amount of
+cycles/time used to execute the retpoline as compared to LFENCE. On AMD,
+the PAUSE instruction is not a serializing instruction, so the pause/jmp
+loop will use excess power as it is speculated over waiting for return
+to mispredict to the correct target.
+
+The RSB filling macro is applicable to AMD, and, if software is unable to
+verify that LFENCE is serializing on AMD (possible when running under a
+hypervisor), the generic retpoline support will be used and, so, is also
+applicable to AMD. Keep the current usage of PAUSE for Intel, but add an
+LFENCE instruction to the speculation trap for AMD.
+
+The same sequence has been adopted by GCC for the GCC generated retpolines.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Borislav Petkov <bp@alien8.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Paul Turner <pjt@google.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Kees Cook <keescook@google.com>
+Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/nospec-branch.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -11,7 +11,7 @@
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+- * infinite 'pause; jmp' loop to capture speculative execution.
++ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+@@ -38,11 +38,13 @@
+ call 772f; \
+ 773: /* speculation trap */ \
+ pause; \
++ lfence; \
+ jmp 773b; \
+ 772: \
+ call 774f; \
+ 775: /* speculation trap */ \
+ pause; \
++ lfence; \
+ jmp 775b; \
+ 774: \
+ dec reg; \
+@@ -73,6 +75,7 @@
+ call .Ldo_rop_\@
+ .Lspec_trap_\@:
+ pause
++ lfence
+ jmp .Lspec_trap_\@
+ .Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+@@ -165,6 +168,7 @@
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
++ " lfence;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
diff --git a/queue/x86-retpoline-avoid-retpolines-for-built-in-__init-functions.patch b/queue/x86-retpoline-avoid-retpolines-for-built-in-__init-functions.patch
new file mode 100644
index 0000000..c810eec
--- /dev/null
+++ b/queue/x86-retpoline-avoid-retpolines-for-built-in-__init-functions.patch
@@ -0,0 +1,50 @@
+From ee4aac311ae4f66980ec4e1accc83740909c130a Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 1 Feb 2018 11:27:20 +0000
+Subject: [PATCH] x86/retpoline: Avoid retpolines for built-in __init functions
+
+(cherry picked from commit 66f793099a636862a71c59d4a6ba91387b155e0c)
+
+There's no point in building init code with retpolines, since it runs before
+any potentially hostile userspace does. And before the retpoline is actually
+ALTERNATIVEd into place, for much of it.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: karahmed@amazon.de
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/include/linux/init.h b/include/linux/init.h
+index 6935d02474aa..fe1e5981c43b 100644
+--- a/include/linux/init.h
++++ b/include/linux/init.h
+@@ -4,6 +4,13 @@
+ #include <linux/compiler.h>
+ #include <linux/types.h>
+
++/* Built-in __init functions needn't be compiled with retpoline */
++#if defined(RETPOLINE) && !defined(MODULE)
++#define __noretpoline __attribute__((indirect_branch("keep")))
++#else
++#define __noretpoline
++#endif
++
+ /* These macros are used to mark some functions or
+ * initialized data (doesn't apply to uninitialized data)
+ * as `initialization' functions. The kernel can take this
+@@ -39,7 +46,7 @@
+
+ /* These are for everybody (although not all archs will actually
+ discard it in modules) */
+-#define __init __section(.init.text) __cold notrace
++#define __init __section(.init.text) __cold notrace __noretpoline
+ #define __initdata __section(.init.data)
+ #define __initconst __constsection(.init.rodata)
+ #define __exitdata __section(.exit.data)
+--
+2.15.0
+
diff --git a/queue/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch b/queue/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..c40ee9e
--- /dev/null
+++ b/queue/x86-retpoline-checksum32-convert-assembler-indirect-jumps.patch
@@ -0,0 +1,67 @@
+From 5096732f6f695001fa2d6f1335a2680b37912c69 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:32 +0000
+Subject: x86/retpoline/checksum32: Convert assembler indirect jumps
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 5096732f6f695001fa2d6f1335a2680b37912c69 upstream.
+
+Convert all indirect jumps in 32bit checksum assembler code to use
+non-speculative sequences when CONFIG_RETPOLINE is enabled.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/checksum_32.S | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/lib/checksum_32.S
++++ b/arch/x86/lib/checksum_32.S
+@@ -29,7 +29,8 @@
+ #include <asm/errno.h>
+ #include <asm/asm.h>
+ #include <asm/export.h>
+-
++#include <asm/nospec-branch.h>
++
+ /*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+@@ -156,7 +157,7 @@ ENTRY(csum_partial)
+ negl %ebx
+ lea 45f(%ebx,%ebx,2), %ebx
+ testl %esi, %esi
+- jmp *%ebx
++ JMP_NOSPEC %ebx
+
+ # Handle 2-byte-aligned regions
+ 20: addw (%esi), %ax
+@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
+ andl $-32,%edx
+ lea 3f(%ebx,%ebx), %ebx
+ testl %esi, %esi
+- jmp *%ebx
++ JMP_NOSPEC %ebx
+ 1: addl $64,%esi
+ addl $64,%edi
+ SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/queue/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch b/queue/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..e9b7176
--- /dev/null
+++ b/queue/x86-retpoline-crypto-convert-crypto-assembler-indirect-jumps.patch
@@ -0,0 +1,125 @@
+From 9697fa39efd3fc3692f2949d4045f393ec58450b Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:27 +0000
+Subject: x86/retpoline/crypto: Convert crypto assembler indirect jumps
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 9697fa39efd3fc3692f2949d4045f393ec58450b upstream.
+
+Convert all indirect jumps in crypto assembler code to use non-speculative
+sequences when CONFIG_RETPOLINE is enabled.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/aesni-intel_asm.S | 5 +++--
+ arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++-
+ arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++-
+ arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++-
+ 4 files changed, 9 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -32,6 +32,7 @@
+ #include <linux/linkage.h>
+ #include <asm/inst.h>
+ #include <asm/frame.h>
++#include <asm/nospec-branch.h>
+
+ /*
+ * The following macros are used to move an (un)aligned 16 byte value to/from
+@@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8)
+ pxor INC, STATE4
+ movdqu IV, 0x30(OUTP)
+
+- call *%r11
++ CALL_NOSPEC %r11
+
+ movdqu 0x00(OUTP), INC
+ pxor INC, STATE1
+@@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8)
+ _aesni_gf128mul_x_ble()
+ movups IV, (IVP)
+
+- call *%r11
++ CALL_NOSPEC %r11
+
+ movdqu 0x40(OUTP), INC
+ pxor INC, STATE1
+--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+@@ -17,6 +17,7 @@
+
+ #include <linux/linkage.h>
+ #include <asm/frame.h>
++#include <asm/nospec-branch.h>
+
+ #define CAMELLIA_TABLE_BYTE_LEN 272
+
+@@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way:
+ vpxor 14 * 16(%rax), %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+- call *%r9;
++ CALL_NOSPEC %r9;
+
+ addq $(16 * 16), %rsp;
+
+--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
++++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+@@ -12,6 +12,7 @@
+
+ #include <linux/linkage.h>
+ #include <asm/frame.h>
++#include <asm/nospec-branch.h>
+
+ #define CAMELLIA_TABLE_BYTE_LEN 272
+
+@@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way:
+ vpxor 14 * 32(%rax), %ymm15, %ymm14;
+ vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+- call *%r9;
++ CALL_NOSPEC %r9;
+
+ addq $(16 * 32), %rsp;
+
+--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
++++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+@@ -45,6 +45,7 @@
+
+ #include <asm/inst.h>
+ #include <linux/linkage.h>
++#include <asm/nospec-branch.h>
+
+ ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
+
+@@ -172,7 +173,7 @@ continue_block:
+ movzxw (bufp, %rax, 2), len
+ lea crc_array(%rip), bufp
+ lea (bufp, len, 1), bufp
+- jmp *bufp
++ JMP_NOSPEC bufp
+
+ ################################################################
+ ## 2a) PROCESS FULL BLOCKS:
diff --git a/queue/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch b/queue/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..ea6e7ec
--- /dev/null
+++ b/queue/x86-retpoline-entry-convert-entry-assembler-indirect-jumps.patch
@@ -0,0 +1,117 @@
+From 2641f08bb7fc63a636a2b18173221d7040a3512e Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:28 +0000
+Subject: x86/retpoline/entry: Convert entry assembler indirect jumps
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 2641f08bb7fc63a636a2b18173221d7040a3512e upstream.
+
+Convert indirect jumps in core 32/64bit entry assembler code to use
+non-speculative sequences when CONFIG_RETPOLINE is enabled.
+
+Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return
+address after the 'call' instruction must be *precisely* at the
+.Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work,
+and the use of alternatives will mess that up unless we play horrid
+games to prepend with NOPs and make the variants the same length. It's
+not worth it; in the case where we ALTERNATIVE out the retpoline, the
+first instruction at __x86.indirect_thunk.rax is going to be a bare
+jmp *%rax anyway.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S | 5 +++--
+ arch/x86/entry/entry_64.S | 10 ++++++++--
+ 2 files changed, 11 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -45,6 +45,7 @@
+ #include <asm/asm.h>
+ #include <asm/smap.h>
+ #include <asm/export.h>
++#include <asm/nospec-branch.h>
+
+ .section .entry.text, "ax"
+
+@@ -260,7 +261,7 @@ ENTRY(ret_from_fork)
+
+ /* kernel thread */
+ 1: movl %edi, %eax
+- call *%ebx
++ CALL_NOSPEC %ebx
+ /*
+ * A kernel thread is allowed to return here after successfully
+ * calling do_execve(). Exit to userspace to complete the execve()
+@@ -1062,7 +1063,7 @@ error_code:
+ movl %ecx, %es
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+- call *%edi
++ CALL_NOSPEC %edi
+ jmp ret_from_exception
+ END(page_fault)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -37,6 +37,7 @@
+ #include <asm/pgtable_types.h>
+ #include <asm/export.h>
+ #include <asm/kaiser.h>
++#include <asm/nospec-branch.h>
+ #include <linux/err.h>
+
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+@@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath:
+ * It might end up jumping to the slow path. If it jumps, RAX
+ * and all argument registers are clobbered.
+ */
++#ifdef CONFIG_RETPOLINE
++ movq sys_call_table(, %rax, 8), %rax
++ call __x86_indirect_thunk_rax
++#else
+ call *sys_call_table(, %rax, 8)
++#endif
+ .Lentry_SYSCALL_64_after_fastpath_call:
+
+ movq %rax, RAX(%rsp)
+@@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64)
+ jmp entry_SYSCALL64_slow_path
+
+ 1:
+- jmp *%rax /* Called from C */
++ JMP_NOSPEC %rax /* Called from C */
+ END(stub_ptregs_64)
+
+ .macro ptregs_stub func
+@@ -457,7 +463,7 @@ ENTRY(ret_from_fork)
+ 1:
+ /* kernel thread */
+ movq %r12, %rdi
+- call *%rbx
++ CALL_NOSPEC %rbx
+ /*
+ * A kernel thread is allowed to return here after successfully
+ * calling do_execve(). Exit to userspace to complete the execve()
diff --git a/queue/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch b/queue/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
new file mode 100644
index 0000000..1fb8cfb
--- /dev/null
+++ b/queue/x86-retpoline-fill-return-stack-buffer-on-vmexit.patch
@@ -0,0 +1,190 @@
+From 7762bf1d921deb32bebbf18e4f7f4a5b5ca16f84 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Fri, 12 Jan 2018 11:11:27 +0000
+Subject: [PATCH] x86/retpoline: Fill return stack buffer on vmexit
+
+commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream.
+
+In accordance with the Intel and AMD documentation, we need to overwrite
+all entries in the RSB on exiting a guest, to prevent malicious branch
+target predictions from affecting the host kernel. This is needed both
+for retpoline and for IBRS.
+
+[ak: numbers again for the RSB stuffing labels]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index ea034fa6e261..402a11c803c3 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -7,6 +7,48 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+
++/*
++ * Fill the CPU return stack buffer.
++ *
++ * Each entry in the RSB, if used for a speculative 'ret', contains an
++ * infinite 'pause; jmp' loop to capture speculative execution.
++ *
++ * This is required in various cases for retpoline and IBRS-based
++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
++ * eliminate potentially bogus entries from the RSB, and sometimes
++ * purely to ensure that it doesn't get empty, which on some CPUs would
++ * allow predictions from other (unwanted!) sources to be used.
++ *
++ * We define a CPP macro such that it can be used from both .S files and
++ * inline assembly. It's possible to do a .macro and then include that
++ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
++ */
++
++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
++#define RSB_FILL_LOOPS 16 /* To avoid underflow */
++
++/*
++ * Google experimented with loop-unrolling and this turned out to be
++ * the optimal version — two calls, each with their own speculation
++ * trap should their return address end up getting used, in a loop.
++ */
++#define __FILL_RETURN_BUFFER(reg, nr, sp) \
++ mov $(nr/2), reg; \
++771: \
++ call 772f; \
++773: /* speculation trap */ \
++ pause; \
++ jmp 773b; \
++772: \
++ call 774f; \
++775: /* speculation trap */ \
++ pause; \
++ jmp 775b; \
++774: \
++ dec reg; \
++ jnz 771b; \
++ add $(BITS_PER_LONG/8) * nr, sp;
++
+ #ifdef __ASSEMBLY__
+
+ /*
+@@ -74,6 +116,20 @@
+ #else
+ call *\reg
+ #endif
++.endm
++
++ /*
++ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
++ * monstrosity above, manually.
++ */
++.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
++#ifdef CONFIG_RETPOLINE
++ ANNOTATE_NOSPEC_ALTERNATIVE
++ ALTERNATIVE "jmp .Lskip_rsb_\@", \
++ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
++ \ftr
++.Lskip_rsb_\@:
++#endif
+ .endm
+
+ #else /* __ASSEMBLY__ */
+@@ -119,7 +175,7 @@
+ X86_FEATURE_RETPOLINE)
+
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+-#else /* No retpoline */
++#else /* No retpoline for C / inline asm */
+ # define CALL_NOSPEC "call *%[thunk_target]\n"
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+ #endif
+@@ -134,5 +190,25 @@ enum spectre_v2_mitigation {
+ SPECTRE_V2_IBRS,
+ };
+
++/*
++ * On VMEXIT we must ensure that no RSB predictions learned in the guest
++ * can be followed in the host, by overwriting the RSB completely. Both
++ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
++ * CPUs with IBRS_ATT *might* it be avoided.
++ */
++static inline void vmexit_fill_RSB(void)
++{
++#ifdef CONFIG_RETPOLINE
++ unsigned long loops = RSB_CLEAR_LOOPS / 2;
++
++ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
++ ALTERNATIVE("jmp 910f",
++ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
++ X86_FEATURE_RETPOLINE)
++ "910:"
++ : "=&r" (loops), ASM_CALL_CONSTRAINT
++ : "r" (loops) : "memory" );
++#endif
++}
+ #endif /* __ASSEMBLY__ */
+ #endif /* __NOSPEC_BRANCH_H__ */
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index af523d84d102..c94a5fdfb26c 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -41,6 +41,7 @@
+ #include <asm/desc.h>
+ #include <asm/debugreg.h>
+ #include <asm/kvm_para.h>
++#include <asm/nospec-branch.h>
+
+ #include <asm/virtext.h>
+ #include "trace.h"
+@@ -4550,6 +4551,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ /* Eliminate branch target predictions from guest mode */
++ vmexit_fill_RSB();
++
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+ #else
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 9bef32504146..6890ba12205d 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -48,6 +48,7 @@
+ #include <asm/kexec.h>
+ #include <asm/apic.h>
+ #include <asm/irq_remapping.h>
++#include <asm/nospec-branch.h>
+
+ #include "trace.h"
+ #include "pmu.h"
+@@ -8951,6 +8952,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ #endif
+ );
+
++ /* Eliminate branch target predictions from guest mode */
++ vmexit_fill_RSB();
++
+ /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+ if (debugctlmsr)
+ update_debugctlmsr(debugctlmsr);
+--
+2.15.0
+
diff --git a/queue/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch b/queue/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch
new file mode 100644
index 0000000..5f0b6b1
--- /dev/null
+++ b/queue/x86-retpoline-fill-rsb-on-context-switch-for-affected-cpus.patch
@@ -0,0 +1,170 @@
+From c995efd5a740d9cbafbf58bde4973e8b50b4d761 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Fri, 12 Jan 2018 17:49:25 +0000
+Subject: x86/retpoline: Fill RSB on context switch for affected CPUs
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.
+
+On context switch from a shallow call stack to a deeper one, as the CPU
+does 'ret' up the deeper side it may encounter RSB entries (predictions for
+where the 'ret' goes to) which were populated in userspace.
+
+This is problematic if neither SMEP nor KPTI (the latter of which marks
+userspace pages as NX for the kernel) are active, as malicious code in
+userspace may then be executed speculatively.
+
+Overwrite the CPU's return prediction stack with calls which are predicted
+to return to an infinite loop, to "capture" speculation if this
+happens. This is required both for retpoline, and also in conjunction with
+IBRS for !SMEP && !KPTI.
+
+On Skylake+ the problem is slightly different, and an *underflow* of the
+RSB may cause errant branch predictions to occur. So there it's not so much
+overwrite, as *filling* the RSB to attempt to prevent it getting
+empty. This is only a partial solution for Skylake+ since there are many
+other conditions which may result in the RSB becoming empty. The full
+solution on Skylake+ is to use IBRS, which will prevent the problem even
+when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
+required on context switch.
+
+[ tglx: Added missing vendor check and slighty massaged comments and
+ changelog ]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S | 11 +++++++++++
+ arch/x86/entry/entry_64.S | 11 +++++++++++
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++++
+ 4 files changed, 59 insertions(+)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -229,6 +229,17 @@ ENTRY(__switch_to_asm)
+ movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ #endif
+
++#ifdef CONFIG_RETPOLINE
++ /*
++ * When switching from a shallower to a deeper call stack
++ * the RSB may either underflow or use entries populated
++ * with userspace addresses. On CPUs where those concerns
++ * exist, overwrite the RSB with entries which capture
++ * speculative execution to prevent attack.
++ */
++ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++#endif
++
+ /* restore callee-saved registers */
+ popl %esi
+ popl %edi
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -427,6 +427,17 @@ ENTRY(__switch_to_asm)
+ movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ #endif
+
++#ifdef CONFIG_RETPOLINE
++ /*
++ * When switching from a shallower to a deeper call stack
++ * the RSB may either underflow or use entries populated
++ * with userspace addresses. On CPUs where those concerns
++ * exist, overwrite the RSB with entries which capture
++ * speculative execution to prevent attack.
++ */
++ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++#endif
++
+ /* restore callee-saved registers */
+ popq %r15
+ popq %r14
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -200,6 +200,7 @@
+ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
++#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -22,6 +22,7 @@
+ #include <asm/alternative.h>
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
++#include <asm/intel-family.h>
+
+ static void __init spectre_v2_select_mitigation(void);
+
+@@ -154,6 +155,23 @@ disable:
+ return SPECTRE_V2_CMD_NONE;
+ }
+
++/* Check for Skylake-like CPUs (for RSB handling) */
++static bool __init is_skylake_era(void)
++{
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
++ boot_cpu_data.x86 == 6) {
++ switch (boot_cpu_data.x86_model) {
++ case INTEL_FAM6_SKYLAKE_MOBILE:
++ case INTEL_FAM6_SKYLAKE_DESKTOP:
++ case INTEL_FAM6_SKYLAKE_X:
++ case INTEL_FAM6_KABYLAKE_MOBILE:
++ case INTEL_FAM6_KABYLAKE_DESKTOP:
++ return true;
++ }
++ }
++ return false;
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -212,6 +230,24 @@ retpoline_auto:
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
++
++ /*
++ * If neither SMEP or KPTI are available, there is a risk of
++ * hitting userspace addresses in the RSB after a context switch
++ * from a shallow call stack to a deeper one. To prevent this fill
++ * the entire RSB, even when using IBRS.
++ *
++ * Skylake era CPUs have a separate issue with *underflow* of the
++ * RSB, when they will predict 'ret' targets from the generic BTB.
++ * The proper mitigation for this is IBRS. If IBRS is not supported
++ * or deactivated in favour of retpolines the RSB fill on context
++ * switch is required.
++ */
++ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
++ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
++ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
++ pr_info("Filling RSB on context switch\n");
++ }
+ }
+
+ #undef pr_fmt
diff --git a/queue/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch b/queue/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..17aeb29
--- /dev/null
+++ b/queue/x86-retpoline-ftrace-convert-ftrace-assembler-indirect-jumps.patch
@@ -0,0 +1,88 @@
+From 9351803bd803cdbeb9b5a7850b7b6f464806e3db Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:29 +0000
+Subject: x86/retpoline/ftrace: Convert ftrace assembler indirect jumps
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 9351803bd803cdbeb9b5a7850b7b6f464806e3db upstream.
+
+Convert all indirect jumps in ftrace assembler code to use non-speculative
+sequences when CONFIG_RETPOLINE is enabled.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 5 +++--
+ arch/x86/kernel/mcount_64.S | 7 ++++---
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -985,7 +985,8 @@ trace:
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+- call *ftrace_trace_function
++ movl ftrace_trace_function, %ecx
++ CALL_NOSPEC %ecx
+
+ popl %edx
+ popl %ecx
+@@ -1021,7 +1022,7 @@ return_to_handler:
+ movl %eax, %ecx
+ popl %edx
+ popl %eax
+- jmp *%ecx
++ JMP_NOSPEC %ecx
+ #endif
+
+ #ifdef CONFIG_TRACING
+--- a/arch/x86/kernel/mcount_64.S
++++ b/arch/x86/kernel/mcount_64.S
+@@ -8,7 +8,7 @@
+ #include <asm/ptrace.h>
+ #include <asm/ftrace.h>
+ #include <asm/export.h>
+-
++#include <asm/nospec-branch.h>
+
+ .code64
+ .section .entry.text, "ax"
+@@ -290,8 +290,9 @@ trace:
+ * ip and parent ip are used and the list function is called when
+ * function tracing is enabled.
+ */
+- call *ftrace_trace_function
+
++ movq ftrace_trace_function, %r8
++ CALL_NOSPEC %r8
+ restore_mcount_regs
+
+ jmp fgraph_trace
+@@ -334,5 +335,5 @@ GLOBAL(return_to_handler)
+ movq 8(%rsp), %rdx
+ movq (%rsp), %rax
+ addq $24, %rsp
+- jmp *%rdi
++ JMP_NOSPEC %rdi
+ #endif
diff --git a/queue/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch b/queue/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..4114df9
--- /dev/null
+++ b/queue/x86-retpoline-hyperv-convert-assembler-indirect-jumps.patch
@@ -0,0 +1,76 @@
+From e70e5892b28c18f517f29ab6e83bd57705104b31 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:30 +0000
+Subject: x86/retpoline/hyperv: Convert assembler indirect jumps
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit e70e5892b28c18f517f29ab6e83bd57705104b31 upstream.
+
+Convert all indirect jumps in hyperv inline asm code to use non-speculative
+sequences when CONFIG_RETPOLINE is enabled.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk
+[ backport to 4.9, hopefully correct, not tested... - gregkh ]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hv/hv.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/hv/hv.c
++++ b/drivers/hv/hv.c
+@@ -31,6 +31,7 @@
+ #include <linux/clockchips.h>
+ #include <asm/hyperv.h>
+ #include <asm/mshyperv.h>
++#include <asm/nospec-branch.h>
+ #include "hyperv_vmbus.h"
+
+ /* The one and only */
+@@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *i
+ return (u64)ULLONG_MAX;
+
+ __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
+- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
++ __asm__ __volatile__(CALL_NOSPEC :
++ "=a" (hv_status) :
+ "c" (control), "d" (input_address),
+- "m" (hypercall_page));
++ THUNK_TARGET(hypercall_page));
+
+ return hv_status;
+
+@@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *i
+ if (!hypercall_page)
+ return (u64)ULLONG_MAX;
+
+- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
++ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
+ "=a"(hv_status_lo) : "d" (control_hi),
+ "a" (control_lo), "b" (input_address_hi),
+ "c" (input_address_lo), "D"(output_address_hi),
+- "S"(output_address_lo), "m" (hypercall_page));
++ "S"(output_address_lo),
++ THUNK_TARGET(hypercall_page));
+
+ return hv_status_lo | ((u64)hv_status_hi << 32);
+ #endif /* !x86_64 */
diff --git a/queue/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch b/queue/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
new file mode 100644
index 0000000..029afab
--- /dev/null
+++ b/queue/x86-retpoline-irq32-convert-assembler-indirect-jumps.patch
@@ -0,0 +1,74 @@
+From 7614e913db1f40fff819b36216484dc3808995d4 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Thu, 11 Jan 2018 21:46:33 +0000
+Subject: x86/retpoline/irq32: Convert assembler indirect jumps
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 7614e913db1f40fff819b36216484dc3808995d4 upstream.
+
+Convert all indirect jumps in 32bit irq inline asm code to use non
+speculative sequences.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/irq_32.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/irq_32.c
++++ b/arch/x86/kernel/irq_32.c
+@@ -19,6 +19,7 @@
+ #include <linux/mm.h>
+
+ #include <asm/apic.h>
++#include <asm/nospec-branch.h>
+
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+
+@@ -54,11 +55,11 @@ DEFINE_PER_CPU(struct irq_stack *, softi
+ static void call_on_stack(void *func, void *stack)
+ {
+ asm volatile("xchgl %%ebx,%%esp \n"
+- "call *%%edi \n"
++ CALL_NOSPEC
+ "movl %%ebx,%%esp \n"
+ : "=b" (stack)
+ : "0" (stack),
+- "D"(func)
++ [thunk_target] "D"(func)
+ : "memory", "cc", "edx", "ecx", "eax");
+ }
+
+@@ -94,11 +95,11 @@ static inline int execute_on_irq_stack(i
+ call_on_stack(print_stack_overflow, isp);
+
+ asm volatile("xchgl %%ebx,%%esp \n"
+- "call *%%edi \n"
++ CALL_NOSPEC
+ "movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=b" (isp)
+ : "0" (desc), "1" (isp),
+- "D" (desc->handle_irq)
++ [thunk_target] "D" (desc->handle_irq)
+ : "memory", "cc", "ecx");
+ return 1;
+ }
diff --git a/queue/x86-retpoline-optimize-inline-assembler-for-vmexit_fill_rsb.patch b/queue/x86-retpoline-optimize-inline-assembler-for-vmexit_fill_rsb.patch
new file mode 100644
index 0000000..963e73a
--- /dev/null
+++ b/queue/x86-retpoline-optimize-inline-assembler-for-vmexit_fill_rsb.patch
@@ -0,0 +1,58 @@
+From 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 Mon Sep 17 00:00:00 2001
+From: Andi Kleen <ak@linux.intel.com>
+Date: Wed, 17 Jan 2018 14:53:28 -0800
+Subject: x86/retpoline: Optimize inline assembler for vmexit_fill_RSB
+
+From: Andi Kleen <ak@linux.intel.com>
+
+commit 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 upstream.
+
+The generated assembler for the C fill RSB inline asm operations has
+several issues:
+
+- The C code sets up the loop register, which is then immediately
+ overwritten in __FILL_RETURN_BUFFER with the same value again.
+
+- The C code also passes in the iteration count in another register, which
+ is not used at all.
+
+Remove these two unnecessary operations. Just rely on the single constant
+passed to the macro for the iterations.
+
+Signed-off-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: dave.hansen@intel.com
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: arjan@linux.intel.com
+Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/nospec-branch.h | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -206,16 +206,17 @@ extern char __indirect_thunk_end[];
+ static inline void vmexit_fill_RSB(void)
+ {
+ #ifdef CONFIG_RETPOLINE
+- unsigned long loops = RSB_CLEAR_LOOPS / 2;
++ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+- : "=&r" (loops), ASM_CALL_CONSTRAINT
+- : "r" (loops) : "memory" );
++ : "=r" (loops), ASM_CALL_CONSTRAINT
++ : : "memory" );
+ #endif
+ }
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* __NOSPEC_BRANCH_H__ */
diff --git a/queue/x86-retpoline-remove-compile-time-warning.patch b/queue/x86-retpoline-remove-compile-time-warning.patch
new file mode 100644
index 0000000..24dd1c7
--- /dev/null
+++ b/queue/x86-retpoline-remove-compile-time-warning.patch
@@ -0,0 +1,60 @@
+From b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 14 Jan 2018 22:13:29 +0100
+Subject: x86/retpoline: Remove compile time warning
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 upstream.
+
+Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler
+does not have retpoline support. Linus rationale for this is:
+
+ It's wrong because it will just make people turn off RETPOLINE, and the
+ asm updates - and return stack clearing - that are independent of the
+ compiler are likely the most important parts because they are likely the
+ ones easiest to target.
+
+ And it's annoying because most people won't be able to do anything about
+ it. The number of people building their own compiler? Very small. So if
+ their distro hasn't got a compiler yet (and pretty much nobody does), the
+ warning is just annoying crap.
+
+ It is already properly reported as part of the sysfs interface. The
+ compile-time warning only encourages bad things.
+
+Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support")
+Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/Makefile | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -187,8 +187,6 @@ ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+- else
+- $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.)
+ endif
+ endif
+
diff --git a/queue/x86-retpoline-remove-the-esp-rsp-thunk.patch b/queue/x86-retpoline-remove-the-esp-rsp-thunk.patch
new file mode 100644
index 0000000..f0d2444
--- /dev/null
+++ b/queue/x86-retpoline-remove-the-esp-rsp-thunk.patch
@@ -0,0 +1,58 @@
+From foo@baz Wed Feb 7 19:38:23 CST 2018
+From: Waiman Long <longman@redhat.com>
+Date: Mon, 22 Jan 2018 17:09:34 -0500
+Subject: x86/retpoline: Remove the esp/rsp thunk
+
+From: Waiman Long <longman@redhat.com>
+
+(cherry picked from commit 1df37383a8aeabb9b418698f0bcdffea01f4b1b2)
+
+It doesn't make sense to have an indirect call thunk with esp/rsp as
+retpoline code won't work correctly with the stack pointer register.
+Removing it will help compiler writers to catch error in case such
+a thunk call is emitted incorrectly.
+
+Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support")
+Suggested-by: Jeff Law <law@redhat.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/asm-prototypes.h | 1 -
+ arch/x86/lib/retpoline.S | 1 -
+ 2 files changed, 2 deletions(-)
+
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -37,5 +37,4 @@ INDIRECT_THUNK(dx)
+ INDIRECT_THUNK(si)
+ INDIRECT_THUNK(di)
+ INDIRECT_THUNK(bp)
+-INDIRECT_THUNK(sp)
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX)
+ GENERATE_THUNK(_ASM_SI)
+ GENERATE_THUNK(_ASM_DI)
+ GENERATE_THUNK(_ASM_BP)
+-GENERATE_THUNK(_ASM_SP)
+ #ifdef CONFIG_64BIT
+ GENERATE_THUNK(r8)
+ GENERATE_THUNK(r9)
diff --git a/queue/x86-retpoline-simplify-vmexit_fill_rsb.patch b/queue/x86-retpoline-simplify-vmexit_fill_rsb.patch
new file mode 100644
index 0000000..f524811
--- /dev/null
+++ b/queue/x86-retpoline-simplify-vmexit_fill_rsb.patch
@@ -0,0 +1,248 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Borislav Petkov <bp@alien8.de>
+Date: Sat, 27 Jan 2018 16:24:33 +0000
+Subject: x86/retpoline: Simplify vmexit_fill_RSB()
+
+From: Borislav Petkov <bp@alien8.de>
+
+(cherry picked from commit 1dde7415e99933bb7293d6b2843752cbdb43ec11)
+
+Simplify it to call an asm-function instead of pasting 41 insn bytes at
+every call site. Also, add alignment to the macro as suggested here:
+
+ https://support.google.com/faqs/answer/7625886
+
+[dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler]
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: ak@linux.intel.com
+Cc: dave.hansen@intel.com
+Cc: karahmed@amazon.de
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 3 -
+ arch/x86/entry/entry_64.S | 3 -
+ arch/x86/include/asm/asm-prototypes.h | 3 +
+ arch/x86/include/asm/nospec-branch.h | 70 +++-------------------------------
+ arch/x86/lib/Makefile | 1
+ arch/x86/lib/retpoline.S | 56 +++++++++++++++++++++++++++
+ 6 files changed, 71 insertions(+), 65 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -237,7 +237,8 @@ ENTRY(__switch_to_asm)
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++ /* Clobbers %ebx */
++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ #endif
+
+ /* restore callee-saved registers */
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -435,7 +435,8 @@ ENTRY(__switch_to_asm)
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
++ /* Clobbers %rbx */
++ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ #endif
+
+ /* restore callee-saved registers */
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -37,4 +37,7 @@ INDIRECT_THUNK(dx)
+ INDIRECT_THUNK(si)
+ INDIRECT_THUNK(di)
+ INDIRECT_THUNK(bp)
++asmlinkage void __fill_rsb(void);
++asmlinkage void __clear_rsb(void);
++
+ #endif /* CONFIG_RETPOLINE */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -7,50 +7,6 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/cpufeatures.h>
+
+-/*
+- * Fill the CPU return stack buffer.
+- *
+- * Each entry in the RSB, if used for a speculative 'ret', contains an
+- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+- *
+- * This is required in various cases for retpoline and IBRS-based
+- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+- * eliminate potentially bogus entries from the RSB, and sometimes
+- * purely to ensure that it doesn't get empty, which on some CPUs would
+- * allow predictions from other (unwanted!) sources to be used.
+- *
+- * We define a CPP macro such that it can be used from both .S files and
+- * inline assembly. It's possible to do a .macro and then include that
+- * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+- */
+-
+-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+-
+-/*
+- * Google experimented with loop-unrolling and this turned out to be
+- * the optimal version — two calls, each with their own speculation
+- * trap should their return address end up getting used, in a loop.
+- */
+-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+- mov $(nr/2), reg; \
+-771: \
+- call 772f; \
+-773: /* speculation trap */ \
+- pause; \
+- lfence; \
+- jmp 773b; \
+-772: \
+- call 774f; \
+-775: /* speculation trap */ \
+- pause; \
+- lfence; \
+- jmp 775b; \
+-774: \
+- dec reg; \
+- jnz 771b; \
+- add $(BITS_PER_LONG/8) * nr, sp;
+-
+ #ifdef __ASSEMBLY__
+
+ /*
+@@ -121,17 +77,10 @@
+ #endif
+ .endm
+
+- /*
+- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+- * monstrosity above, manually.
+- */
+-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
++/* This clobbers the BX register */
++.macro FILL_RETURN_BUFFER nr:req ftr:req
+ #ifdef CONFIG_RETPOLINE
+- ANNOTATE_NOSPEC_ALTERNATIVE
+- ALTERNATIVE "jmp .Lskip_rsb_\@", \
+- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+- \ftr
+-.Lskip_rsb_\@:
++ ALTERNATIVE "", "call __clear_rsb", \ftr
+ #endif
+ .endm
+
+@@ -206,15 +155,10 @@ extern char __indirect_thunk_end[];
+ static inline void vmexit_fill_RSB(void)
+ {
+ #ifdef CONFIG_RETPOLINE
+- unsigned long loops;
+-
+- asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+- ALTERNATIVE("jmp 910f",
+- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+- X86_FEATURE_RETPOLINE)
+- "910:"
+- : "=r" (loops), ASM_CALL_CONSTRAINT
+- : : "memory" );
++ alternative_input("",
++ "call __fill_rsb",
++ X86_FEATURE_RETPOLINE,
++ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ #endif
+ }
+
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) +=
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+ lib-$(CONFIG_RETPOLINE) += retpoline.o
++OBJECT_FILES_NON_STANDARD_retpoline.o :=y
+
+ obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -7,6 +7,7 @@
+ #include <asm/alternative-asm.h>
+ #include <asm/export.h>
+ #include <asm/nospec-branch.h>
++#include <asm/bitsperlong.h>
+
+ .macro THUNK reg
+ .section .text.__x86.indirect_thunk
+@@ -46,3 +47,58 @@ GENERATE_THUNK(r13)
+ GENERATE_THUNK(r14)
+ GENERATE_THUNK(r15)
+ #endif
++
++/*
++ * Fill the CPU return stack buffer.
++ *
++ * Each entry in the RSB, if used for a speculative 'ret', contains an
++ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
++ *
++ * This is required in various cases for retpoline and IBRS-based
++ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
++ * eliminate potentially bogus entries from the RSB, and sometimes
++ * purely to ensure that it doesn't get empty, which on some CPUs would
++ * allow predictions from other (unwanted!) sources to be used.
++ *
++ * Google experimented with loop-unrolling and this turned out to be
++ * the optimal version - two calls, each with their own speculation
++ * trap should their return address end up getting used, in a loop.
++ */
++.macro STUFF_RSB nr:req sp:req
++ mov $(\nr / 2), %_ASM_BX
++ .align 16
++771:
++ call 772f
++773: /* speculation trap */
++ pause
++ lfence
++ jmp 773b
++ .align 16
++772:
++ call 774f
++775: /* speculation trap */
++ pause
++ lfence
++ jmp 775b
++ .align 16
++774:
++ dec %_ASM_BX
++ jnz 771b
++ add $((BITS_PER_LONG/8) * \nr), \sp
++.endm
++
++#define RSB_FILL_LOOPS 16 /* To avoid underflow */
++
++ENTRY(__fill_rsb)
++ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
++ ret
++END(__fill_rsb)
++EXPORT_SYMBOL_GPL(__fill_rsb)
++
++#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
++
++ENTRY(__clear_rsb)
++ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
++ ret
++END(__clear_rsb)
++EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/queue/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch b/queue/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
new file mode 100644
index 0000000..0893be0
--- /dev/null
+++ b/queue/x86-retpoline-xen-convert-xen-hypercall-indirect-jumps.patch
@@ -0,0 +1,60 @@
+From 4c7f165396cddc6e5fc484743e259fc7f92cffcc Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:31 +0000
+Subject: [PATCH] x86/retpoline/xen: Convert Xen hypercall indirect jumps
+
+commit ea08816d5b185ab3d09e95e393f265af54560350 upstream.
+
+Convert indirect call in Xen hypercall to use non-speculative sequence,
+when CONFIG_RETPOLINE is enabled.
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Arjan van de Ven <arjan@linux.intel.com>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
+index a12a047184ee..8b1f91f59f0a 100644
+--- a/arch/x86/include/asm/xen/hypercall.h
++++ b/arch/x86/include/asm/xen/hypercall.h
+@@ -43,6 +43,7 @@
+
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
++#include <asm/nospec-branch.h>
+
+ #include <xen/interface/xen.h>
+ #include <xen/interface/sched.h>
+@@ -214,9 +215,9 @@ privcmd_call(unsigned call,
+ __HYPERCALL_DECLS;
+ __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+
+- asm volatile("call *%[call]"
++ asm volatile(CALL_NOSPEC
+ : __HYPERCALL_5PARAM
+- : [call] "a" (&hypercall_page[call])
++ : [thunk_target] "a" (&hypercall_page[call])
+ : __HYPERCALL_CLOBBER5);
+
+ return (long)__res;
+--
+2.15.0
+
diff --git a/queue/x86-smpboot-remove-stale-tlb-flush-invocations.patch b/queue/x86-smpboot-remove-stale-tlb-flush-invocations.patch
new file mode 100644
index 0000000..cdf81a5
--- /dev/null
+++ b/queue/x86-smpboot-remove-stale-tlb-flush-invocations.patch
@@ -0,0 +1,65 @@
+From 322f8b8b340c824aef891342b0f5795d15e11562 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sat, 30 Dec 2017 22:13:53 +0100
+Subject: x86/smpboot: Remove stale TLB flush invocations
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 322f8b8b340c824aef891342b0f5795d15e11562 upstream.
+
+smpboot_setup_warm_reset_vector() and smpboot_restore_warm_reset_vector()
+invoke local_flush_tlb() for no obvious reason.
+
+Digging in history revealed that the original code in the 2.1 era added
+those because the code manipulated a swapper_pg_dir pagetable entry. The
+pagetable manipulation was removed long ago in the 2.3 timeframe, but the
+TLB flush invocations stayed around forever.
+
+Remove them along with the pointless pr_debug()s which come from the same 2.1
+change.
+
+Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20171230211829.586548655@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/smpboot.c | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -115,14 +115,10 @@ static inline void smpboot_setup_warm_re
+ spin_lock_irqsave(&rtc_lock, flags);
+ CMOS_WRITE(0xa, 0xf);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+- local_flush_tlb();
+- pr_debug("1.\n");
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ start_eip >> 4;
+- pr_debug("2.\n");
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ start_eip & 0xf;
+- pr_debug("3.\n");
+ }
+
+ static inline void smpboot_restore_warm_reset_vector(void)
+@@ -130,11 +126,6 @@ static inline void smpboot_restore_warm_
+ unsigned long flags;
+
+ /*
+- * Install writable page 0 entry to set BIOS data area.
+- */
+- local_flush_tlb();
+-
+- /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
diff --git a/queue/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch b/queue/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
new file mode 100644
index 0000000..475414c
--- /dev/null
+++ b/queue/x86-spectre-add-boot-time-option-to-select-spectre-v2-mitigation.patch
@@ -0,0 +1,317 @@
+From da285121560e769cc31797bba6422eea71d473e0 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 11 Jan 2018 21:46:26 +0000
+Subject: x86/spectre: Add boot time option to select Spectre v2 mitigation
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit da285121560e769cc31797bba6422eea71d473e0 upstream.
+
+Add a spectre_v2= option to select the mitigation used for the indirect
+branch speculation vulnerability.
+
+Currently, the only option available is retpoline, in its various forms.
+This will be expanded to cover the new IBRS/IBPB microcode features.
+
+The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation
+control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a
+serializing instruction, which is indicated by the LFENCE_RDTSC feature.
+
+[ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS
+ integration becomes simple ]
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: thomas.lendacky@amd.com
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Kees Cook <keescook@google.com>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linux-foundation.org>
+Cc: Paul Turner <pjt@google.com>
+Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt | 28 ++++++
+ arch/x86/include/asm/nospec-branch.h | 10 ++
+ arch/x86/kernel/cpu/bugs.c | 158 ++++++++++++++++++++++++++++++++++-
+ arch/x86/kernel/cpu/common.c | 4
+ 4 files changed, 195 insertions(+), 5 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2691,6 +2691,11 @@ bytes respectively. Such letter suffixes
+ nosmt [KNL,S390] Disable symmetric multithreading (SMT).
+ Equivalent to smt=1.
+
++ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
++ (indirect branch prediction) vulnerability. System may
++ allow data leaks with this option, which is equivalent
++ to spectre_v2=off.
++
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+@@ -3944,6 +3949,29 @@ bytes respectively. Such letter suffixes
+ sonypi.*= [HW] Sony Programmable I/O Control Device driver
+ See Documentation/laptops/sonypi.txt
+
++ spectre_v2= [X86] Control mitigation of Spectre variant 2
++ (indirect branch speculation) vulnerability.
++
++ on - unconditionally enable
++ off - unconditionally disable
++ auto - kernel detects whether your CPU model is
++ vulnerable
++
++ Selecting 'on' will, and 'auto' may, choose a
++ mitigation method at run time according to the
++ CPU, the available microcode, the setting of the
++ CONFIG_RETPOLINE configuration option, and the
++ compiler with which the kernel was built.
++
++ Specific mitigations can also be selected manually:
++
++ retpoline - replace indirect branches
++ retpoline,generic - google's original retpoline
++ retpoline,amd - AMD-specific minimal thunk
++
++ Not specifying this option is equivalent to
++ spectre_v2=auto.
++
+ spia_io_base= [HW,MTD]
+ spia_fio_base=
+ spia_pedr=
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -124,5 +124,15 @@
+ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+ #endif
+
++/* The Spectre V2 mitigation variants */
++enum spectre_v2_mitigation {
++ SPECTRE_V2_NONE,
++ SPECTRE_V2_RETPOLINE_MINIMAL,
++ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
++ SPECTRE_V2_RETPOLINE_GENERIC,
++ SPECTRE_V2_RETPOLINE_AMD,
++ SPECTRE_V2_IBRS,
++};
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* __NOSPEC_BRANCH_H__ */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -10,6 +10,9 @@
+ #include <linux/init.h>
+ #include <linux/utsname.h>
+ #include <linux/cpu.h>
++
++#include <asm/nospec-branch.h>
++#include <asm/cmdline.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+ #include <asm/processor-flags.h>
+@@ -20,6 +23,8 @@
+ #include <asm/pgtable.h>
+ #include <asm/cacheflush.h>
+
++static void __init spectre_v2_select_mitigation(void);
++
+ void __init check_bugs(void)
+ {
+ identify_boot_cpu();
+@@ -29,6 +34,9 @@ void __init check_bugs(void)
+ print_cpu_info(&boot_cpu_data);
+ }
+
++ /* Select the proper spectre mitigation before patching alternatives */
++ spectre_v2_select_mitigation();
++
+ #ifdef CONFIG_X86_32
+ /*
+ * Check whether we are able to run this kernel safely on SMP.
+@@ -61,6 +69,153 @@ void __init check_bugs(void)
+ #endif
+ }
+
++/* The kernel command line selection */
++enum spectre_v2_mitigation_cmd {
++ SPECTRE_V2_CMD_NONE,
++ SPECTRE_V2_CMD_AUTO,
++ SPECTRE_V2_CMD_FORCE,
++ SPECTRE_V2_CMD_RETPOLINE,
++ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
++ SPECTRE_V2_CMD_RETPOLINE_AMD,
++};
++
++static const char *spectre_v2_strings[] = {
++ [SPECTRE_V2_NONE] = "Vulnerable",
++ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
++ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
++ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
++ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
++};
++
++#undef pr_fmt
++#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
++
++static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
++
++static void __init spec2_print_if_insecure(const char *reason)
++{
++ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++ pr_info("%s\n", reason);
++}
++
++static void __init spec2_print_if_secure(const char *reason)
++{
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++ pr_info("%s\n", reason);
++}
++
++static inline bool retp_compiler(void)
++{
++ return __is_defined(RETPOLINE);
++}
++
++static inline bool match_option(const char *arg, int arglen, const char *opt)
++{
++ int len = strlen(opt);
++
++ return len == arglen && !strncmp(arg, opt, len);
++}
++
++static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
++{
++ char arg[20];
++ int ret;
++
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
++ sizeof(arg));
++ if (ret > 0) {
++ if (match_option(arg, ret, "off")) {
++ goto disable;
++ } else if (match_option(arg, ret, "on")) {
++ spec2_print_if_secure("force enabled on command line.");
++ return SPECTRE_V2_CMD_FORCE;
++ } else if (match_option(arg, ret, "retpoline")) {
++ spec2_print_if_insecure("retpoline selected on command line.");
++ return SPECTRE_V2_CMD_RETPOLINE;
++ } else if (match_option(arg, ret, "retpoline,amd")) {
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
++ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
++ return SPECTRE_V2_CMD_AUTO;
++ }
++ spec2_print_if_insecure("AMD retpoline selected on command line.");
++ return SPECTRE_V2_CMD_RETPOLINE_AMD;
++ } else if (match_option(arg, ret, "retpoline,generic")) {
++ spec2_print_if_insecure("generic retpoline selected on command line.");
++ return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
++ } else if (match_option(arg, ret, "auto")) {
++ return SPECTRE_V2_CMD_AUTO;
++ }
++ }
++
++ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++ return SPECTRE_V2_CMD_AUTO;
++disable:
++ spec2_print_if_insecure("disabled on command line.");
++ return SPECTRE_V2_CMD_NONE;
++}
++
++static void __init spectre_v2_select_mitigation(void)
++{
++ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
++ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
++
++ /*
++ * If the CPU is not affected and the command line mode is NONE or AUTO
++ * then nothing to do.
++ */
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
++ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
++ return;
++
++ switch (cmd) {
++ case SPECTRE_V2_CMD_NONE:
++ return;
++
++ case SPECTRE_V2_CMD_FORCE:
++ /* FALLTRHU */
++ case SPECTRE_V2_CMD_AUTO:
++ goto retpoline_auto;
++
++ case SPECTRE_V2_CMD_RETPOLINE_AMD:
++ if (IS_ENABLED(CONFIG_RETPOLINE))
++ goto retpoline_amd;
++ break;
++ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
++ if (IS_ENABLED(CONFIG_RETPOLINE))
++ goto retpoline_generic;
++ break;
++ case SPECTRE_V2_CMD_RETPOLINE:
++ if (IS_ENABLED(CONFIG_RETPOLINE))
++ goto retpoline_auto;
++ break;
++ }
++ pr_err("kernel not compiled with retpoline; no mitigation available!");
++ return;
++
++retpoline_auto:
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
++ retpoline_amd:
++ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
++ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
++ goto retpoline_generic;
++ }
++ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
++ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++ } else {
++ retpoline_generic:
++ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
++ SPECTRE_V2_RETPOLINE_MINIMAL;
++ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
++ }
++
++ spectre_v2_enabled = mode;
++ pr_info("%s\n", spectre_v2_strings[mode]);
++}
++
++#undef pr_fmt
++
+ #ifdef CONFIG_SYSFS
+ ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+@@ -85,6 +240,7 @@ ssize_t cpu_show_spectre_v2(struct devic
+ {
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+- return sprintf(buf, "Vulnerable\n");
++
++ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+ }
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -889,10 +889,6 @@ static void __init early_identify_cpu(st
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+-#ifdef CONFIG_RETPOLINE
+- setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+-#endif
+-
+ fpu__init_system(c);
+
+ #ifdef CONFIG_X86_32
diff --git a/queue/x86-spectre-check-config_retpoline-in-command-line-parser.patch b/queue/x86-spectre-check-config_retpoline-in-command-line-parser.patch
new file mode 100644
index 0000000..c2d046a
--- /dev/null
+++ b/queue/x86-spectre-check-config_retpoline-in-command-line-parser.patch
@@ -0,0 +1,49 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: Dou Liyang <douly.fnst@cn.fujitsu.com>
+Date: Tue, 30 Jan 2018 14:13:50 +0800
+Subject: x86/spectre: Check CONFIG_RETPOLINE in command line parser
+
+From: Dou Liyang <douly.fnst@cn.fujitsu.com>
+
+(cherry picked from commit 9471eee9186a46893726e22ebb54cade3f9bc043)
+
+The spectre_v2 option 'auto' does not check whether CONFIG_RETPOLINE is
+enabled. As a consequence it fails to emit the appropriate warning and sets
+feature flags which have no effect at all.
+
+Add the missing IS_ENABLED() check.
+
+Fixes: da285121560e ("x86/spectre: Add boot time option to select Spectre v2 mitigation")
+Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: ak@linux.intel.com
+Cc: peterz@infradead.org
+Cc: Tomohiro <misono.tomohiro@jp.fujitsu.com>
+Cc: dave.hansen@intel.com
+Cc: bp@alien8.de
+Cc: arjan@linux.intel.com
+Cc: dwmw@amazon.co.uk
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/f5892721-7528-3647-08fb-f8d10e65ad87@cn.fujitsu.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -212,10 +212,10 @@ static void __init spectre_v2_select_mit
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+- /* FALLTRHU */
+ case SPECTRE_V2_CMD_AUTO:
+- goto retpoline_auto;
+-
++ if (IS_ENABLED(CONFIG_RETPOLINE))
++ goto retpoline_auto;
++ break;
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
diff --git a/queue/x86-spectre-fix-spelling-mistake-vunerable-vulnerable.patch b/queue/x86-spectre-fix-spelling-mistake-vunerable-vulnerable.patch
new file mode 100644
index 0000000..9c55ad7
--- /dev/null
+++ b/queue/x86-spectre-fix-spelling-mistake-vunerable-vulnerable.patch
@@ -0,0 +1,38 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Colin Ian King <colin.king@canonical.com>
+Date: Tue, 30 Jan 2018 19:32:18 +0000
+Subject: x86/spectre: Fix spelling mistake: "vunerable"-> "vulnerable"
+
+From: Colin Ian King <colin.king@canonical.com>
+
+
+(cherry picked from commit e698dcdfcda41efd0984de539767b4cddd235f1e)
+
+Trivial fix to spelling mistake in pr_err error message text.
+
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: kernel-janitors@vger.kernel.org
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180130193218.9271-1-colin.king@canonical.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -102,7 +102,7 @@ bool retpoline_module_ok(bool has_retpol
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+ return true;
+
+- pr_err("System may be vunerable to spectre v2\n");
++ pr_err("System may be vulnerable to spectre v2\n");
+ spectre_v2_bad_module = true;
+ return false;
+ }
diff --git a/queue/x86-spectre-report-get_user-mitigation-for-spectre_v1.patch b/queue/x86-spectre-report-get_user-mitigation-for-spectre_v1.patch
new file mode 100644
index 0000000..1388be9
--- /dev/null
+++ b/queue/x86-spectre-report-get_user-mitigation-for-spectre_v1.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:03:21 -0800
+Subject: x86/spectre: Report get_user mitigation for spectre_v1
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit edfbae53dab8348fca778531be9f4855d2ca0360)
+
+Reflect the presence of get_user(), __get_user(), and 'syscall' protections
+in sysfs. The expectation is that new and better tooling will allow the
+kernel to grow more usages of array_index_nospec(), for now, only claim
+mitigation for __user pointer de-references.
+
+Reported-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727420158.33451.11658324346540434635.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -296,7 +296,7 @@ ssize_t cpu_show_spectre_v1(struct devic
+ {
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+- return sprintf(buf, "Vulnerable\n");
++ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ }
+
+ ssize_t cpu_show_spectre_v2(struct device *dev,
diff --git a/queue/x86-spectre-simplify-spectre_v2-command-line-parsing.patch b/queue/x86-spectre-simplify-spectre_v2-command-line-parsing.patch
new file mode 100644
index 0000000..bef7737
--- /dev/null
+++ b/queue/x86-spectre-simplify-spectre_v2-command-line-parsing.patch
@@ -0,0 +1,138 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: KarimAllah Ahmed <karahmed@amazon.de>
+Date: Thu, 1 Feb 2018 11:27:21 +0000
+Subject: x86/spectre: Simplify spectre_v2 command line parsing
+
+From: KarimAllah Ahmed <karahmed@amazon.de>
+
+
+(cherry picked from commit 9005c6834c0ffdfe46afa76656bd9276cca864f6)
+
+[dwmw2: Use ARRAY_SIZE]
+
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Link: https://lkml.kernel.org/r/1517484441-1420-3-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 84 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 55 insertions(+), 29 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -118,13 +118,13 @@ static inline const char *spectre_v2_mod
+ static void __init spec2_print_if_insecure(const char *reason)
+ {
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s\n", reason);
++ pr_info("%s selected on command line.\n", reason);
+ }
+
+ static void __init spec2_print_if_secure(const char *reason)
+ {
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+- pr_info("%s\n", reason);
++ pr_info("%s selected on command line.\n", reason);
+ }
+
+ static inline bool retp_compiler(void)
+@@ -139,42 +139,68 @@ static inline bool match_option(const ch
+ return len == arglen && !strncmp(arg, opt, len);
+ }
+
++static const struct {
++ const char *option;
++ enum spectre_v2_mitigation_cmd cmd;
++ bool secure;
++} mitigation_options[] = {
++ { "off", SPECTRE_V2_CMD_NONE, false },
++ { "on", SPECTRE_V2_CMD_FORCE, true },
++ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
++ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
++ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
++ { "auto", SPECTRE_V2_CMD_AUTO, false },
++};
++
+ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+ {
+ char arg[20];
+- int ret;
++ int ret, i;
++ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
++
++ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++ return SPECTRE_V2_CMD_NONE;
++ else {
++ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
++ sizeof(arg));
++ if (ret < 0)
++ return SPECTRE_V2_CMD_AUTO;
+
+- ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+- sizeof(arg));
+- if (ret > 0) {
+- if (match_option(arg, ret, "off")) {
+- goto disable;
+- } else if (match_option(arg, ret, "on")) {
+- spec2_print_if_secure("force enabled on command line.");
+- return SPECTRE_V2_CMD_FORCE;
+- } else if (match_option(arg, ret, "retpoline")) {
+- spec2_print_if_insecure("retpoline selected on command line.");
+- return SPECTRE_V2_CMD_RETPOLINE;
+- } else if (match_option(arg, ret, "retpoline,amd")) {
+- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+- return SPECTRE_V2_CMD_AUTO;
+- }
+- spec2_print_if_insecure("AMD retpoline selected on command line.");
+- return SPECTRE_V2_CMD_RETPOLINE_AMD;
+- } else if (match_option(arg, ret, "retpoline,generic")) {
+- spec2_print_if_insecure("generic retpoline selected on command line.");
+- return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+- } else if (match_option(arg, ret, "auto")) {
++ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
++ if (!match_option(arg, ret, mitigation_options[i].option))
++ continue;
++ cmd = mitigation_options[i].cmd;
++ break;
++ }
++
++ if (i >= ARRAY_SIZE(mitigation_options)) {
++ pr_err("unknown option (%s). Switching to AUTO select\n",
++ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+- if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
++ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
++ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
++ !IS_ENABLED(CONFIG_RETPOLINE)) {
++ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
++ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+-disable:
+- spec2_print_if_insecure("disabled on command line.");
+- return SPECTRE_V2_CMD_NONE;
++ }
++
++ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
++ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
++ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
++ return SPECTRE_V2_CMD_AUTO;
++ }
++
++ if (mitigation_options[i].secure)
++ spec2_print_if_secure(mitigation_options[i].option);
++ else
++ spec2_print_if_insecure(mitigation_options[i].option);
++
++ return cmd;
+ }
+
+ /* Check for Skylake-like CPUs (for RSB handling) */
diff --git a/queue/x86-speculation-add-basic-ibpb-indirect-branch-prediction-barrier-support.patch b/queue/x86-speculation-add-basic-ibpb-indirect-branch-prediction-barrier-support.patch
new file mode 100644
index 0000000..676d99d
--- /dev/null
+++ b/queue/x86-speculation-add-basic-ibpb-indirect-branch-prediction-barrier-support.patch
@@ -0,0 +1,94 @@
+From foo@baz Thu Feb 8 03:30:27 CET 2018
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 25 Jan 2018 16:14:15 +0000
+Subject: x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+(cherry picked from commit 20ffa1caecca4db8f79fe665acdeaa5af815a24d)
+
+Expose indirect_branch_prediction_barrier() for use in subsequent patches.
+
+[ tglx: Add IBPB status to spectre_v2 sysfs file ]
+
+Co-developed-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: gnomes@lxorguk.ukuu.org.uk
+Cc: ak@linux.intel.com
+Cc: ashok.raj@intel.com
+Cc: dave.hansen@intel.com
+Cc: arjan@linux.intel.com
+Cc: torvalds@linux-foundation.org
+Cc: peterz@infradead.org
+Cc: bp@alien8.de
+Cc: pbonzini@redhat.com
+Cc: tim.c.chen@linux.intel.com
+Cc: gregkh@linux-foundation.org
+Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 2 ++
+ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
+ arch/x86/kernel/cpu/bugs.c | 10 +++++++++-
+ 3 files changed, 24 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -202,6 +202,8 @@
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
++#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void)
+ #endif
+ }
+
++static inline void indirect_branch_prediction_barrier(void)
++{
++ asm volatile(ALTERNATIVE("",
++ "movl %[msr], %%ecx\n\t"
++ "movl %[val], %%eax\n\t"
++ "movl $0, %%edx\n\t"
++ "wrmsr",
++ X86_FEATURE_IBPB)
++ : : [msr] "i" (MSR_IA32_PRED_CMD),
++ [val] "i" (PRED_CMD_IBPB)
++ : "eax", "ecx", "edx", "memory");
++}
++
+ #endif /* __ASSEMBLY__ */
+ #endif /* __NOSPEC_BRANCH_H__ */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -262,6 +262,13 @@ retpoline_auto:
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
++
++ /* Initialize Indirect Branch Prediction Barrier if supported */
++ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
++ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
++ setup_force_cpu_cap(X86_FEATURE_IBPB);
++ pr_info("Enabling Indirect Branch Prediction Barrier\n");
++ }
+ }
+
+ #undef pr_fmt
+@@ -291,7 +298,8 @@ ssize_t cpu_show_spectre_v2(struct devic
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+- return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
++ boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "",
+ spectre_v2_bad_module ? " - vulnerable module loaded" : "");
+ }
+ #endif
diff --git a/queue/x86-speculation-fix-typo-ibrs_att-which-should-be-ibrs_all.patch b/queue/x86-speculation-fix-typo-ibrs_att-which-should-be-ibrs_all.patch
new file mode 100644
index 0000000..2446b18
--- /dev/null
+++ b/queue/x86-speculation-fix-typo-ibrs_att-which-should-be-ibrs_all.patch
@@ -0,0 +1,38 @@
+From foo@baz Thu Feb 8 03:33:09 CET 2018
+From: Darren Kenny <darren.kenny@oracle.com>
+Date: Fri, 2 Feb 2018 19:12:20 +0000
+Subject: x86/speculation: Fix typo IBRS_ATT, which should be IBRS_ALL
+
+From: Darren Kenny <darren.kenny@oracle.com>
+
+
+(cherry picked from commit af189c95a371b59f493dbe0f50c0a09724868881)
+
+Fixes: 117cc7a908c83 ("x86/retpoline: Fill return stack buffer on vmexit")
+Signed-off-by: Darren Kenny <darren.kenny@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Link: https://lkml.kernel.org/r/20180202191220.blvgkgutojecxr3b@starbug-vm.ie.oracle.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/nospec-branch.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -150,7 +150,7 @@ extern char __indirect_thunk_end[];
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+- * CPUs with IBRS_ATT *might* it be avoided.
++ * CPUs with IBRS_ALL *might* it be avoided.
+ */
+ static inline void vmexit_fill_RSB(void)
+ {
diff --git a/queue/x86-syscall-sanitize-syscall-table-de-references-under-speculation.patch b/queue/x86-syscall-sanitize-syscall-table-de-references-under-speculation.patch
new file mode 100644
index 0000000..cce824f
--- /dev/null
+++ b/queue/x86-syscall-sanitize-syscall-table-de-references-under-speculation.patch
@@ -0,0 +1,61 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:59 -0800
+Subject: x86/syscall: Sanitize syscall table de-references under speculation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 2fbd7af5af8665d18bcefae3e9700be07e22b681)
+
+The syscall table base is a user controlled function pointer in kernel
+space. Use array_index_nospec() to prevent any out of bounds speculation.
+
+While retpoline prevents speculating into a userspace directed target it
+does not stop the pointer de-reference, the concern is leaking memory
+relative to the syscall table base, by observing instruction cache
+behavior.
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727417984.33451.1216731042505722161.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/common.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -20,6 +20,7 @@
+ #include <linux/export.h>
+ #include <linux/context_tracking.h>
+ #include <linux/user-return-notifier.h>
++#include <linux/nospec.h>
+ #include <linux/uprobes.h>
+
+ #include <asm/desc.h>
+@@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_r
+ * regs->orig_ax, which changes the behavior of some syscalls.
+ */
+ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+- regs->ax = sys_call_table[nr & __SYSCALL_MASK](
++ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
++ regs->ax = sys_call_table[nr](
+ regs->di, regs->si, regs->dx,
+ regs->r10, regs->r8, regs->r9);
+ }
+@@ -313,6 +315,7 @@ static __always_inline void do_syscall_3
+ }
+
+ if (likely(nr < IA32_NR_syscalls)) {
++ nr = array_index_nospec(nr, IA32_NR_syscalls);
+ /*
+ * It's possible that a 32-bit syscall implementation
+ * takes a 64-bit parameter but nonetheless assumes that
diff --git a/queue/x86-tlb-drop-the-_gpl-from-the-cpu_tlbstate-export.patch b/queue/x86-tlb-drop-the-_gpl-from-the-cpu_tlbstate-export.patch
new file mode 100644
index 0000000..9d1ad97
--- /dev/null
+++ b/queue/x86-tlb-drop-the-_gpl-from-the-cpu_tlbstate-export.patch
@@ -0,0 +1,48 @@
+From 1e5476815fd7f98b888e01a0f9522b63085f96c9 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 4 Jan 2018 22:19:04 +0100
+Subject: x86/tlb: Drop the _GPL from the cpu_tlbstate export
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 1e5476815fd7f98b888e01a0f9522b63085f96c9 upstream.
+
+The recent changes for PTI touch cpu_tlbstate from various tlb_flush
+inlines. cpu_tlbstate is exported as GPL symbol, so this causes a
+regression when building out of tree drivers for certain graphics cards.
+
+Aside of that the export was wrong since it was introduced as it should
+have been EXPORT_PER_CPU_SYMBOL_GPL().
+
+Use the correct PER_CPU export and drop the _GPL to restore the previous
+state which allows users to utilize the cards they payed for.
+
+As always I'm really thrilled to make this kind of change to support the
+#friends (or however the hot hashtag of today is spelled) from that closet
+sauce graphics corp.
+
+Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4")
+Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches")
+Reported-by: Kees Cook <keescook@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Thomas Backlund <tmb@mageia.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/init.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -768,7 +768,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb
+ .state = 0,
+ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
+ };
+-EXPORT_SYMBOL_GPL(cpu_tlbstate);
++EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
+
+ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
+ {
diff --git a/queue/x86-uaccess-use-__uaccess_begin_nospec-and-uaccess_try_nospec.patch b/queue/x86-uaccess-use-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
new file mode 100644
index 0000000..4a3fa28
--- /dev/null
+++ b/queue/x86-uaccess-use-__uaccess_begin_nospec-and-uaccess_try_nospec.patch
@@ -0,0 +1,187 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:49 -0800
+Subject: x86/uaccess: Use __uaccess_begin_nospec() and uaccess_try_nospec
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit 304ec1b050310548db33063e567123fae8fd0301)
+
+Quoting Linus:
+
+ I do think that it would be a good idea to very expressly document
+ the fact that it's not that the user access itself is unsafe. I do
+ agree that things like "get_user()" want to be protected, but not
+ because of any direct bugs or problems with get_user() and friends,
+ but simply because get_user() is an excellent source of a pointer
+ that is obviously controlled from a potentially attacking user
+ space. So it's a prime candidate for then finding _subsequent_
+ accesses that can then be used to perturb the cache.
+
+__uaccess_begin_nospec() covers __get_user() and copy_from_iter() where the
+limit check is far away from the user pointer de-reference. In those cases
+a barrier_nospec() prevents speculation with a potential pointer to
+privileged memory. uaccess_try_nospec covers get_user_try.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727416953.33451.10508284228526170604.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/uaccess.h | 6 +++---
+ arch/x86/include/asm/uaccess_32.h | 12 ++++++------
+ arch/x86/include/asm/uaccess_64.h | 12 ++++++------
+ arch/x86/lib/usercopy_32.c | 4 ++--
+ 4 files changed, 17 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -437,7 +437,7 @@ do { \
+ ({ \
+ int __gu_err; \
+ __inttype(*(ptr)) __gu_val; \
+- __uaccess_begin(); \
++ __uaccess_begin_nospec(); \
+ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __uaccess_end(); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+@@ -547,7 +547,7 @@ struct __large_struct { unsigned long bu
+ * get_user_ex(...);
+ * } get_user_catch(err)
+ */
+-#define get_user_try uaccess_try
++#define get_user_try uaccess_try_nospec
+ #define get_user_catch(err) uaccess_catch(err)
+
+ #define get_user_ex(x, ptr) do { \
+@@ -582,7 +582,7 @@ extern void __cmpxchg_wrong_size(void)
+ __typeof__(ptr) __uval = (uval); \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) __new = (new); \
+- __uaccess_begin(); \
++ __uaccess_begin_nospec(); \
+ switch (size) { \
+ case 1: \
+ { \
+--- a/arch/x86/include/asm/uaccess_32.h
++++ b/arch/x86/include/asm/uaccess_32.h
+@@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __
+
+ switch (n) {
+ case 1:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __uaccess_end();
+ return ret;
+ case 2:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __uaccess_end();
+ return ret;
+ case 4:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __uaccess_end();
+ return ret;
+@@ -130,17 +130,17 @@ static __always_inline unsigned long __c
+
+ switch (n) {
+ case 1:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __uaccess_end();
+ return ret;
+ case 2:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __uaccess_end();
+ return ret;
+ case 4:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __uaccess_end();
+ return ret;
+--- a/arch/x86/include/asm/uaccess_64.h
++++ b/arch/x86/include/asm/uaccess_64.h
+@@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst,
+ return copy_user_generic(dst, (__force void *)src, size);
+ switch (size) {
+ case 1:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u8 *)dst, (u8 __user *)src,
+ ret, "b", "b", "=q", 1);
+ __uaccess_end();
+ return ret;
+ case 2:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u16 *)dst, (u16 __user *)src,
+ ret, "w", "w", "=r", 2);
+ __uaccess_end();
+ return ret;
+ case 4:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u32 *)dst, (u32 __user *)src,
+ ret, "l", "k", "=r", 4);
+ __uaccess_end();
+ return ret;
+ case 8:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ ret, "q", "", "=r", 8);
+ __uaccess_end();
+ return ret;
+ case 10:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ ret, "q", "", "=r", 10);
+ if (likely(!ret))
+@@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst,
+ __uaccess_end();
+ return ret;
+ case 16:
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ __get_user_asm(*(u64 *)dst, (u64 __user *)src,
+ ret, "q", "", "=r", 16);
+ if (likely(!ret))
+--- a/arch/x86/lib/usercopy_32.c
++++ b/arch/x86/lib/usercopy_32.c
+@@ -570,7 +570,7 @@ do { \
+ unsigned long __copy_to_user_ll(void __user *to, const void *from,
+ unsigned long n)
+ {
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ if (movsl_is_ok(to, from, n))
+ __copy_user(to, from, n);
+ else
+@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocach
+ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ unsigned long n)
+ {
+- __uaccess_begin();
++ __uaccess_begin_nospec();
+ #ifdef CONFIG_X86_INTEL_USERCOPY
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
+ n = __copy_user_intel_nocache(to, from, n);
diff --git a/queue/x86-usercopy-replace-open-coded-stac-clac-with-__uaccess_-begin-end.patch b/queue/x86-usercopy-replace-open-coded-stac-clac-with-__uaccess_-begin-end.patch
new file mode 100644
index 0000000..bf11e7c
--- /dev/null
+++ b/queue/x86-usercopy-replace-open-coded-stac-clac-with-__uaccess_-begin-end.patch
@@ -0,0 +1,70 @@
+From foo@baz Thu Feb 8 03:32:24 CET 2018
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 29 Jan 2018 17:02:44 -0800
+Subject: x86/usercopy: Replace open coded stac/clac with __uaccess_{begin, end}
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+
+(cherry picked from commit b5c4ae4f35325d520b230bab6eb3310613b72ac1)
+
+In preparation for converting some __uaccess_begin() instances to
+__uacess_begin_nospec(), make sure all 'from user' uaccess paths are
+using the _begin(), _end() helpers rather than open-coded stac() and
+clac().
+
+No functional changes.
+
+Suggested-by: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-arch@vger.kernel.org
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: kernel-hardening@lists.openwall.com
+Cc: gregkh@linuxfoundation.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: torvalds@linux-foundation.org
+Cc: alan@linux.intel.com
+Link: https://lkml.kernel.org/r/151727416438.33451.17309465232057176966.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/lib/usercopy_32.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/lib/usercopy_32.c
++++ b/arch/x86/lib/usercopy_32.c
+@@ -570,12 +570,12 @@ do { \
+ unsigned long __copy_to_user_ll(void __user *to, const void *from,
+ unsigned long n)
+ {
+- stac();
++ __uaccess_begin();
+ if (movsl_is_ok(to, from, n))
+ __copy_user(to, from, n);
+ else
+ n = __copy_user_intel(to, from, n);
+- clac();
++ __uaccess_end();
+ return n;
+ }
+ EXPORT_SYMBOL(__copy_to_user_ll);
+@@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocach
+ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ unsigned long n)
+ {
+- stac();
++ __uaccess_begin();
+ #ifdef CONFIG_X86_INTEL_USERCOPY
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
+ n = __copy_user_intel_nocache(to, from, n);
+@@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocach
+ #else
+ __copy_user(to, from, n);
+ #endif
+- clac();
++ __uaccess_end();
+ return n;
+ }
+ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/queue/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch b/queue/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
new file mode 100644
index 0000000..ba2096a
--- /dev/null
+++ b/queue/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
@@ -0,0 +1,50 @@
+From 9ccee2373f0658f234727700e619df097ba57023 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sat, 22 Apr 2017 00:01:19 -0700
+Subject: x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly()
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 9ccee2373f0658f234727700e619df097ba57023 upstream.
+
+mark_screen_rdonly() is the last remaining caller of flush_tlb().
+flush_tlb_mm_range() is potentially faster and isn't obsolete.
+
+Compile-tested only because I don't know whether software that uses
+this mechanism even exists.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Nadav Amit <namit@vmware.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/791a644076fc3577ba7f7b7cafd643cc089baa7d.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/vm86_32.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -191,7 +191,7 @@ static void mark_screen_rdonly(struct mm
+ pte_unmap_unlock(pte, ptl);
+ out:
+ up_write(&mm->mmap_sem);
+- flush_tlb();
++ flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
+ }
+
+