aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86/intel_mpx.txt244
-rw-r--r--arch/x86/Kconfig28
-rw-r--r--arch/x86/include/asm/bugs.h6
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/fpu/xstate.h4
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h31
-rw-r--r--arch/x86/include/asm/mpx.h115
-rw-r--r--arch/x86/include/asm/processor.h18
-rw-r--r--arch/x86/include/asm/trace/mpx.h134
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c37
-rw-r--r--arch/x86/kernel/fpu/xstate.c6
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c9
-rw-r--r--arch/x86/kernel/traps.c74
-rw-r--r--arch/x86/kvm/cpuid.c13
-rw-r--r--arch/x86/kvm/cpuid.h1
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c60
-rw-r--r--arch/x86/kvm/x86.c25
-rw-r--r--arch/x86/kvm/x86.h3
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/hugetlbpage.c5
-rw-r--r--arch/x86/mm/mpx.c936
-rw-r--r--fs/exec.c1
-rw-r--r--include/asm-generic/mm_hooks.h11
-rw-r--r--include/uapi/linux/prctl.h2
-rw-r--r--kernel/sys.c13
-rw-r--r--mm/mmap.c6
-rw-r--r--tools/testing/selftests/x86/mpx-debug.h15
-rw-r--r--tools/testing/selftests/x86/mpx-dig.c499
-rw-r--r--tools/testing/selftests/x86/mpx-hw.h124
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c1616
-rw-r--r--tools/testing/selftests/x86/mpx-mm.h10
36 files changed, 13 insertions, 4073 deletions
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
deleted file mode 100644
index 85d0549ad84636..00000000000000
--- a/Documentation/x86/intel_mpx.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-1. Intel(R) MPX Overview
-========================
-
-Intel(R) Memory Protection Extensions (Intel(R) MPX) is a new capability
-introduced into Intel Architecture. Intel MPX provides hardware features
-that can be used in conjunction with compiler changes to check memory
-references, for those references whose compile-time normal intentions are
-usurped at runtime due to buffer overflow or underflow.
-
-You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
-
- cat /proc/cpuinfo | grep ' mpx '
-
-For more information, please refer to Intel(R) Architecture Instruction
-Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
-Extensions.
-
-Note: As of December 2014, no hardware with MPX is available but it is
-possible to use SDE (Intel(R) Software Development Emulator) instead, which
-can be downloaded from
-http://software.intel.com/en-us/articles/intel-software-development-emulator
-
-
-2. How to get the advantage of MPX
-==================================
-
-For MPX to work, changes are required in the kernel, binutils and compiler.
-No source changes are required for applications, just a recompile.
-
-There are a lot of moving parts that all need to work right. The following
-is how we expect the compiler, application and kernel to work together.
-
-1) Application developer compiles with -fmpx. The compiler will add the
- instrumentation as well as some setup code called early after the app
- starts. New instruction prefixes are noops for old CPUs.
-2) That setup code allocates (virtual) space for the "bounds directory",
- points the "bndcfgu" register to the directory (must also set the valid
- bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT))
- that the app will be using MPX. The app must be careful not to access
- the bounds tables between the time when it populates "bndcfgu" and
- when it calls the prctl(). This might be hard to guarantee if the app
- is compiled with MPX. You can add "__attribute__((bnd_legacy))" to
- the function to disable MPX instrumentation to help guarantee this.
- Also be careful not to call out to any other code which might be
- MPX-instrumented.
-3) The kernel detects that the CPU has MPX, allows the new prctl() to
- succeed, and notes the location of the bounds directory. Userspace is
- expected to keep the bounds directory at that location. We note it
- instead of reading it each time because the 'xsave' operation needed
- to access the bounds directory register is an expensive operation.
-4) If the application needs to spill bounds out of the 4 registers, it
- issues a bndstx instruction. Since the bounds directory is empty at
- this point, a bounds fault (#BR) is raised, the kernel allocates a
- bounds table (in the user address space) and makes the relevant entry
- in the bounds directory point to the new table.
-5) If the application violates the bounds specified in the bounds registers,
- a separate kind of #BR is raised which will deliver a signal with
- information about the violation in the 'struct siginfo'.
-6) Whenever memory is freed, we know that it can no longer contain valid
- pointers, and we attempt to free the associated space in the bounds
- tables. If an entire table becomes unused, we will attempt to free
- the table and remove the entry in the directory.
-
-To summarize, there are essentially three things interacting here:
-
-GCC with -fmpx:
- * enables annotation of code with MPX instructions and prefixes
- * inserts code early in the application to call in to the "gcc runtime"
-GCC MPX Runtime:
- * Checks for hardware MPX support in cpuid leaf
- * allocates virtual space for the bounds directory (malloc() essentially)
- * points the hardware BNDCFGU register at the directory
- * calls a new prctl(PR_MPX_ENABLE_MANAGEMENT) to notify the kernel to
- start managing the bounds directories
-Kernel MPX Code:
- * Checks for hardware MPX support in cpuid leaf
- * Handles #BR exceptions and sends SIGSEGV to the app when it violates
- bounds, like during a buffer overflow.
- * When bounds are spilled in to an unallocated bounds table, the kernel
- notices in the #BR exception, allocates the virtual space, then
- updates the bounds directory to point to the new table. It keeps
- special track of the memory with a VM_MPX flag.
- * Frees unused bounds tables at the time that the memory they described
- is unmapped.
-
-
-3. How does MPX kernel code work
-================================
-
-Handling #BR faults caused by MPX
----------------------------------
-
-When MPX is enabled, there are 2 new situations that can generate
-#BR faults.
- * new bounds tables (BT) need to be allocated to save bounds.
- * bounds violation caused by MPX instructions.
-
-We hook #BR handler to handle these two new situations.
-
-On-demand kernel allocation of bounds tables
---------------------------------------------
-
-MPX only has 4 hardware registers for storing bounds information. If
-MPX-enabled code needs more than these 4 registers, it needs to spill
-them somewhere. It has two special instructions for this which allow
-the bounds to be moved between the bounds registers and some new "bounds
-tables".
-
-#BR exceptions are a new class of exceptions just for MPX. They are
-similar conceptually to a page fault and will be raised by the MPX
-hardware both during bounds violations and when the tables are not
-present. The kernel handles those #BR exceptions for not-present tables
-by carving the space out of the normal process's address space and then
-pointing the bounds-directory over to it.
-
-The tables need to be accessed and controlled by userspace because
-the instructions for moving bounds in and out of them are extremely
-frequent. They potentially happen every time a register points to
-memory. Any direct kernel involvement (like a syscall) to access the
-tables would obviously destroy performance.
-
-Why not do this in userspace? MPX does not strictly require anything in
-the kernel. It can theoretically be done completely from userspace. Here
-are a few ways this could be done. We don't think any of them are practical
-in the real-world, but here they are.
-
-Q: Can virtual space simply be reserved for the bounds tables so that we
- never have to allocate them?
-A: MPX-enabled application will possibly create a lot of bounds tables in
- process address space to save bounds information. These tables can take
- up huge swaths of memory (as much as 80% of the memory on the system)
- even if we clean them up aggressively. In the worst-case scenario, the
- tables can be 4x the size of the data structure being tracked. IOW, a
- 1-page structure can require 4 bounds-table pages. An X-GB virtual
- area needs 4*X GB of virtual space, plus 2GB for the bounds directory.
- If we were to preallocate them for the 128TB of user virtual address
- space, we would need to reserve 512TB+2GB, which is larger than the
- entire virtual address space today. This means they can not be reserved
- ahead of time. Also, a single process's pre-populated bounds directory
- consumes 2GB of virtual *AND* physical memory. IOW, it's completely
- infeasible to prepopulate bounds directories.
-
-Q: Can we preallocate bounds table space at the same time memory is
- allocated which might contain pointers that might eventually need
- bounds tables?
-A: This would work if we could hook the site of each and every memory
- allocation syscall. This can be done for small, constrained applications.
- But, it isn't practical at a larger scale since a given app has no
- way of controlling how all the parts of the app might allocate memory
- (think libraries). The kernel is really the only place to intercept
- these calls.
-
-Q: Could a bounds fault be handed to userspace and the tables allocated
- there in a signal handler instead of in the kernel?
-A: mmap() is not on the list of safe async handler functions and even
- if mmap() would work it still requires locking or nasty tricks to
- keep track of the allocation state there.
-
-Having ruled out all of the userspace-only approaches for managing
-bounds tables that we could think of, we create them on demand in
-the kernel.
-
-Decoding MPX instructions
--------------------------
-
-If a #BR is generated due to a bounds violation caused by MPX,
-we need to decode the MPX instruction to get the violation address and
-store this address in the extended struct siginfo.
-
-The _sigfault field of struct siginfo is extended as follows:
-
-87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-88 struct {
-89 void __user *_addr; /* faulting insn/memory ref. */
-90 #ifdef __ARCH_SI_TRAPNO
-91 int _trapno; /* TRAP # which caused the signal */
-92 #endif
-93 short _addr_lsb; /* LSB of the reported address */
-94 struct {
-95 void __user *_lower;
-96 void __user *_upper;
-97 } _addr_bnd;
-98 } _sigfault;
-
-The '_addr' field refers to the violation address, and the new '_addr_bnd'
-field refers to the upper/lower bounds when a #BR is caused.
-
-Glibc will also be updated to support this new siginfo, so users
-can get the violation address and bounds when bounds violations occur.
-
-Cleanup unused bounds tables
-----------------------------
-
-When a BNDSTX instruction attempts to save bounds to a bounds directory
-entry marked as invalid, a #BR is generated. This is an indication that
-no bounds table exists for this entry. In this case the fault handler
-will allocate a new bounds table on demand.
-
-Since the kernel allocated those tables on-demand without userspace
-knowledge, it is also responsible for freeing them when the associated
-mappings go away.
-
-Here, the solution for this issue is to hook do_munmap() to check
-whether one process is MPX enabled. If yes, those bounds tables covered
-in the virtual address region which is being unmapped will be freed also.
-
-Adding new prctl commands
--------------------------
-
-Two new prctl commands are added to enable and disable MPX bounds tables
-management in kernel.
-
-155 #define PR_MPX_ENABLE_MANAGEMENT 43
-156 #define PR_MPX_DISABLE_MANAGEMENT 44
-
-The runtime library in userspace is responsible for allocating the bounds
-directory. So the kernel has to use the XSAVE instruction to get the base
-of the bounds directory from the BNDCFG register.
-
-But XSAVE is expected to be very expensive. As a performance
-optimization, we get the base of the bounds directory once, during
-PR_MPX_ENABLE_MANAGEMENT command execution, and save it into
-struct mm_struct to be used in the future.
-
-
-4. Special rules
-================
-
-1) If userspace is requesting help from the kernel to do the management
-of bounds tables, it may not create or modify entries in the bounds directory.
-
-Users can certainly allocate bounds tables themselves, forcibly point the
-bounds directory at them through the XSAVE instruction, and then set the
-valid bit of the bounds entry to make the entry valid. But the kernel will
-decline to assist in managing these tables.
-
-2) Userspace may not take multiple bounds directory entries and point
-them at the same bounds table.
-
-This is allowed architecturally. See more information "Intel(R) Architecture
-Instruction Set Extensions Programming Reference" (9.3.4).
-
-However, if users did this, the kernel might be fooled into unmapping an
-in-use bounds table since it does not recognize sharing.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d734f3c8234d4..1a06a14f14e0eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1875,34 +1875,6 @@ config X86_INTEL_UMIP
specific cases in protected and virtual-8086 modes. Emulated
results are dummy.
-config X86_INTEL_MPX
- prompt "Intel MPX (Memory Protection Extensions)"
- def_bool n
- # Note: only available in 64-bit mode due to VMA flags shortage
- depends on CPU_SUP_INTEL && X86_64
- select ARCH_USES_HIGH_VMA_FLAGS
- ---help---
- MPX provides hardware features that can be used in
- conjunction with compiler-instrumented code to check
- memory references. It is designed to detect buffer
- overflow or underflow bugs.
-
- This option enables running applications which are
- instrumented or otherwise use MPX. It does not use MPX
- itself inside the kernel or to protect the kernel
- against bad memory references.
-
- Enabling this option will make the kernel larger:
- ~8k of kernel text and 36 bytes of data on a 64-bit
- defconfig. It adds a long to the 'mm_struct' which
- will increase the kernel memory overhead of each
- process and adds some branches to paths used during
- exec() and munmap().
-
- For details, see Documentation/x86/intel_mpx.txt
-
- If unsure, say N.
-
config X86_INTEL_MEMORY_PROTECTION_KEYS
prompt "Intel Memory Protection Keys"
def_bool y
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f5f..9fa43ab49a578b 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -6,12 +6,6 @@
extern void check_bugs(void);
-#if defined(CONFIG_CPU_SUP_INTEL)
-void check_mpx_erratum(struct cpuinfo_x86 *c);
-#else
-static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {}
-#endif
-
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 33833d1909afda..99238e0a767768 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,12 +10,6 @@
* cpu_feature_enabled().
*/
-#ifdef CONFIG_X86_INTEL_MPX
-# define DISABLE_MPX 0
-#else
-# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
-#endif
-
#ifdef CONFIG_X86_INTEL_UMIP
# define DISABLE_UMIP 0
#else
@@ -68,7 +62,7 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_MPX)
+#define DISABLED_MASK9 0
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c75..56260359b5a8cc 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -29,9 +29,7 @@
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM | \
- XFEATURE_MASK_PKRU | \
- XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+ XFEATURE_MASK_PKRU)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5ff3e8af2c2056..beb8aa545c89d4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -50,10 +50,6 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
-#ifdef CONFIG_X86_INTEL_MPX
- /* address of the bounds directory */
- void __user *bd_addr;
-#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 0ca50611e8cec0..61363e794381e5 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,7 +12,6 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
-#include <asm/mpx.h>
extern atomic64_t last_mm_ctx_id;
@@ -253,36 +252,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- mpx_mm_init(mm);
-}
-
-static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- /*
- * mpx_notify_unmap() goes and reads a rarely-hot
- * cacheline in the mm_struct. That can be expensive
- * enough to be seen in profiles.
- *
- * The mpx_notify_unmap() call and its contents have been
- * observed to affect munmap() performance on hardware
- * where MPX is not present.
- *
- * The unlikely() optimizes for the fast case: no MPX
- * in the CPU, or no MPX use in the process. Even if
- * we get this wrong (in the unlikely event that MPX
- * is widely enabled on some system) the overhead of
- * MPX itself (reading bounds tables) is expected to
- * overwhelm the overhead of getting this unlikely()
- * consistently wrong.
- */
- if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
- mpx_notify_unmap(mm, vma, start, end);
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
deleted file mode 100644
index d0b1434fb0b691..00000000000000
--- a/arch/x86/include/asm/mpx.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MPX_H
-#define _ASM_X86_MPX_H
-
-#include <linux/types.h>
-#include <linux/mm_types.h>
-
-#include <asm/ptrace.h>
-#include <asm/insn.h>
-
-/*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
-#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
-#define MPX_BNDCFG_ENABLE_FLAG 0x1
-#define MPX_BD_ENTRY_VALID_FLAG 0x1
-
-/*
- * The upper 28 bits [47:20] of the virtual address in 64-bit
- * are used to index into bounds directory (BD).
- *
- * The directory is 2G (2^31) in size, and with 8-byte entries
- * it has 2^28 entries.
- */
-#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
-#define MPX_BD_ENTRY_BYTES_64 8
-#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-
-/*
- * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
- * entries it has 2^20 entries.
- */
-#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
-#define MPX_BD_ENTRY_BYTES_32 4
-#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-
-/*
- * A 64-bit table is 4MB total in size, and an entry is
- * 4 64-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
-#define MPX_BT_ENTRY_BYTES_64 32
-#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-
-/*
- * A 32-bit table is 16kB total in size, and an entry is
- * 4 32-bit pointers in size.
- */
-#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
-#define MPX_BT_ENTRY_BYTES_32 16
-#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
-
-#define MPX_BNDSTA_TAIL 2
-#define MPX_BNDCFG_TAIL 12
-#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BNDSTA_ERROR_CODE 0x3
-
-struct mpx_fault_info {
- void __user *addr;
- void __user *lower;
- void __user *upper;
-};
-
-#ifdef CONFIG_X86_INTEL_MPX
-int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
-int mpx_handle_bd_fault(void);
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
- /*
- * NULL is theoretically a valid place to put the bounds
- * directory, so point this at an invalid address.
- */
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
-}
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
-
-unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
- unsigned long flags);
-#else
-static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- return -EINVAL;
-}
-static inline int mpx_handle_bd_fault(void)
-{
- return -EINVAL;
-}
-static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
-{
- return 0;
-}
-static inline void mpx_mm_init(struct mm_struct *mm)
-{
-}
-static inline void mpx_notify_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
- unsigned long len, unsigned long flags)
-{
- return addr;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 071b2a6fff85c9..51e2a9d157e99b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -925,24 +925,6 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
-{
- return -EINVAL;
-}
-static inline int mpx_disable_management(void)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
#ifdef CONFIG_CPU_SUP_AMD
extern u16 amd_get_nb_id(int cpu);
extern u32 amd_get_nodes_per_socket(void);
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
deleted file mode 100644
index 54133017267c3c..00000000000000
--- a/arch/x86/include/asm/trace/mpx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mpx
-
-#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_MPX_H
-
-#include <linux/tracepoint.h>
-
-#ifdef CONFIG_X86_INTEL_MPX
-
-TRACE_EVENT(mpx_bounds_register_exception,
-
- TP_PROTO(void __user *addr_referenced,
- const struct mpx_bndreg *bndreg),
- TP_ARGS(addr_referenced, bndreg),
-
- TP_STRUCT__entry(
- __field(void __user *, addr_referenced)
- __field(u64, lower_bound)
- __field(u64, upper_bound)
- ),
-
- TP_fast_assign(
- __entry->addr_referenced = addr_referenced;
- __entry->lower_bound = bndreg->lower_bound;
- __entry->upper_bound = bndreg->upper_bound;
- ),
- /*
- * Note that we are printing out the '~' of the upper
- * bounds register here. It is actually stored in its
- * one's complement form so that its 'init' state
- * corresponds to all 0's. But, that looks like
- * gibberish when printed out, so print out the 1's
- * complement instead of the actual value here. Note
- * though that you still need to specify filters for the
- * actual value, not the displayed one.
- */
- TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
- __entry->addr_referenced,
- __entry->lower_bound,
- ~__entry->upper_bound
- )
-);
-
-TRACE_EVENT(bounds_exception_mpx,
-
- TP_PROTO(const struct mpx_bndcsr *bndcsr),
- TP_ARGS(bndcsr),
-
- TP_STRUCT__entry(
- __field(u64, bndcfgu)
- __field(u64, bndstatus)
- ),
-
- TP_fast_assign(
- /* need to get rid of the 'const' on bndcsr */
- __entry->bndcfgu = (u64)bndcsr->bndcfgu;
- __entry->bndstatus = (u64)bndcsr->bndstatus;
- ),
-
- TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
- __entry->bndcfgu,
- __entry->bndstatus)
-);
-
-DECLARE_EVENT_CLASS(mpx_range_trace,
-
- TP_PROTO(unsigned long start,
- unsigned long end),
- TP_ARGS(start, end),
-
- TP_STRUCT__entry(
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
-
- TP_fast_assign(
- __entry->start = start;
- __entry->end = end;
- ),
-
- TP_printk("[0x%p:0x%p]",
- (void *)__entry->start,
- (void *)__entry->end
- )
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
- TP_PROTO(unsigned long start, unsigned long end),
- TP_ARGS(start, end)
-);
-
-TRACE_EVENT(mpx_new_bounds_table,
-
- TP_PROTO(unsigned long table_vaddr),
- TP_ARGS(table_vaddr),
-
- TP_STRUCT__entry(
- __field(unsigned long, table_vaddr)
- ),
-
- TP_fast_assign(
- __entry->table_vaddr = table_vaddr;
- ),
-
- TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
-);
-
-#else
-
-/*
- * This gets used outside of MPX-specific code, so we need a stub.
- */
-static inline
-void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
-{
-}
-
-#endif /* CONFIG_X86_INTEL_MPX */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH asm/trace/
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mpx
-#endif /* _TRACE_MPX_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ffb181f959d2b2..ec74e470aa0806 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -164,22 +164,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-static int __init x86_mpx_setup(char *s)
-{
- /* require an exact match without trailing characters */
- if (strlen(s))
- return 0;
-
- /* do not emit a message if the feature is not present */
- if (!boot_cpu_has(X86_FEATURE_MPX))
- return 1;
-
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
- return 1;
-}
-__setup("nompx", x86_mpx_setup);
-
#ifdef CONFIG_X86_64
static int __init x86_nopcid_setup(char *s)
{
@@ -306,8 +290,6 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
static __init int setup_disable_smep(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_SMEP);
- /* Check for things that depend on SMEP being enabled: */
- check_mpx_erratum(&boot_cpu_data);
return 1;
}
__setup("nosmep", setup_disable_smep);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab12..dd64f893aa1827 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -25,7 +25,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
{ X86_FEATURE_AVX, X86_FEATURE_XSAVE },
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
- { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df58a..057a8d6c82e6c7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -31,41 +31,6 @@
#include <asm/apic.h>
#endif
-/*
- * Just in case our CPU detection goes bad, or you have a weird system,
- * allow a way to override the automatic disabling of MPX.
- */
-static int forcempx;
-
-static int __init forcempx_setup(char *__unused)
-{
- forcempx = 1;
-
- return 1;
-}
-__setup("intel-skd-046-workaround=disable", forcempx_setup);
-
-void check_mpx_erratum(struct cpuinfo_x86 *c)
-{
- if (forcempx)
- return;
- /*
- * Turn off the MPX feature on CPUs where SMEP is not
- * available or disabled.
- *
- * Works around Intel Erratum SKD046: "Branch Instructions
- * May Initialize MPX Bound Registers Incorrectly".
- *
- * This might falsely disable MPX on systems without
- * SMEP, like Atom processors without SMEP. But there
- * is no such hardware known at the moment.
- */
- if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) {
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- pr_warn("x86/mpx: Disabling MPX since SMEP not present\n");
- }
-}
-
static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
@@ -303,8 +268,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
}
- check_mpx_erratum(c);
-
/*
* Get the number of SMT siblings early from the extended topology
* leaf, if available. Otherwise try the legacy SMT detection.
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 87a57b7642d367..bdf06940146aa9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -27,8 +27,8 @@ static const char *xfeature_names[] =
"x87 floating point registers" ,
"SSE registers" ,
"AVX registers" ,
- "MPX bounds registers" ,
- "MPX CSR" ,
+ "MPX bounds registers (unused)" ,
+ "MPX CSR (unused)" ,
"AVX-512 opmask" ,
"AVX-512 Hi256" ,
"AVX-512 ZMM_Hi256" ,
@@ -306,8 +306,6 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_FP);
print_xstate_feature(XFEATURE_MASK_SSE);
print_xstate_feature(XFEATURE_MASK_YMM);
- print_xstate_feature(XFEATURE_MASK_BNDREGS);
- print_xstate_feature(XFEATURE_MASK_BNDCSR);
print_xstate_feature(XFEATURE_MASK_OPMASK);
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab40..ddeacf2e6f9d7e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -924,8 +924,6 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
- mpx_mm_init(&init_mm);
-
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
data_resource.start = __pa_symbol(_etext);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index f7476ce23b6e0f..ca3c11a17b5a1b 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -22,7 +22,6 @@
#include <asm/elf.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
-#include <asm/mpx.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -137,10 +136,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_unmapped_area_info info;
unsigned long begin, end;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
if (flags & MAP_FIXED)
return addr;
@@ -180,10 +175,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9b7c4ca8f0a735..85cccadb9a658e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -57,8 +57,6 @@
#include <asm/mach_traps.h>
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
-#include <asm/trace/mpx.h>
-#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
@@ -433,8 +431,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
- const struct mpx_bndcsr *bndcsr;
-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
@@ -444,76 +440,6 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (!user_mode(regs))
die("bounds", regs, error_code);
- if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
- /* The exception is not from Intel MPX */
- goto exit_trap;
- }
-
- /*
- * We need to look at BNDSTATUS to resolve this exception.
- * A NULL here might mean that it is in its 'init state',
- * which is all zeros which indicates MPX was not
- * responsible for the exception.
- */
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
- if (!bndcsr)
- goto exit_trap;
-
- trace_bounds_exception_mpx(bndcsr);
- /*
- * The error code field of the BNDSTATUS register communicates status
- * information of a bound range exception #BR or operation involving
- * bound directory.
- */
- switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
- case 2: /* Bound directory has invalid entry. */
- if (mpx_handle_bd_fault())
- goto exit_trap;
- break; /* Success, it was handled */
- case 1: /* Bound violation. */
- {
- struct task_struct *tsk = current;
- struct mpx_fault_info mpx;
-
- if (mpx_fault_info(&mpx, regs)) {
- /*
- * We failed to decode the MPX instruction. Act as if
- * the exception was not caused by MPX.
- */
- goto exit_trap;
- }
- /*
- * Success, we decoded the instruction and retrieved
- * an 'mpx' containing the address being accessed
- * which caused the exception. This information
- * allows and application to possibly handle the
- * #BR exception itself.
- */
- if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs,
- error_code))
- break;
-
- show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code);
-
- force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper);
- break;
- }
- case 0: /* No exception caused by Intel MPX operations. */
- goto exit_trap;
- default:
- die("bounds", regs, error_code);
- }
-
- return;
-
-exit_trap:
- /*
- * This path out is for all the cases where we could not
- * handle the exception in some way (like allocating a
- * table or telling userspace about it. We will also end
- * up here if the kernel has MPX turned off at compile
- * time..
- */
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7bcfa61375c097..ad1f4a28ef9e3d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -48,19 +48,11 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
-bool kvm_mpx_supported(void)
-{
- return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
- && kvm_x86_ops->mpx_supported());
-}
-EXPORT_SYMBOL_GPL(kvm_mpx_supported);
-
u64 kvm_supported_xcr0(void)
{
u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
- if (!kvm_mpx_supported())
- xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+ xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
return xcr0;
}
@@ -334,7 +326,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
@@ -392,7 +383,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5b6d1f5b..f3955a47b4b851 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -7,7 +7,6 @@
#include <asm/processor.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
-bool kvm_mpx_supported(void);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0e21ccc46792f6..6bfe471614f25c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5893,11 +5893,6 @@ static bool svm_invpcid_supported(void)
return false;
}
-static bool svm_mpx_supported(void)
-{
- return false;
-}
-
static bool svm_xsaves_supported(void)
{
return false;
@@ -7140,7 +7135,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.rdtscp_supported = svm_rdtscp_supported,
.invpcid_supported = svm_invpcid_supported,
- .mpx_supported = svm_mpx_supported,
.xsaves_supported = svm_xsaves_supported,
.umip_emulated = svm_umip_emulated,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4555077d69ce20..ba36bb106958ef 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4126,12 +4126,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported() ||
- (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
- return 1;
- msr_info->data = vmcs_read64(GUEST_BNDCFGS);
- break;
+ return 1;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
!(vmx->msr_ia32_feature_control &
@@ -4211,15 +4206,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported() ||
- (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
- return 1;
- if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
- (data & MSR_IA32_BNDCFGS_RSVD))
- return 1;
- vmcs_write64(GUEST_BNDCFGS, data);
- break;
+ return 1;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -6761,8 +6748,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- if (kvm_mpx_supported())
- vmcs_write64(GUEST_BNDCFGS, 0);
setup_msrs(vmx);
@@ -10945,12 +10930,6 @@ static bool vmx_has_emulated_msr(int index)
}
}
-static bool vmx_mpx_supported(void)
-{
- return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
- (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
-}
-
static bool vmx_xsaves_supported(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -11749,23 +11728,6 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
#undef cr4_fixed1_update
}
-static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (kvm_mpx_supported()) {
- bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
-
- if (mpx_enabled) {
- vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
- vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
- } else {
- vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
- vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
- }
- }
-}
-
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -11782,10 +11744,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- if (nested_vmx_allowed(vcpu)) {
+ if (nested_vmx_allowed(vcpu))
nested_vmx_cr_fixed1_bits_update(vcpu);
- nested_vmx_entry_exit_ctls_update(vcpu);
- }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -12807,14 +12767,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
set_cr4_guest_host_mask(vmx);
-
- if (kvm_mpx_supported()) {
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
- else
- vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
- }
}
/*
@@ -13331,9 +13283,6 @@ static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- if (kvm_mpx_supported() &&
- !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
- vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
@@ -13783,8 +13732,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
- if (kvm_mpx_supported())
- vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
}
/*
@@ -15068,7 +15015,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
- .mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5cd5647120f2b4..1eb02f1f604a68 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4834,10 +4834,6 @@ static void kvm_init_msr_list(void)
* to the guests in some cases.
*/
switch (msrs_to_save[i]) {
- case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
- continue;
- break;
case MSR_TSC_AUX:
if (!kvm_x86_ops->rdtscp_supported())
continue;
@@ -8694,27 +8690,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (kvm_mpx_supported()) {
- void *mpx_state_buffer;
-
- /*
- * To avoid have the INIT path from kvm_apic_has_events() that be
- * called with loaded FPU and does not let userspace fix the state.
- */
- if (init_event)
- kvm_put_guest_fpu(vcpu);
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
- XFEATURE_MASK_BNDREGS);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
- XFEATURE_MASK_BNDCSR);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
- if (init_event)
- kvm_load_guest_fpu(vcpu);
- }
-
if (!init_event) {
kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 224cd0a4756840..ee523607d6b72f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -280,8 +280,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
- | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
- | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
+ | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU)
extern u64 host_xcr0;
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f3f..8cc36394db07fa 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
-obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 92e4c4b85bbaad..39486400839db0 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,7 +19,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/elf.h>
-#include <asm/mpx.h>
#if 0 /* This is just for testing */
struct page *
@@ -151,10 +150,6 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len & ~huge_page_mask(h))
return -EINVAL;
- addr = mpx_unmapped_area_check(addr, len, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
deleted file mode 100644
index 2385538e80656a..00000000000000
--- a/arch/x86/mm/mpx.c
+++ /dev/null
@@ -1,936 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * mpx.c - Memory Protection eXtensions
- *
- * Copyright (c) 2014, Intel Corporation.
- * Qiaowei Ren <qiaowei.ren@intel.com>
- * Dave Hansen <dave.hansen@intel.com>
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/mm_types.h>
-#include <linux/syscalls.h>
-#include <linux/sched/sysctl.h>
-
-#include <asm/insn.h>
-#include <asm/insn-eval.h>
-#include <asm/mman.h>
-#include <asm/mmu_context.h>
-#include <asm/mpx.h>
-#include <asm/processor.h>
-#include <asm/fpu/internal.h>
-
-#define CREATE_TRACE_POINTS
-#include <asm/trace/mpx.h>
-
-static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BD_SIZE_BYTES_64;
- else
- return MPX_BD_SIZE_BYTES_32;
-}
-
-static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BT_SIZE_BYTES_64;
- else
- return MPX_BT_SIZE_BYTES_32;
-}
-
-/*
- * This is really a simplified "vm_mmap". it only handles MPX
- * bounds tables (the bounds directory is user-allocated).
- */
-static unsigned long mpx_mmap(unsigned long len)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, populate;
-
- /* Only bounds table can be allocated here */
- if (len != mpx_bt_size_bytes(mm))
- return -EINVAL;
-
- down_write(&mm->mmap_sem);
- addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
- up_write(&mm->mmap_sem);
- if (populate)
- mm_populate(addr, populate);
-
- return addr;
-}
-
-static int mpx_insn_decode(struct insn *insn,
- struct pt_regs *regs)
-{
- unsigned char buf[MAX_INSN_SIZE];
- int x86_64 = !test_thread_flag(TIF_IA32);
- int not_copied;
- int nr_copied;
-
- not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
- nr_copied = sizeof(buf) - not_copied;
- /*
- * The decoder _should_ fail nicely if we pass it a short buffer.
- * But, let's not depend on that implementation detail. If we
- * did not get anything, just error out now.
- */
- if (!nr_copied)
- return -EFAULT;
- insn_init(insn, buf, nr_copied, x86_64);
- insn_get_length(insn);
- /*
- * copy_from_user() tries to get as many bytes as we could see in
- * the largest possible instruction. If the instruction we are
- * after is shorter than that _and_ we attempt to copy from
- * something unreadable, we might get a short read. This is OK
- * as long as the read did not stop in the middle of the
- * instruction. Check to see if we got a partial instruction.
- */
- if (nr_copied < insn->length)
- return -EFAULT;
-
- insn_get_opcode(insn);
- /*
- * We only _really_ need to decode bndcl/bndcn/bndcu
- * Error out on anything else.
- */
- if (insn->opcode.bytes[0] != 0x0f)
- goto bad_opcode;
- if ((insn->opcode.bytes[1] != 0x1a) &&
- (insn->opcode.bytes[1] != 0x1b))
- goto bad_opcode;
-
- return 0;
-bad_opcode:
- return -EINVAL;
-}
-
-/*
- * If a bounds overflow occurs then a #BR is generated. This
- * function decodes MPX instructions to get violation address
- * and set this address into extended struct siginfo.
- *
- * Note that this is not a super precise way of doing this.
- * Userspace could have, by the time we get here, written
- * anything it wants in to the instructions. We can not
- * trust anything about it. They might not be valid
- * instructions or might encode invalid registers, etc...
- */
-int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
-{
- const struct mpx_bndreg_state *bndregs;
- const struct mpx_bndreg *bndreg;
- struct insn insn;
- uint8_t bndregno;
- int err;
-
- err = mpx_insn_decode(&insn, regs);
- if (err)
- goto err_out;
-
- /*
- * We know at this point that we are only dealing with
- * MPX instructions.
- */
- insn_get_modrm(&insn);
- bndregno = X86_MODRM_REG(insn.modrm.value);
- if (bndregno > 3) {
- err = -EINVAL;
- goto err_out;
- }
- /* get bndregs field from current task's xsave area */
- bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
- if (!bndregs) {
- err = -EINVAL;
- goto err_out;
- }
- /* now go select the individual register in the set of 4 */
- bndreg = &bndregs->bndreg[bndregno];
-
- /*
- * The registers are always 64-bit, but the upper 32
- * bits are ignored in 32-bit mode. Also, note that the
- * upper bounds are architecturally represented in 1's
- * complement form.
- *
- * The 'unsigned long' cast is because the compiler
- * complains when casting from integers to different-size
- * pointers.
- */
- info->lower = (void __user *)(unsigned long)bndreg->lower_bound;
- info->upper = (void __user *)(unsigned long)~bndreg->upper_bound;
- info->addr = insn_get_addr_ref(&insn, regs);
-
- /*
- * We were not able to extract an address from the instruction,
- * probably because there was something invalid in it.
- */
- if (info->addr == (void __user *)-1) {
- err = -EINVAL;
- goto err_out;
- }
- trace_mpx_bounds_register_exception(info->addr, bndreg);
- return 0;
-err_out:
- /* info might be NULL, but kfree() handles that */
- return err;
-}
-
-static __user void *mpx_get_bounds_dir(void)
-{
- const struct mpx_bndcsr *bndcsr;
-
- if (!cpu_feature_enabled(X86_FEATURE_MPX))
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * The bounds directory pointer is stored in a register
- * only accessible if we first do an xsave.
- */
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
- if (!bndcsr)
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * Make sure the register looks valid by checking the
- * enable bit.
- */
- if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
- return MPX_INVALID_BOUNDS_DIR;
-
- /*
- * Lastly, mask off the low bits used for configuration
- * flags, and return the address of the bounds table.
- */
- return (void __user *)(unsigned long)
- (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
-}
-
-int mpx_enable_management(void)
-{
- void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
- struct mm_struct *mm = current->mm;
- int ret = 0;
-
- /*
- * runtime in the userspace will be responsible for allocation of
- * the bounds directory. Then, it will save the base of the bounds
- * directory into XSAVE/XRSTOR Save Area and enable MPX through
- * XRSTOR instruction.
- *
- * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
- * expected to be relatively expensive. Storing the bounds
- * directory here means that we do not have to do xsave in the
- * unmap path; we can just use mm->context.bd_addr instead.
- */
- bd_base = mpx_get_bounds_dir();
- down_write(&mm->mmap_sem);
-
- /* MPX doesn't support addresses above 47 bits yet. */
- if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
- pr_warn_once("%s (%d): MPX cannot handle addresses "
- "above 47-bits. Disabling.",
- current->comm, current->pid);
- ret = -ENXIO;
- goto out;
- }
- mm->context.bd_addr = bd_base;
- if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
- ret = -ENXIO;
-out:
- up_write(&mm->mmap_sem);
- return ret;
-}
-
-int mpx_disable_management(void)
-{
- struct mm_struct *mm = current->mm;
-
- if (!cpu_feature_enabled(X86_FEATURE_MPX))
- return -ENXIO;
-
- down_write(&mm->mmap_sem);
- mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
- up_write(&mm->mmap_sem);
- return 0;
-}
-
-static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
- unsigned long *curval,
- unsigned long __user *addr,
- unsigned long old_val, unsigned long new_val)
-{
- int ret;
- /*
- * user_atomic_cmpxchg_inatomic() actually uses sizeof()
- * the pointer that we pass to it to figure out how much
- * data to cmpxchg. We have to be careful here not to
- * pass a pointer to a 64-bit data type when we only want
- * a 32-bit copy.
- */
- if (is_64bit_mm(mm)) {
- ret = user_atomic_cmpxchg_inatomic(curval,
- addr, old_val, new_val);
- } else {
- u32 uninitialized_var(curval_32);
- u32 old_val_32 = old_val;
- u32 new_val_32 = new_val;
- u32 __user *addr_32 = (u32 __user *)addr;
-
- ret = user_atomic_cmpxchg_inatomic(&curval_32,
- addr_32, old_val_32, new_val_32);
- *curval = curval_32;
- }
- return ret;
-}
-
-/*
- * With 32-bit mode, a bounds directory is 4MB, and the size of each
- * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
- * and the size of each bounds table is 4MB.
- */
-static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
-{
- unsigned long expected_old_val = 0;
- unsigned long actual_old_val = 0;
- unsigned long bt_addr;
- unsigned long bd_new_entry;
- int ret = 0;
-
- /*
- * Carve the virtual space out of userspace for the new
- * bounds table:
- */
- bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
- if (IS_ERR((void *)bt_addr))
- return PTR_ERR((void *)bt_addr);
- /*
- * Set the valid flag (kinda like _PAGE_PRESENT in a pte)
- */
- bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
-
- /*
- * Go poke the address of the new bounds table in to the
- * bounds directory entry out in userspace memory. Note:
- * we may race with another CPU instantiating the same table.
- * In that case the cmpxchg will see an unexpected
- * 'actual_old_val'.
- *
- * This can fault, but that's OK because we do not hold
- * mmap_sem at this point, unlike some of the other part
- * of the MPX code that have to pagefault_disable().
- */
- ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
- expected_old_val, bd_new_entry);
- if (ret)
- goto out_unmap;
-
- /*
- * The user_atomic_cmpxchg_inatomic() will only return nonzero
- * for faults, *not* if the cmpxchg itself fails. Now we must
- * verify that the cmpxchg itself completed successfully.
- */
- /*
- * We expected an empty 'expected_old_val', but instead found
- * an apparently valid entry. Assume we raced with another
- * thread to instantiate this table and desclare succecss.
- */
- if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
- ret = 0;
- goto out_unmap;
- }
- /*
- * We found a non-empty bd_entry but it did not have the
- * VALID_FLAG set. Return an error which will result in
- * a SEGV since this probably means that somebody scribbled
- * some invalid data in to a bounds table.
- */
- if (expected_old_val != actual_old_val) {
- ret = -EINVAL;
- goto out_unmap;
- }
- trace_mpx_new_bounds_table(bt_addr);
- return 0;
-out_unmap:
- vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
- return ret;
-}
-
-/*
- * When a BNDSTX instruction attempts to save bounds to a bounds
- * table, it will first attempt to look up the table in the
- * first-level bounds directory. If it does not find a table in
- * the directory, a #BR is generated and we get here in order to
- * allocate a new table.
- *
- * With 32-bit mode, the size of BD is 4MB, and the size of each
- * bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
- * and the size of each bound table is 4MB.
- */
-static int do_mpx_bt_fault(void)
-{
- unsigned long bd_entry, bd_base;
- const struct mpx_bndcsr *bndcsr;
- struct mm_struct *mm = current->mm;
-
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
- if (!bndcsr)
- return -EINVAL;
- /*
- * Mask off the preserve and enable bits
- */
- bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
- /*
- * The hardware provides the address of the missing or invalid
- * entry via BNDSTATUS, so we don't have to go look it up.
- */
- bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
- /*
- * Make sure the directory entry is within where we think
- * the directory is.
- */
- if ((bd_entry < bd_base) ||
- (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
- return -EINVAL;
-
- return allocate_bt(mm, (long __user *)bd_entry);
-}
-
-int mpx_handle_bd_fault(void)
-{
- /*
- * Userspace never asked us to manage the bounds tables,
- * so refuse to help.
- */
- if (!kernel_managing_mpx_tables(current->mm))
- return -EINVAL;
-
- return do_mpx_bt_fault();
-}
-
-/*
- * A thin wrapper around get_user_pages(). Returns 0 if the
- * fault was resolved or -errno if not.
- */
-static int mpx_resolve_fault(long __user *addr, int write)
-{
- long gup_ret;
- int nr_pages = 1;
-
- gup_ret = get_user_pages((unsigned long)addr, nr_pages,
- write ? FOLL_WRITE : 0, NULL, NULL);
- /*
- * get_user_pages() returns number of pages gotten.
- * 0 means we failed to fault in and get anything,
- * probably because 'addr' is bad.
- */
- if (!gup_ret)
- return -EFAULT;
- /* Other error, return it */
- if (gup_ret < 0)
- return gup_ret;
- /* must have gup'd a page and gup_ret>0, success */
- return 0;
-}
-
-static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
- unsigned long bd_entry)
-{
- unsigned long bt_addr = bd_entry;
- int align_to_bytes;
- /*
- * Bit 0 in a bt_entry is always the valid bit.
- */
- bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
- /*
- * Tables are naturally aligned at 8-byte boundaries
- * on 64-bit and 4-byte boundaries on 32-bit. The
- * documentation makes it appear that the low bits
- * are ignored by the hardware, so we do the same.
- */
- if (is_64bit_mm(mm))
- align_to_bytes = 8;
- else
- align_to_bytes = 4;
- bt_addr &= ~(align_to_bytes-1);
- return bt_addr;
-}
-
-/*
- * We only want to do a 4-byte get_user() on 32-bit. Otherwise,
- * we might run off the end of the bounds table if we are on
- * a 64-bit kernel and try to get 8 bytes.
- */
-static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
- long __user *bd_entry_ptr)
-{
- u32 bd_entry_32;
- int ret;
-
- if (is_64bit_mm(mm))
- return get_user(*bd_entry_ret, bd_entry_ptr);
-
- /*
- * Note that get_user() uses the type of the *pointer* to
- * establish the size of the get, not the destination.
- */
- ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
- *bd_entry_ret = bd_entry_32;
- return ret;
-}
-
-/*
- * Get the base of bounds tables pointed by specific bounds
- * directory entry.
- */
-static int get_bt_addr(struct mm_struct *mm,
- long __user *bd_entry_ptr,
- unsigned long *bt_addr_result)
-{
- int ret;
- int valid_bit;
- unsigned long bd_entry;
- unsigned long bt_addr;
-
- if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
- return -EFAULT;
-
- while (1) {
- int need_write = 0;
-
- pagefault_disable();
- ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
- pagefault_enable();
- if (!ret)
- break;
- if (ret == -EFAULT)
- ret = mpx_resolve_fault(bd_entry_ptr, need_write);
- /*
- * If we could not resolve the fault, consider it
- * userspace's fault and error out.
- */
- if (ret)
- return ret;
- }
-
- valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
- bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
-
- /*
- * When the kernel is managing bounds tables, a bounds directory
- * entry will either have a valid address (plus the valid bit)
- * *OR* be completely empty. If we see a !valid entry *and* some
- * data in the address field, we know something is wrong. This
- * -EINVAL return will cause a SIGSEGV.
- */
- if (!valid_bit && bt_addr)
- return -EINVAL;
- /*
- * Do we have an completely zeroed bt entry? That is OK. It
- * just means there was no bounds table for this memory. Make
- * sure to distinguish this from -EINVAL, which will cause
- * a SEGV.
- */
- if (!valid_bit)
- return -ENOENT;
-
- *bt_addr_result = bt_addr;
- return 0;
-}
-
-static inline int bt_entry_size_bytes(struct mm_struct *mm)
-{
- if (is_64bit_mm(mm))
- return MPX_BT_ENTRY_BYTES_64;
- else
- return MPX_BT_ENTRY_BYTES_32;
-}
-
-/*
- * Take a virtual address and turns it in to the offset in bytes
- * inside of the bounds table where the bounds table entry
- * controlling 'addr' can be found.
- */
-static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
- unsigned long addr)
-{
- unsigned long bt_table_nr_entries;
- unsigned long offset = addr;
-
- if (is_64bit_mm(mm)) {
- /* Bottom 3 bits are ignored on 64-bit */
- offset >>= 3;
- bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
- } else {
- /* Bottom 2 bits are ignored on 32-bit */
- offset >>= 2;
- bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
- }
- /*
- * We know the size of the table in to which we are
- * indexing, and we have eliminated all the low bits
- * which are ignored for indexing.
- *
- * Mask out all the high bits which we do not need
- * to index in to the table. Note that the tables
- * are always powers of two so this gives us a proper
- * mask.
- */
- offset &= (bt_table_nr_entries-1);
- /*
- * We now have an entry offset in terms of *entries* in
- * the table. We need to scale it back up to bytes.
- */
- offset *= bt_entry_size_bytes(mm);
- return offset;
-}
-
-/*
- * How much virtual address space does a single bounds
- * directory entry cover?
- *
- * Note, we need a long long because 4GB doesn't fit in
- * to a long on 32-bit.
- */
-static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
-{
- unsigned long long virt_space;
- unsigned long long GB = (1ULL << 30);
-
- /*
- * This covers 32-bit emulation as well as 32-bit kernels
- * running on 64-bit hardware.
- */
- if (!is_64bit_mm(mm))
- return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
-
- /*
- * 'x86_virt_bits' returns what the hardware is capable
- * of, and returns the full >32-bit address space when
- * running 32-bit kernels on 64-bit hardware.
- */
- virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
- return virt_space / MPX_BD_NR_ENTRIES_64;
-}
-
-/*
- * Free the backing physical pages of bounds table 'bt_addr'.
- * Assume start...end is within that bounds table.
- */
-static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
- unsigned long bt_addr,
- unsigned long start_mapping, unsigned long end_mapping)
-{
- struct vm_area_struct *vma;
- unsigned long addr, len;
- unsigned long start;
- unsigned long end;
-
- /*
- * if we 'end' on a boundary, the offset will be 0 which
- * is not what we want. Back it up a byte to get the
- * last bt entry. Then once we have the entry itself,
- * move 'end' back up by the table entry size.
- */
- start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
- end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
- /*
- * Move end back up by one entry. Among other things
- * this ensures that it remains page-aligned and does
- * not screw up zap_page_range()
- */
- end += bt_entry_size_bytes(mm);
-
- /*
- * Find the first overlapping vma. If vma->vm_start > start, there
- * will be a hole in the bounds table. This -EINVAL return will
- * cause a SIGSEGV.
- */
- vma = find_vma(mm, start);
- if (!vma || vma->vm_start > start)
- return -EINVAL;
-
- /*
- * A NUMA policy on a VM_MPX VMA could cause this bounds table to
- * be split. So we need to look across the entire 'start -> end'
- * range of this bounds table, find all of the VM_MPX VMAs, and
- * zap only those.
- */
- addr = start;
- while (vma && vma->vm_start < end) {
- /*
- * We followed a bounds directory entry down
- * here. If we find a non-MPX VMA, that's bad,
- * so stop immediately and return an error. This
- * probably results in a SIGSEGV.
- */
- if (!(vma->vm_flags & VM_MPX))
- return -EINVAL;
-
- len = min(vma->vm_end, end) - addr;
- zap_page_range(vma, addr, len);
- trace_mpx_unmap_zap(addr, addr+len);
-
- vma = vma->vm_next;
- addr = vma->vm_start;
- }
- return 0;
-}
-
-static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
- unsigned long addr)
-{
- /*
- * There are several ways to derive the bd offsets. We
- * use the following approach here:
- * 1. We know the size of the virtual address space
- * 2. We know the number of entries in a bounds table
- * 3. We know that each entry covers a fixed amount of
- * virtual address space.
- * So, we can just divide the virtual address by the
- * virtual space used by one entry to determine which
- * entry "controls" the given virtual address.
- */
- if (is_64bit_mm(mm)) {
- int bd_entry_size = 8; /* 64-bit pointer */
- /*
- * Take the 64-bit addressing hole in to account.
- */
- addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
- return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
- } else {
- int bd_entry_size = 4; /* 32-bit pointer */
- /*
- * 32-bit has no hole so this case needs no mask
- */
- return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
- }
- /*
- * The two return calls above are exact copies. If we
- * pull out a single copy and put it in here, gcc won't
- * realize that we're doing a power-of-2 divide and use
- * shifts. It uses a real divide. If we put them up
- * there, it manages to figure it out (gcc 4.8.3).
- */
-}
-
-static int unmap_entire_bt(struct mm_struct *mm,
- long __user *bd_entry, unsigned long bt_addr)
-{
- unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
- unsigned long uninitialized_var(actual_old_val);
- int ret;
-
- while (1) {
- int need_write = 1;
- unsigned long cleared_bd_entry = 0;
-
- pagefault_disable();
- ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
- bd_entry, expected_old_val, cleared_bd_entry);
- pagefault_enable();
- if (!ret)
- break;
- if (ret == -EFAULT)
- ret = mpx_resolve_fault(bd_entry, need_write);
- /*
- * If we could not resolve the fault, consider it
- * userspace's fault and error out.
- */
- if (ret)
- return ret;
- }
- /*
- * The cmpxchg was performed, check the results.
- */
- if (actual_old_val != expected_old_val) {
- /*
- * Someone else raced with us to unmap the table.
- * That is OK, since we were both trying to do
- * the same thing. Declare success.
- */
- if (!actual_old_val)
- return 0;
- /*
- * Something messed with the bounds directory
- * entry. We hold mmap_sem for read or write
- * here, so it could not be a _new_ bounds table
- * that someone just allocated. Something is
- * wrong, so pass up the error and SIGSEGV.
- */
- return -EINVAL;
- }
- /*
- * Note, we are likely being called under do_munmap() already. To
- * avoid recursion, do_munmap() will check whether it comes
- * from one bounds table through VM_MPX flag.
- */
- return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
-}
-
-static int try_unmap_single_bt(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct vm_area_struct *next;
- struct vm_area_struct *prev;
- /*
- * "bta" == Bounds Table Area: the area controlled by the
- * bounds table that we are unmapping.
- */
- unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
- unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
- unsigned long uninitialized_var(bt_addr);
- void __user *bde_vaddr;
- int ret;
- /*
- * We already unlinked the VMAs from the mm's rbtree so 'start'
- * is guaranteed to be in a hole. This gets us the first VMA
- * before the hole in to 'prev' and the next VMA after the hole
- * in to 'next'.
- */
- next = find_vma_prev(mm, start, &prev);
- /*
- * Do not count other MPX bounds table VMAs as neighbors.
- * Although theoretically possible, we do not allow bounds
- * tables for bounds tables so our heads do not explode.
- * If we count them as neighbors here, we may end up with
- * lots of tables even though we have no actual table
- * entries in use.
- */
- while (next && (next->vm_flags & VM_MPX))
- next = next->vm_next;
- while (prev && (prev->vm_flags & VM_MPX))
- prev = prev->vm_prev;
- /*
- * We know 'start' and 'end' lie within an area controlled
- * by a single bounds table. See if there are any other
- * VMAs controlled by that bounds table. If there are not
- * then we can "expand" the are we are unmapping to possibly
- * cover the entire table.
- */
- next = find_vma_prev(mm, start, &prev);
- if ((!prev || prev->vm_end <= bta_start_vaddr) &&
- (!next || next->vm_start >= bta_end_vaddr)) {
- /*
- * No neighbor VMAs controlled by same bounds
- * table. Try to unmap the whole thing
- */
- start = bta_start_vaddr;
- end = bta_end_vaddr;
- }
-
- bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
- ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
- /*
- * No bounds table there, so nothing to unmap.
- */
- if (ret == -ENOENT) {
- ret = 0;
- return 0;
- }
- if (ret)
- return ret;
- /*
- * We are unmapping an entire table. Either because the
- * unmap that started this whole process was large enough
- * to cover an entire table, or that the unmap was small
- * but was the area covered by a bounds table.
- */
- if ((start == bta_start_vaddr) &&
- (end == bta_end_vaddr))
- return unmap_entire_bt(mm, bde_vaddr, bt_addr);
- return zap_bt_entries_mapping(mm, bt_addr, start, end);
-}
-
-static int mpx_unmap_tables(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- unsigned long one_unmap_start;
- trace_mpx_unmap_search(start, end);
-
- one_unmap_start = start;
- while (one_unmap_start < end) {
- int ret;
- unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
- bd_entry_virt_space(mm));
- unsigned long one_unmap_end = end;
- /*
- * if the end is beyond the current bounds table,
- * move it back so we only deal with a single one
- * at a time
- */
- if (one_unmap_end > next_unmap_start)
- one_unmap_end = next_unmap_start;
- ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
- if (ret)
- return ret;
-
- one_unmap_start = next_unmap_start;
- }
- return 0;
-}
-
-/*
- * Free unused bounds tables covered in a virtual address region being
- * munmap()ed. Assume end > start.
- *
- * This function will be called by do_munmap(), and the VMAs covering
- * the virtual address region start...end have already been split if
- * necessary, and the 'vma' is the first vma in this range (start -> end).
- */
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- int ret;
-
- /*
- * Refuse to do anything unless userspace has asked
- * the kernel to help manage the bounds tables,
- */
- if (!kernel_managing_mpx_tables(current->mm))
- return;
- /*
- * This will look across the entire 'start -> end' range,
- * and find all of the non-VM_MPX VMAs.
- *
- * To avoid recursion, if a VM_MPX vma is found in the range
- * (start->end), we will not continue follow-up work. This
- * recursion represents having bounds tables for bounds tables,
- * which should not occur normally. Being strict about it here
- * helps ensure that we do not have an exploitable stack overflow.
- */
- do {
- if (vma->vm_flags & VM_MPX)
- return;
- vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
-
- ret = mpx_unmap_tables(mm, start, end);
- if (ret)
- force_sig(SIGSEGV, current);
-}
-
-/* MPX cannot handle addresses above 47 bits yet. */
-unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
- unsigned long flags)
-{
- if (!kernel_managing_mpx_tables(current->mm))
- return addr;
- if (addr + len <= DEFAULT_MAP_WINDOW)
- return addr;
- if (flags & MAP_FIXED)
- return -ENOMEM;
-
- /*
- * Requested len is larger than the whole area we're allowed to map in.
- * Resetting hinting address wouldn't do much good -- fail early.
- */
- if (len > DEFAULT_MAP_WINDOW)
- return -ENOMEM;
-
- /* Look for unmap area within DEFAULT_MAP_WINDOW */
- return 0;
-}
diff --git a/fs/exec.c b/fs/exec.c
index acc3a5536384cf..1985bd07b1bc49 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -318,7 +318,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
goto err;
mm->stack_vm = mm->total_vm = 1;
- arch_bprm_mm_init(mm, vma);
up_write(&mm->mmap_sem);
bprm->p = vma->vm_end - sizeof(void *);
return 0;
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index 8ac4e68a12f08e..40038d04070c66 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -17,17 +17,6 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
-static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index c0d7ea0bf5b624..ef1f1d88ce8fd7 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -181,7 +181,7 @@ struct prctl_mm_map {
#define PR_GET_THP_DISABLE 42
/*
- * Tell the kernel to start/stop helping userspace manage bounds tables.
+ * No longer implemented, but left here to ensure the numbers stay reserved:
*/
#define PR_MPX_ENABLE_MANAGEMENT 43
#define PR_MPX_DISABLE_MANAGEMENT 44
diff --git a/kernel/sys.c b/kernel/sys.c
index 123bd73046ec6b..9ea99d1a83b80e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -103,9 +103,6 @@
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a) (-EINVAL)
#endif
-#ifndef MPX_ENABLE_MANAGEMENT
-# define MPX_ENABLE_MANAGEMENT() (-EINVAL)
-#endif
#ifndef MPX_DISABLE_MANAGEMENT
# define MPX_DISABLE_MANAGEMENT() (-EINVAL)
#endif
@@ -2445,15 +2442,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
up_write(&me->mm->mmap_sem);
break;
case PR_MPX_ENABLE_MANAGEMENT:
- if (arg2 || arg3 || arg4 || arg5)
- return -EINVAL;
- error = MPX_ENABLE_MANAGEMENT();
- break;
case PR_MPX_DISABLE_MANAGEMENT:
- if (arg2 || arg3 || arg4 || arg5)
- return -EINVAL;
- error = MPX_DISABLE_MANAGEMENT();
- break;
+ /* No longer implemented: */
+ return -EINVAL;
case PR_SET_FP_MODE:
error = SET_FP_MODE(me, arg2);
break;
diff --git a/mm/mmap.c b/mm/mmap.c
index 6c04292e16a70c..365be3f1788033 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2802,12 +2802,6 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
/* Detach vmas from rbtree */
detach_vmas_to_be_unmapped(mm, vma, prev, end);
- /*
- * mpx unmap needs to be called with mmap_sem held for write.
- * It is safe to call it before unmap_region().
- */
- arch_unmap(mm, vma, start, end);
-
if (downgrade)
downgrade_write(&mm->mmap_sem);
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
deleted file mode 100644
index 7546eba7f17a0d..00000000000000
--- a/tools/testing/selftests/x86/mpx-debug.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_DEBUG_H
-#define _MPX_DEBUG_H
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-#define dprintf5(args...) dprintf_level(5, args)
-
-#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
deleted file mode 100644
index c13607ef5c11e0..00000000000000
--- a/tools/testing/selftests/x86/mpx-dig.c
+++ /dev/null
@@ -1,499 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Written by Dave Hansen <dave.hansen@intel.com>
- */
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <fcntl.h>
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-#include "mpx-hw.h"
-
-unsigned long bounds_dir_global;
-
-#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
-static void inline __mpx_dig_abort(const char *file, const char *func, int line)
-{
- fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
- printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
- abort();
-}
-
-/*
- * run like this (BDIR finds the probably bounds directory):
- *
- * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
- * | head -1 | awk -F- '{print $1}')";
- * ./mpx-dig $pid 0x$BDIR
- *
- * NOTE:
- * assumes that the only 2097152-kb VMA is the bounds dir
- */
-
-long nr_incore(void *ptr, unsigned long size_bytes)
-{
- int i;
- long ret = 0;
- long vec_len = size_bytes / PAGE_SIZE;
- unsigned char *vec = malloc(vec_len);
- int incore_ret;
-
- if (!vec)
- mpx_dig_abort();
-
- incore_ret = mincore(ptr, size_bytes, vec);
- if (incore_ret) {
- printf("mincore ret: %d\n", incore_ret);
- perror("mincore");
- mpx_dig_abort();
- }
- for (i = 0; i < vec_len; i++)
- ret += vec[i];
- free(vec);
- return ret;
-}
-
-int open_proc(int pid, char *file)
-{
- static char buf[100];
- int fd;
-
- snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
- fd = open(&buf[0], O_RDONLY);
- if (fd < 0)
- perror(buf);
-
- return fd;
-}
-
-struct vaddr_range {
- unsigned long start;
- unsigned long end;
-};
-struct vaddr_range *ranges;
-int nr_ranges_allocated;
-int nr_ranges_populated;
-int last_range = -1;
-
-int __pid_load_vaddrs(int pid)
-{
- int ret = 0;
- int proc_maps_fd = open_proc(pid, "maps");
- char linebuf[10000];
- unsigned long start;
- unsigned long end;
- char rest[1000];
- FILE *f = fdopen(proc_maps_fd, "r");
-
- if (!f)
- mpx_dig_abort();
- nr_ranges_populated = 0;
- while (!feof(f)) {
- char *readret = fgets(linebuf, sizeof(linebuf), f);
- int parsed;
-
- if (readret == NULL) {
- if (feof(f))
- break;
- mpx_dig_abort();
- }
-
- parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
- if (parsed != 3)
- mpx_dig_abort();
-
- dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
- if (nr_ranges_populated >= nr_ranges_allocated) {
- ret = -E2BIG;
- break;
- }
- ranges[nr_ranges_populated].start = start;
- ranges[nr_ranges_populated].end = end;
- nr_ranges_populated++;
- }
- last_range = -1;
- fclose(f);
- close(proc_maps_fd);
- return ret;
-}
-
-int pid_load_vaddrs(int pid)
-{
- int ret;
-
- dprintf2("%s(%d)\n", __func__, pid);
- if (!ranges) {
- nr_ranges_allocated = 4;
- ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
- dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
- nr_ranges_allocated, ranges);
- assert(ranges != NULL);
- }
- do {
- ret = __pid_load_vaddrs(pid);
- if (!ret)
- break;
- if (ret == -E2BIG) {
- dprintf2("%s(%d) need to realloc\n", __func__, pid);
- nr_ranges_allocated *= 2;
- ranges = realloc(ranges,
- nr_ranges_allocated * sizeof(ranges[0]));
- dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
- pid, nr_ranges_allocated, ranges);
- assert(ranges != NULL);
- dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
- }
- } while (1);
-
- dprintf2("%s(%d) done\n", __func__, pid);
-
- return ret;
-}
-
-static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
-{
- if (vaddr < r->start)
- return 0;
- if (vaddr >= r->end)
- return 0;
- return 1;
-}
-
-static inline int vaddr_mapped_by_range(unsigned long vaddr)
-{
- int i;
-
- if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
- return 1;
-
- for (i = 0; i < nr_ranges_populated; i++) {
- struct vaddr_range *r = &ranges[i];
-
- if (vaddr_in_range(vaddr, r))
- continue;
- last_range = i;
- return 1;
- }
- return 0;
-}
-
-const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
-
-void *read_bounds_table_into_buf(unsigned long table_vaddr)
-{
-#ifdef MPX_DIG_STANDALONE
- static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
- off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
- if (seek_ret != table_vaddr)
- mpx_dig_abort();
-
- int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
- if (read_ret != sizeof(bt_buf))
- mpx_dig_abort();
- return &bt_buf;
-#else
- return (void *)table_vaddr;
-#endif
-}
-
-int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
- unsigned long bde_vaddr)
-{
- unsigned long offset_inside_bt;
- int nr_entries = 0;
- int do_abort = 0;
- char *bt_buf;
-
- dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
- __func__, base_controlled_vaddr, bde_vaddr);
-
- bt_buf = read_bounds_table_into_buf(table_vaddr);
-
- dprintf4("%s() read done\n", __func__);
-
- for (offset_inside_bt = 0;
- offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
- offset_inside_bt += bt_entry_size_bytes) {
- unsigned long bt_entry_index;
- unsigned long bt_entry_controls;
- unsigned long this_bt_entry_for_vaddr;
- unsigned long *bt_entry_buf;
- int i;
-
- dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
- offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
- bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
- if (!bt_buf) {
- printf("null bt_buf\n");
- mpx_dig_abort();
- }
- if (!bt_entry_buf) {
- printf("null bt_entry_buf\n");
- mpx_dig_abort();
- }
- dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
- bt_entry_buf);
- if (!bt_entry_buf[0] &&
- !bt_entry_buf[1] &&
- !bt_entry_buf[2] &&
- !bt_entry_buf[3])
- continue;
-
- nr_entries++;
-
- bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
- bt_entry_controls = sizeof(void *);
- this_bt_entry_for_vaddr =
- base_controlled_vaddr + bt_entry_index*bt_entry_controls;
- /*
- * We sign extend vaddr bits 48->63 which effectively
- * creates a hole in the virtual address space.
- * This calculation corrects for the hole.
- */
- if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
- this_bt_entry_for_vaddr |= 0xffff800000000000;
-
- if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
- printf("bt_entry_buf: %p\n", bt_entry_buf);
- printf("there is a bte for %lx but no mapping\n",
- this_bt_entry_for_vaddr);
- printf(" bde vaddr: %016lx\n", bde_vaddr);
- printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
- printf(" table_vaddr: %016lx\n", table_vaddr);
- printf(" entry vaddr: %016lx @ offset %lx\n",
- table_vaddr + offset_inside_bt, offset_inside_bt);
- do_abort = 1;
- mpx_dig_abort();
- }
- if (DEBUG_LEVEL < 4)
- continue;
-
- printf("table entry[%lx]: ", offset_inside_bt);
- for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
- printf("0x%016lx ", bt_entry_buf[i]);
- printf("\n");
- }
- if (do_abort)
- mpx_dig_abort();
- dprintf4("%s() done\n", __func__);
- return nr_entries;
-}
-
-int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
- int *nr_populated_bdes)
-{
- unsigned long i;
- int total_entries = 0;
-
- dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
- len_bytes, bd_offset_bytes, buf + len_bytes);
-
- for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
- unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
- unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
- unsigned long bounds_dir_entry;
- unsigned long bd_for_vaddr;
- unsigned long bt_start;
- unsigned long bt_tail;
- int nr_entries;
-
- dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
- bounds_dir_entry_ptr);
-
- bounds_dir_entry = *bounds_dir_entry_ptr;
- if (!bounds_dir_entry) {
- dprintf4("no bounds dir at index 0x%lx / 0x%lx "
- "start at offset:%lx %lx\n", bd_index, bd_index,
- bd_offset_bytes, i);
- continue;
- }
- dprintf3("found bounds_dir_entry: 0x%lx @ "
- "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
- &buf[i]);
- /* mask off the enable bit: */
- bounds_dir_entry &= ~0x1;
- (*nr_populated_bdes)++;
- dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
- dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
-
- bt_start = bounds_dir_entry;
- bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
- if (!vaddr_mapped_by_range(bt_start)) {
- printf("bounds directory 0x%lx points to nowhere\n",
- bounds_dir_entry);
- mpx_dig_abort();
- }
- if (!vaddr_mapped_by_range(bt_tail)) {
- printf("bounds directory end 0x%lx points to nowhere\n",
- bt_tail);
- mpx_dig_abort();
- }
- /*
- * Each bounds directory entry controls 1MB of virtual address
- * space. This variable is the virtual address in the process
- * of the beginning of the area controlled by this bounds_dir.
- */
- bd_for_vaddr = bd_index * (1UL<<20);
-
- nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
- bounds_dir_global+bd_offset_bytes+i);
- total_entries += nr_entries;
- dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
- "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
- bd_index, buf+i,
- bounds_dir_entry, nr_entries, total_entries,
- bd_for_vaddr, bd_for_vaddr + (1UL<<20));
- }
- dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
- bd_offset_bytes);
- return total_entries;
-}
-
-int proc_pid_mem_fd = -1;
-
-void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
- long buffer_size_bytes, void *buffer)
-{
- unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
- int read_ret;
- off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
-
- if (seek_ret != seekto)
- mpx_dig_abort();
-
- read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
- /* there shouldn't practically be short reads of /proc/$pid/mem */
- if (read_ret != buffer_size_bytes)
- mpx_dig_abort();
-
- return buffer;
-}
-void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
- long buffer_size_bytes, void *buffer)
-
-{
- unsigned char vec[buffer_size_bytes / PAGE_SIZE];
- char *dig_bounds_dir_ptr =
- (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
- /*
- * use mincore() to quickly find the areas of the bounds directory
- * that have memory and thus will be worth scanning.
- */
- int incore_ret;
-
- int incore = 0;
- int i;
-
- dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
-
- incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
- if (incore_ret) {
- printf("mincore ret: %d\n", incore_ret);
- perror("mincore");
- mpx_dig_abort();
- }
- for (i = 0; i < sizeof(vec); i++)
- incore += vec[i];
- dprintf4("%s() total incore: %d\n", __func__, incore);
- if (!incore)
- return NULL;
- dprintf3("%s() total incore: %d\n", __func__, incore);
- return dig_bounds_dir_ptr;
-}
-
-int inspect_pid(int pid)
-{
- static int dig_nr;
- long offset_inside_bounds_dir;
- char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
- char *dig_bounds_dir_ptr;
- int total_entries = 0;
- int nr_populated_bdes = 0;
- int inspect_self;
-
- if (getpid() == pid) {
- dprintf4("inspecting self\n");
- inspect_self = 1;
- } else {
- dprintf4("inspecting pid %d\n", pid);
- mpx_dig_abort();
- }
-
- for (offset_inside_bounds_dir = 0;
- offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
- offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
- static int bufs_skipped;
- int this_entries;
-
- if (inspect_self) {
- dig_bounds_dir_ptr =
- fill_bounds_dir_buf_self(offset_inside_bounds_dir,
- sizeof(bounds_dir_buf),
- &bounds_dir_buf[0]);
- } else {
- dig_bounds_dir_ptr =
- fill_bounds_dir_buf_other(offset_inside_bounds_dir,
- sizeof(bounds_dir_buf),
- &bounds_dir_buf[0]);
- }
- if (!dig_bounds_dir_ptr) {
- bufs_skipped++;
- continue;
- }
- this_entries = search_bd_buf(dig_bounds_dir_ptr,
- sizeof(bounds_dir_buf),
- offset_inside_bounds_dir,
- &nr_populated_bdes);
- total_entries += this_entries;
- }
- printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
- total_entries, nr_populated_bdes);
- return total_entries + nr_populated_bdes;
-}
-
-#ifdef MPX_DIG_REMOTE
-int main(int argc, char **argv)
-{
- int err;
- char *c;
- unsigned long bounds_dir_entry;
- int pid;
-
- printf("mpx-dig starting...\n");
- err = sscanf(argv[1], "%d", &pid);
- printf("parsing: '%s', err: %d\n", argv[1], err);
- if (err != 1)
- mpx_dig_abort();
-
- err = sscanf(argv[2], "%lx", &bounds_dir_global);
- printf("parsing: '%s': %d\n", argv[2], err);
- if (err != 1)
- mpx_dig_abort();
-
- proc_pid_mem_fd = open_proc(pid, "mem");
- if (proc_pid_mem_fd < 0)
- mpx_dig_abort();
-
- inspect_pid(pid);
- return 0;
-}
-#endif
-
-long inspect_me(struct mpx_bounds_dir *bounds_dir)
-{
- int pid = getpid();
-
- pid_load_vaddrs(pid);
- bounds_dir_global = (unsigned long)bounds_dir;
- dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
- return inspect_pid(pid);
-}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
deleted file mode 100644
index d1b61ab870f8d9..00000000000000
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_HW_H
-#define _MPX_HW_H
-
-#include <assert.h>
-
-/* Describe the MPX Hardware Layout in here */
-
-#define NR_MPX_BOUNDS_REGISTERS 4
-
-#ifdef __i386__
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
-#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
-#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
-#define MPX_BOUNDS_TABLE_TOP_BIT 11
-#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
-#define MPX_BOUNDS_DIR_TOP_BIT 31
-
-#else
-
-/*
- * Linear Address of "pointer" (LAp)
- * 0 -> 2: ignored
- * 3 -> 19: index in to bounds table
- * 20 -> 47: index in to bounds directory
- * 48 -> 63: ignored
- */
-
-#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
-#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
-#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
-#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
-
-#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
-#define MPX_BOUNDS_TABLE_TOP_BIT 19
-#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
-#define MPX_BOUNDS_DIR_TOP_BIT 47
-
-#endif
-
-#define MPX_BOUNDS_DIR_NR_ENTRIES \
- (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
-#define MPX_BOUNDS_TABLE_NR_ENTRIES \
- (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
-
-#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
-
-struct mpx_bd_entry {
- union {
- char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
- void *contents[0];
- };
-} __attribute__((packed));
-
-struct mpx_bt_entry {
- union {
- char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
- unsigned long contents[0];
- };
-} __attribute__((packed));
-
-struct mpx_bounds_dir {
- struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
-} __attribute__((packed));
-
-struct mpx_bounds_table {
- struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
-} __attribute__((packed));
-
-static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
-{
- int total_nr_bits = topbit - bottombit;
- unsigned long mask = (1UL << total_nr_bits)-1;
- return (val >> bottombit) & mask;
-}
-
-static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
-{
- return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
- MPX_BOUNDS_TABLE_TOP_BIT);
-}
-
-static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
-{
- return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
- MPX_BOUNDS_DIR_TOP_BIT);
-}
-
-static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
- struct mpx_bounds_dir *bounds_dir)
-{
- unsigned long index = __vaddr_bounds_directory_index(vaddr);
- return &bounds_dir->entries[index];
-}
-
-static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
-{
- unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
- return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
-}
-
-static inline struct mpx_bounds_table *
-__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
-{
- unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
- assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
- __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
- return (struct mpx_bounds_table *)__bd_entry;
-}
-
-static inline struct mpx_bt_entry *
-mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
-{
- struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
- struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
- unsigned long index = __vaddr_bounds_table_index(vaddr);
- return &bt->entries[index];
-}
-
-#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
deleted file mode 100644
index 50f7e92724813a..00000000000000
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ /dev/null
@@ -1,1616 +0,0 @@
-/*
- * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
- *
- * Written by:
- * "Ren, Qiaowei" <qiaowei.ren@intel.com>
- * "Wei, Gang" <gang.wei@intel.com>
- * "Hansen, Dave" <dave.hansen@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2.
- */
-
-/*
- * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
- * it works on 32-bit.
- */
-
-int inspect_every_this_many_mallocs = 100;
-int zap_all_every_this_many_mallocs = 1000;
-
-#define _GNU_SOURCE
-#define _LARGEFILE64_SOURCE
-
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "mpx-hw.h"
-#include "mpx-debug.h"
-#include "mpx-mm.h"
-
-#ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline)
-#endif
-
-#ifndef TEST_DURATION_SECS
-#define TEST_DURATION_SECS 3
-#endif
-
-void write_int_to(char *prefix, char *file, int int_to_write)
-{
- char buf[100];
- int fd = open(file, O_RDWR);
- int len;
- int ret;
-
- assert(fd >= 0);
- len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
- assert(len >= 0);
- assert(len < sizeof(buf));
- ret = write(fd, buf, len);
- assert(ret == len);
- ret = close(fd);
- assert(!ret);
-}
-
-void write_pid_to(char *prefix, char *file)
-{
- write_int_to(prefix, file, getpid());
-}
-
-void trace_me(void)
-{
-/* tracing events dir */
-#define TED "/sys/kernel/debug/tracing/events/"
-/*
- write_pid_to("common_pid=", TED "signal/filter");
- write_pid_to("common_pid=", TED "exceptions/filter");
- write_int_to("", TED "signal/enable", 1);
- write_int_to("", TED "exceptions/enable", 1);
-*/
- write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
- write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
-}
-
-#define test_failed() __test_failed(__FILE__, __LINE__)
-static void __test_failed(char *f, int l)
-{
- fprintf(stderr, "abort @ %s::%d\n", f, l);
- abort();
-}
-
-/* Error Printf */
-#define eprintf(args...) fprintf(stderr, args)
-
-#ifdef __i386__
-
-/* i386 directory size is 4MB */
-#define REG_IP_IDX REG_EIP
-#define REX_PREFIX
-
-#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "push %%ebx;"
- "cpuid;"
- "mov %%ebx, %1;"
- "pop %%ebx"
- : "=a" (*eax),
- "=g" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-#else /* __i386__ */
-
-#define REG_IP_IDX REG_RIP
-#define REX_PREFIX "0x48, "
-
-#define XSAVE_OFFSET_IN_FPMEM 0
-
-/*
- * __cpuid() is from the Linux Kernel:
- */
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-#endif /* !__i386__ */
-
-struct xsave_hdr_struct {
- uint64_t xstate_bv;
- uint64_t reserved1[2];
- uint64_t reserved2[5];
-} __attribute__((packed));
-
-struct bndregs_struct {
- uint64_t bndregs[8];
-} __attribute__((packed));
-
-struct bndcsr_struct {
- uint64_t cfg_reg_u;
- uint64_t status_reg;
-} __attribute__((packed));
-
-struct xsave_struct {
- uint8_t fpu_sse[512];
- struct xsave_hdr_struct xsave_hdr;
- uint8_t ymm[256];
- uint8_t lwp[128];
- struct bndregs_struct bndregs;
- struct bndcsr_struct bndcsr;
-} __attribute__((packed));
-
-uint8_t __attribute__((__aligned__(64))) buffer[4096];
-struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
-
-uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
-struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
-
-uint64_t num_bnd_chk;
-
-static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
- unsigned char *fx = _fx;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static inline uint64_t xgetbv(uint32_t index)
-{
- uint32_t eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((uint64_t)edx << 32);
-}
-
-static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
-{
- memset(buffer, 0, sizeof(buffer));
- memcpy(buffer,
- (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
- sizeof(struct xsave_struct));
-
- return xsave_buf->bndcsr.status_reg;
-}
-
-#include <pthread.h>
-
-static uint8_t *get_next_inst_ip(uint8_t *addr)
-{
- uint8_t *ip = addr;
- uint8_t sib;
- uint8_t rm;
- uint8_t mod;
- uint8_t base;
- uint8_t modrm;
-
- /* determine the prefix. */
- switch(*ip) {
- case 0xf2:
- case 0xf3:
- case 0x66:
- ip++;
- break;
- }
-
- /* look for rex prefix */
- if ((*ip & 0x40) == 0x40)
- ip++;
-
- /* Make sure we have a MPX instruction. */
- if (*ip++ != 0x0f)
- return addr;
-
- /* Skip the op code byte. */
- ip++;
-
- /* Get the modrm byte. */
- modrm = *ip++;
-
- /* Break it down into parts. */
- rm = modrm & 7;
- mod = (modrm >> 6);
-
- /* Init the parts of the address mode. */
- base = 8;
-
- /* Is it a mem mode? */
- if (mod != 3) {
- /* look for scaled indexed addressing */
- if (rm == 4) {
- /* SIB addressing */
- sib = *ip++;
- base = sib & 7;
- switch (mod) {
- case 0:
- if (base == 5)
- ip += 4;
- break;
-
- case 1:
- ip++;
- break;
-
- case 2:
- ip += 4;
- break;
- }
-
- } else {
- /* MODRM addressing */
- switch (mod) {
- case 0:
- /* DISP32 addressing, no base */
- if (rm == 5)
- ip += 4;
- break;
-
- case 1:
- ip++;
- break;
-
- case 2:
- ip += 4;
- break;
- }
- }
- }
- return ip;
-}
-
-#ifdef si_lower
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
- return si->si_lower;
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
- return si->si_upper;
-}
-#else
-
-/*
- * This deals with old version of _sigfault in some distros:
- *
-
-old _sigfault:
- struct {
- void *si_addr;
- } _sigfault;
-
-new _sigfault:
- struct {
- void __user *_addr;
- int _trapno;
- short _addr_lsb;
- union {
- struct {
- void __user *_lower;
- void __user *_upper;
- } _addr_bnd;
- __u32 _pkey;
- };
- } _sigfault;
- *
- */
-
-static inline void **__si_bounds_hack(siginfo_t *si)
-{
- void *sigfault = &si->_sifields._sigfault;
- void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
- int *trapno = (int*)end_sigfault;
- /* skip _trapno and _addr_lsb */
- void **__si_lower = (void**)(trapno + 2);
-
- return __si_lower;
-}
-
-static inline void *__si_bounds_lower(siginfo_t *si)
-{
- return *__si_bounds_hack(si);
-}
-
-static inline void *__si_bounds_upper(siginfo_t *si)
-{
- return *(__si_bounds_hack(si) + 1);
-}
-#endif
-
-static int br_count;
-static int expected_bnd_index = -1;
-uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
-unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-/*
- * The kernel is supposed to provide some information about the bounds
- * exception in the siginfo. It should match what we have in the bounds
- * registers that we are checking against. Just check against the shadow copy
- * since it is easily available, and we also check that *it* matches the real
- * registers.
- */
-void check_siginfo_vs_shadow(siginfo_t* si)
-{
- int siginfo_ok = 1;
- void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
- void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
-
- if ((expected_bnd_index < 0) ||
- (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
- fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
- expected_bnd_index);
- exit(6);
- }
- if (__si_bounds_lower(si) != shadow_lower)
- siginfo_ok = 0;
- if (__si_bounds_upper(si) != shadow_upper)
- siginfo_ok = 0;
-
- if (!siginfo_ok) {
- fprintf(stderr, "ERROR: siginfo bounds do not match "
- "shadow bounds for register %d\n", expected_bnd_index);
- exit(7);
- }
-}
-
-void handler(int signum, siginfo_t *si, void *vucontext)
-{
- int i;
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
-
- dprintf1("entered signal handler\n");
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
-
- if (trapno == 5) {
- typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
- uint64_t status = read_mpx_status_sig(uctxt);
- uint64_t br_reason = status & 0x3;
-
- br_count++;
- dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
-
- dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
- status, ip, br_reason);
- dprintf2("si_signo: %d\n", si->si_signo);
- dprintf2(" signum: %d\n", signum);
- dprintf2("info->si_code == SEGV_BNDERR: %d\n",
- (si->si_code == SEGV_BNDERR));
- dprintf2("info->si_code: %d\n", si->si_code);
- dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
- dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
-
- for (i = 0; i < 8; i++)
- dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
- switch (br_reason) {
- case 0: /* traditional BR */
- fprintf(stderr,
- "Undefined status with bound exception:%jx\n",
- status);
- exit(5);
- case 1: /* #BR MPX bounds exception */
- /* these are normal and we expect to see them */
-
- check_siginfo_vs_shadow(si);
-
- dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
- status, (void *)ip, si->si_addr);
- num_bnd_chk++;
- uctxt->uc_mcontext.gregs[REG_IP_IDX] =
- (greg_t)get_next_inst_ip((uint8_t *)ip);
- break;
- case 2:
- fprintf(stderr, "#BR status == 2, missing bounds table,"
- "kernel should have handled!!\n");
- exit(4);
- break;
- default:
- fprintf(stderr, "bound check error: status 0x%jx at %p\n",
- status, (void *)ip);
- num_bnd_chk++;
- uctxt->uc_mcontext.gregs[REG_IP_IDX] =
- (greg_t)get_next_inst_ip((uint8_t *)ip);
- fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
- exit(3);
- }
- } else if (trapno == 14) {
- eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
- trapno, ip);
- eprintf("si_addr %p\n", si->si_addr);
- eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- test_failed();
- } else {
- eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
- eprintf("si_addr %p\n", si->si_addr);
- eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- test_failed();
- }
-}
-
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-#define XSTATE_CPUID 0x0000000d
-
-/*
- * List of XSAVE features Linux knows about:
- */
-enum xfeature_bit {
- XSTATE_BIT_FP,
- XSTATE_BIT_SSE,
- XSTATE_BIT_YMM,
- XSTATE_BIT_BNDREGS,
- XSTATE_BIT_BNDCSR,
- XSTATE_BIT_OPMASK,
- XSTATE_BIT_ZMM_Hi256,
- XSTATE_BIT_Hi16_ZMM,
-
- XFEATURES_NR_MAX,
-};
-
-#define XSTATE_FP (1 << XSTATE_BIT_FP)
-#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
-
-#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
-
-bool one_bit(unsigned int x, int bit)
-{
- return !!(x & (1<<bit));
-}
-
-void print_state_component(int state_bit_nr, char *name)
-{
- unsigned int eax, ebx, ecx, edx;
- unsigned int state_component_size;
- unsigned int state_component_supervisor;
- unsigned int state_component_user;
- unsigned int state_component_aligned;
-
- /* See SDM Section 13.2 */
- cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
- assert(eax || ebx || ecx);
- state_component_size = eax;
- state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
- state_component_user = !one_bit(ecx, 0);
- state_component_aligned = one_bit(ecx, 1);
- printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
- name,
- state_component_size, state_component_user,
- state_component_supervisor, state_component_aligned);
-
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
-#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
-
-bool check_mpx_support(void)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
-
- /* We can't do much without XSAVE, so just make these assert()'s */
- if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
- fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
- exit(0);
- }
-
- if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
- fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
- exit(0);
- }
-
- /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
- /* Is this redundant with the feature bit checks? */
- cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
- if (eax < XSTATE_CPUID) {
- fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
- " can not run MPX tests\n");
- exit(0);
- }
-
- printf("XSAVE is supported by HW & OS\n");
-
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-
- printf("XSAVE processor supported state mask: 0x%x\n", eax);
- printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
-
- /* Make sure that the MPX states are enabled in in XCR0 */
- if ((eax & MPX_XSTATES) != MPX_XSTATES) {
- fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
- exit(0);
- }
-
- /* Make sure the MPX states are supported by XSAVE* */
- if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
- fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
- "can not run MPX tests\n");
- exit(0);
- }
-
- print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
- print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");
-
- return true;
-}
-
-void enable_mpx(void *l1base)
-{
- /* enable point lookup */
- memset(buffer, 0, sizeof(buffer));
- xrstor_state(xsave_buf, 0x18);
-
- xsave_buf->xsave_hdr.xstate_bv = 0x10;
- xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
- xsave_buf->bndcsr.status_reg = 0;
-
- dprintf2("bf xrstor\n");
- dprintf2("xsave cndcsr: status %jx, configu %jx\n",
- xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
- xrstor_state(xsave_buf, 0x18);
- dprintf2("after xrstor\n");
-
- xsave_state_1(xsave_buf, 0x18);
-
- dprintf1("xsave bndcsr: status %jx, configu %jx\n",
- xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
-}
-
-#include <sys/prctl.h>
-
-struct mpx_bounds_dir *bounds_dir_ptr;
-
-unsigned long __bd_incore(const char *func, int line)
-{
- unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
- return ret;
-}
-#define bd_incore() __bd_incore(__func__, __LINE__)
-
-void check_clear(void *ptr, unsigned long sz)
-{
- unsigned long *i;
-
- for (i = ptr; (void *)i < ptr + sz; i++) {
- if (*i) {
- dprintf1("%p is NOT clear at %p\n", ptr, i);
- assert(0);
- }
- }
- dprintf1("%p is clear for %lx\n", ptr, sz);
-}
-
-void check_clear_bd(void)
-{
- check_clear(bounds_dir_ptr, 2UL << 30);
-}
-
-#define USE_MALLOC_FOR_BOUNDS_DIR 1
-bool process_specific_init(void)
-{
- unsigned long size;
- unsigned long *dir;
- /* Guarantee we have the space to align it, add padding: */
- unsigned long pad = getpagesize();
-
- size = 2UL << 30; /* 2GB */
- if (sizeof(unsigned long) == 4)
- size = 4UL << 20; /* 4MB */
- dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
-
- if (USE_MALLOC_FOR_BOUNDS_DIR) {
- unsigned long _dir;
-
- dir = malloc(size + pad);
- assert(dir);
- _dir = (unsigned long)dir;
- _dir += 0xfffUL;
- _dir &= ~0xfffUL;
- dir = (void *)_dir;
- } else {
- /*
- * This makes debugging easier because the address
- * calculations are simpler:
- */
- dir = mmap((void *)0x200000000000, size + pad,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (dir == (void *)-1) {
- perror("unable to allocate bounds directory");
- abort();
- }
- check_clear(dir, size);
- }
- bounds_dir_ptr = (void *)dir;
- madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
- bd_incore();
- dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
- (char *)bounds_dir_ptr + size);
- check_clear(dir, size);
- enable_mpx(dir);
- check_clear(dir, size);
- if (prctl(43, 0, 0, 0, 0)) {
- printf("no MPX support\n");
- abort();
- return false;
- }
- return true;
-}
-
-bool process_specific_finish(void)
-{
- if (prctl(44)) {
- printf("no MPX support\n");
- return false;
- }
- return true;
-}
-
-void setup_handler()
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #BR is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0; /* void(*)(int)*/
- newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- assert(r == 0);
-}
-
-void mpx_prepare(void)
-{
- dprintf2("%s()\n", __func__);
- setup_handler();
- process_specific_init();
-}
-
-void mpx_cleanup(void)
-{
- printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
- process_specific_finish();
-}
-
-/*-------------- the following is test case ---------------*/
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-uint64_t num_lower_brs;
-uint64_t num_upper_brs;
-
-#define MPX_CONFIG_OFFSET 1024
-#define MPX_BOUNDS_OFFSET 960
-#define MPX_HEADER_OFFSET 512
-#define MAX_ADDR_TESTED (1<<28)
-#define TEST_ROUNDS 100
-
-/*
- 0F 1A /r BNDLDX-Load
- 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
- 66 0F 1A /r BNDMOV bnd1, bnd2/m128
- 66 0F 1B /r BNDMOV bnd1/m128, bnd2
- F2 0F 1A /r BNDCU bnd, r/m64
- F2 0F 1B /r BNDCN bnd, r/m64
- F3 0F 1A /r BNDCL bnd, r/m64
- F3 0F 1B /r BNDMK bnd, m64
-*/
-
-static __always_inline void xsave_state(void *_fx, uint64_t mask)
-{
- uint32_t lmask = mask;
- uint32_t hmask = mask >> 32;
- unsigned char *fx = _fx;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static __always_inline void mpx_clear_bnd0(void)
-{
- long size = 0;
- void *ptr = NULL;
- /* F3 0F 1B /r BNDMK bnd, m64 */
- /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr), "d" (size-1)
- : "memory");
-}
-
-static __always_inline void mpx_make_bound_helper(unsigned long ptr,
- unsigned long size)
-{
- /* F3 0F 1B /r BNDMK bnd, m64 */
- /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr), "d" (size-1)
- : "memory");
-}
-
-static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
-{
- /* F3 0F 1A /r NDCL bnd, r/m64 */
- /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
- asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
- : : "c" (ptr)
- : "memory");
-}
-
-static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
-{
- /* F2 0F 1A /r BNDCU bnd, r/m64 */
- /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
- asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
- : : "c" (ptr)
- : "memory");
-}
-
-static __always_inline void mpx_movbndreg_helper()
-{
- /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
- /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
-
- asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
-}
-
-static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
-{
- /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
- /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
- asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
- : : "c" (mem)
- : "memory");
-}
-
-static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
-{
- /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
- /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
- asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
- : : "c" (mem)
- : "memory");
-}
-
-static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
- unsigned long ptr_val)
-{
- /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
- /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
- asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
- : : "c" (ptr_addr), "d" (ptr_val)
- : "memory");
-}
-
-static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
- unsigned long ptr_val)
-{
- /* 0F 1A /r BNDLDX-Load */
- /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */
- asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
- : : "c" (ptr_addr), "d" (ptr_val)
- : "memory");
-}
-
-void __print_context(void *__print_xsave_buffer, int line)
-{
- uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
- uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
-
- int i;
- eprintf("%s()::%d\n", "print_context", line);
- for (i = 0; i < 4; i++) {
- eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
- (unsigned long)bounds[i*2],
- ~(unsigned long)bounds[i*2+1],
- (unsigned long)bounds[i*2+1]);
- }
-
- eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
-}
-#define print_context(x) __print_context(x, __LINE__)
-#ifdef DEBUG
-#define dprint_context(x) print_context(x)
-#else
-#define dprint_context(x) do{}while(0)
-#endif
-
-void init()
-{
- int i;
-
- srand((unsigned int)time(NULL));
-
- for (i = 0; i < 4; i++) {
- shadow_plb[i][0] = 0;
- shadow_plb[i][1] = ~(unsigned long)0;
- }
-}
-
-long int __mpx_random(int line)
-{
-#ifdef NOT_SO_RANDOM
- static long fake = 722122311;
- fake += 563792075;
- return fakse;
-#else
- return random();
-#endif
-}
-#define mpx_random() __mpx_random(__LINE__)
-
-uint8_t *get_random_addr()
-{
- uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
- return (addr - (unsigned long)addr % sizeof(uint8_t *));
-}
-
-static inline bool compare_context(void *__xsave_buffer)
-{
- uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
-
- int i;
- for (i = 0; i < 4; i++) {
- dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
- i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
- i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
- if ((shadow_plb[i][0] != bounds[i*2]) ||
- (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
- eprintf("ERROR comparing shadow to real bound register %d\n", i);
- eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
- (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
- (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
- return false;
- }
- }
-
- return true;
-}
-
-void mkbnd_shadow(uint8_t *ptr, int index, long offset)
-{
- uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
- uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
- *lower = (unsigned long)ptr;
- *upper = (unsigned long)ptr + offset - 1;
-}
-
-void check_lowerbound_shadow(uint8_t *ptr, int index)
-{
- uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
- if (*lower > (uint64_t)(unsigned long)ptr)
- num_lower_brs++;
- else
- dprintf1("LowerBoundChk passed:%p\n", ptr);
-}
-
-void check_upperbound_shadow(uint8_t *ptr, int index)
-{
- uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
- if (upper < (uint64_t)(unsigned long)ptr)
- num_upper_brs++;
- else
- dprintf1("UpperBoundChk passed:%p\n", ptr);
-}
-
-__always_inline void movbndreg_shadow(int src, int dest)
-{
- shadow_plb[dest][0] = shadow_plb[src][0];
- shadow_plb[dest][1] = shadow_plb[src][1];
-}
-
-__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
-{
- unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
- unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
- *dest = *lower;
- *(dest+1) = *upper;
-}
-
-__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
-{
- unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
- unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
- *lower = *src;
- *upper = *(src+1);
-}
-
-__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
- shadow_map[0] = (unsigned long)shadow_plb[index][0];
- shadow_map[1] = (unsigned long)shadow_plb[index][1];
- shadow_map[2] = (unsigned long)ptr_val;
- dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
- index, ptr, ptr_val, ptr_val);
- /*ptr ignored */
-}
-
-void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
-{
- uint64_t lower = shadow_map[0];
- uint64_t upper = shadow_map[1];
- uint8_t *value = (uint8_t *)shadow_map[2];
-
- if (value != ptr_val) {
- dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
- "because %p != %p\n", __func__, index, ptr,
- ptr_val, index, value, ptr_val);
- shadow_plb[index][0] = 0;
- shadow_plb[index][1] = ~(unsigned long)0;
- } else {
- shadow_plb[index][0] = lower;
- shadow_plb[index][1] = upper;
- }
- /* ptr ignored */
-}
-
-static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
-{
- mpx_make_bound_helper((unsigned long)ptr, 0x1800);
-}
-
-static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
-{
- mkbnd_shadow(ptr, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
-{
- /* these are hard-coded to check bnd0 */
- expected_bnd_index = 0;
- mpx_check_lowerbound_helper((unsigned long)(ptr-1));
- mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
- /* reset this since we do not expect any more bounds exceptions */
- expected_bnd_index = -1;
-}
-
-static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
-{
- check_lowerbound_shadow(ptr-1, 0);
- check_upperbound_shadow(ptr+0x1800, 0);
-}
-
-static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
-{
- mpx_make_bound_helper((unsigned long)ptr, 0x1800);
- mpx_movbndreg_helper();
- mpx_movbnd2mem_helper(buf);
- mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
-{
- mkbnd_shadow(ptr, 0, 0x1800);
- movbndreg_shadow(0, 2);
- movbnd2mem_shadow(0, (unsigned long *)buf);
- mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
-{
- mpx_movbnd_from_mem_helper(buf);
-}
-
-static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
-{
- movbnd_from_mem_shadow((unsigned long *)buf, 0);
-}
-
-static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
-{
- mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
- mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
-}
-
-static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
-{
- stdsc_shadow(0, buf, ptr);
- mkbnd_shadow(ptr+0x12, 0, 0x1800);
-}
-
-static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
-{
- mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
-}
-
-static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
-{
- lddsc_shadow(0, buf, ptr);
-}
-
-#define NR_MPX_TEST_FUNCTIONS 6
-
-/*
- * For compatibility reasons, MPX will clear the bounds registers
- * when you make function calls (among other things). We have to
- * preserve the registers in between calls to the "helpers" since
- * they build on each other.
- *
- * Be very careful not to make any function calls inside the
- * helpers, or anywhere else beween the xrstor and xsave.
- */
-#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
- xrstor_state(xsave_test_buf, flags); \
- mpx_test_helper##helper_nr(buf, ptr); \
- xsave_state(xsave_test_buf, flags); \
- mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
-} while (0)
-
-static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
-{
- uint64_t flags = 0x18;
-
- dprint_context(xsave_test_buf);
- switch (nr) {
- case 0:
- run_helper(0, buf, buf_shadow, ptr);
- break;
- case 1:
- run_helper(1, buf, buf_shadow, ptr);
- break;
- case 2:
- run_helper(2, buf, buf_shadow, ptr);
- break;
- case 3:
- run_helper(3, buf, buf_shadow, ptr);
- break;
- case 4:
- run_helper(4, buf, buf_shadow, ptr);
- break;
- case 5:
- run_helper(5, buf, buf_shadow, ptr);
- break;
- default:
- test_failed();
- break;
- }
- dprint_context(xsave_test_buf);
-}
-
-unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
-extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
-
-long cover_buf_with_bt_entries(void *buf, long buf_len)
-{
- int i;
- long nr_to_fill;
- int ratio = 1000;
- unsigned long buf_len_in_ptrs;
-
- /* Fill about 1/100 of the space with bt entries */
- nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
-
- if (!nr_to_fill)
- dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
-
- /* Align the buffer to pointer size */
- while (((unsigned long)buf) % sizeof(void *)) {
- buf++;
- buf_len--;
- }
- /* We are storing pointers, so make */
- buf_len_in_ptrs = buf_len / sizeof(void *);
-
- for (i = 0; i < nr_to_fill; i++) {
- long index = (mpx_random() % buf_len_in_ptrs);
- void *ptr = buf + index * sizeof(unsigned long);
- unsigned long ptr_addr = (unsigned long)ptr;
-
- /* ptr and size can be anything */
- mpx_make_bound_helper((unsigned long)ptr, 8);
-
- /*
- * take bnd0 and put it in to bounds tables "buf + index" is an
- * address inside the buffer where we are pretending that we
- * are going to put a pointer We do not, though because we will
- * never load entries from the table, so it doesn't matter.
- */
- mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
- dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
- ptr_addr, buf);
- }
- return nr_to_fill;
-}
-
-unsigned long align_down(unsigned long alignme, unsigned long align_to)
-{
- return alignme & ~(align_to-1);
-}
-
-unsigned long align_up(unsigned long alignme, unsigned long align_to)
-{
- return (alignme + align_to - 1) & ~(align_to-1);
-}
-
-/*
- * Using 1MB alignment guarantees that each no allocation
- * will overlap with another's bounds tables.
- *
- * We have to cook our own allocator here. malloc() can
- * mix other allocation with ours which means that even
- * if we free all of our allocations, there might still
- * be bounds tables for the *areas* since there is other
- * valid memory there.
- *
- * We also can't use malloc() because a free() of an area
- * might not free it back to the kernel. We want it
- * completely unmapped an malloc() does not guarantee
- * that.
- */
-#ifdef __i386__
-long alignment = 4096;
-long sz_alignment = 4096;
-#else
-long alignment = 1 * MB;
-long sz_alignment = 1 * MB;
-#endif
-void *mpx_mini_alloc(unsigned long sz)
-{
- unsigned long long tries = 0;
- static void *last;
- void *ptr;
- void *try_at;
-
- sz = align_up(sz, sz_alignment);
-
- try_at = last + alignment;
- while (1) {
- ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (ptr == (void *)-1)
- return NULL;
- if (ptr == try_at)
- break;
-
- munmap(ptr, sz);
- try_at += alignment;
-#ifdef __i386__
- /*
- * This isn't quite correct for 32-bit binaries
- * on 64-bit kernels since they can use the
- * entire 32-bit address space, but it's close
- * enough.
- */
- if (try_at > (void *)0xC0000000)
-#else
- if (try_at > (void *)0x0000800000000000)
-#endif
- try_at = (void *)0x0;
- if (!(++tries % 10000))
- dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
- continue;
- }
- last = ptr;
- dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
- return ptr;
-}
-void mpx_mini_free(void *ptr, long sz)
-{
- dprintf2("%s() ptr: %p\n", __func__, ptr);
- if ((unsigned long)ptr > 0x100000000000) {
- dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
- test_failed();
- }
- sz = align_up(sz, sz_alignment);
- dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
- munmap(ptr, sz);
- dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
-}
-
-#define NR_MALLOCS 100
-struct one_malloc {
- char *ptr;
- int nr_filled_btes;
- unsigned long size;
-};
-struct one_malloc mallocs[NR_MALLOCS];
-
-void free_one_malloc(int index)
-{
- unsigned long free_ptr;
- unsigned long mask;
-
- if (!mallocs[index].ptr)
- return;
-
- mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
- dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
-
- free_ptr = (unsigned long)mallocs[index].ptr;
- mask = alignment-1;
- dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
- (free_ptr & mask), mask);
- assert((free_ptr & mask) == 0);
-
- mallocs[index].ptr = NULL;
-}
-
-#ifdef __i386__
-#define MPX_BOUNDS_TABLE_COVERS 4096
-#else
-#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
-#endif
-void zap_everything(void)
-{
- long after_zap;
- long before_zap;
- int i;
-
- before_zap = inspect_me(bounds_dir_ptr);
- dprintf1("zapping everything start: %ld\n", before_zap);
- for (i = 0; i < NR_MALLOCS; i++)
- free_one_malloc(i);
-
- after_zap = inspect_me(bounds_dir_ptr);
- dprintf1("zapping everything done: %ld\n", after_zap);
- /*
- * We only guarantee to empty the thing out if our allocations are
- * exactly aligned on the boundaries of a boudns table.
- */
- if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
- (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
- if (after_zap != 0)
- test_failed();
-
- assert(after_zap == 0);
- }
-}
-
-void do_one_malloc(void)
-{
- static int malloc_counter;
- long sz;
- int rand_index = (mpx_random() % NR_MALLOCS);
- void *ptr = mallocs[rand_index].ptr;
-
- dprintf3("%s() enter\n", __func__);
-
- if (ptr) {
- dprintf3("freeing one malloc at index: %d\n", rand_index);
- free_one_malloc(rand_index);
- if (mpx_random() % (NR_MALLOCS*3) == 3) {
- int i;
- dprintf3("zapping some more\n");
- for (i = rand_index; i < NR_MALLOCS; i++)
- free_one_malloc(i);
- }
- if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
- zap_everything();
- }
-
- /* 1->~1M */
- sz = (1 + mpx_random() % 1000) * 1000;
- ptr = mpx_mini_alloc(sz);
- if (!ptr) {
- /*
- * If we are failing allocations, just assume we
- * are out of memory and zap everything.
- */
- dprintf3("zapping everything because out of memory\n");
- zap_everything();
- goto out;
- }
-
- dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
- mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
- mallocs[rand_index].ptr = ptr;
- mallocs[rand_index].size = sz;
-out:
- if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
- inspect_me(bounds_dir_ptr);
-}
-
-void run_timed_test(void (*test_func)(void))
-{
- int done = 0;
- long iteration = 0;
- static time_t last_print;
- time_t now;
- time_t start;
-
- time(&start);
- while (!done) {
- time(&now);
- if ((now - start) > TEST_DURATION_SECS)
- done = 1;
-
- test_func();
- iteration++;
-
- if ((now - last_print > 1) || done) {
- printf("iteration %ld complete, OK so far\n", iteration);
- last_print = now;
- }
- }
-}
-
-void check_bounds_table_frees(void)
-{
- printf("executing unmaptest\n");
- inspect_me(bounds_dir_ptr);
- run_timed_test(&do_one_malloc);
- printf("done with malloc() fun\n");
-}
-
-void insn_test_failed(int test_nr, int test_round, void *buf,
- void *buf_shadow, void *ptr)
-{
- print_context(xsave_test_buf);
- eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
- while (test_nr == 5) {
- struct mpx_bt_entry *bte;
- struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
- struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
-
- printf(" bd: %p\n", bd);
- printf("&bde: %p\n", bde);
- printf("*bde: %lx\n", *(unsigned long *)bde);
- if (!bd_entry_valid(bde))
- break;
-
- bte = mpx_vaddr_to_bt_entry(buf, bd);
- printf(" te: %p\n", bte);
- printf("bte[0]: %lx\n", bte->contents[0]);
- printf("bte[1]: %lx\n", bte->contents[1]);
- printf("bte[2]: %lx\n", bte->contents[2]);
- printf("bte[3]: %lx\n", bte->contents[3]);
- break;
- }
- test_failed();
-}
-
-void check_mpx_insns_and_tables(void)
-{
- int successes = 0;
- int failures = 0;
- int buf_size = (1024*1024);
- unsigned long *buf = malloc(buf_size);
- const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
- int i, j;
-
- memset(buf, 0, buf_size);
- memset(buf_shadow, 0, sizeof(buf_shadow));
-
- for (i = 0; i < TEST_ROUNDS; i++) {
- uint8_t *ptr = get_random_addr() + 8;
-
- for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
- if (0 && j != 5) {
- successes++;
- continue;
- }
- dprintf2("starting test %d round %d\n", j, i);
- dprint_context(xsave_test_buf);
- /*
- * test5 loads an address from the bounds tables.
- * The load will only complete if 'ptr' matches
- * the load and the store, so with random addrs,
- * the odds of this are very small. Make it
- * higher by only moving 'ptr' 1/10 times.
- */
- if (random() % 10 <= 0)
- ptr = get_random_addr() + 8;
- dprintf3("random ptr{%p}\n", ptr);
- dprint_context(xsave_test_buf);
- run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
- dprint_context(xsave_test_buf);
- if (!compare_context(xsave_test_buf)) {
- insn_test_failed(j, i, buf, buf_shadow, ptr);
- failures++;
- goto exit;
- }
- successes++;
- dprint_context(xsave_test_buf);
- dprintf2("finished test %d round %d\n", j, i);
- dprintf3("\n");
- dprint_context(xsave_test_buf);
- }
- }
-
-exit:
- dprintf2("\nabout to free:\n");
- free(buf);
- dprintf1("successes: %d\n", successes);
- dprintf1(" failures: %d\n", failures);
- dprintf1(" tests: %d\n", total_nr_tests);
- dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
- dprintf1(" saw: %d #BRs\n", br_count);
- if (failures) {
- eprintf("ERROR: non-zero number of failures\n");
- exit(20);
- }
- if (successes != total_nr_tests) {
- eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
- successes, total_nr_tests);
- exit(21);
- }
- if (num_upper_brs + num_lower_brs != br_count) {
- eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
- num_upper_brs, num_lower_brs, br_count);
- eprintf("successes: %d\n", successes);
- eprintf(" failures: %d\n", failures);
- eprintf(" tests: %d\n", total_nr_tests);
- eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
- eprintf(" saw: %d #BRs\n", br_count);
- exit(22);
- }
-}
-
-/*
- * This is supposed to SIGSEGV nicely once the kernel
- * can no longer allocate vaddr space.
- */
-void exhaust_vaddr_space(void)
-{
- unsigned long ptr;
- /* Try to make sure there is no room for a bounds table anywhere */
- unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
-#ifdef __i386__
- unsigned long max_vaddr = 0xf7788000UL;
-#else
- unsigned long max_vaddr = 0x800000000000UL;
-#endif
-
- dprintf1("%s() start\n", __func__);
- /* do not start at 0, we aren't allowed to map there */
- for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
- void *ptr_ret;
- int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
-
- if (!ret) {
- dprintf1("madvise() %lx ret: %d\n", ptr, ret);
- continue;
- }
- ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (ptr_ret != (void *)ptr) {
- perror("mmap");
- dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
- break;
- }
- if (!(ptr & 0xffffff))
- dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
- }
- for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
- dprintf2("covering 0x%lx with bounds table entries\n", ptr);
- cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
- }
- dprintf1("%s() end\n", __func__);
- printf("done with vaddr space fun\n");
-}
-
-void mpx_table_test(void)
-{
- printf("starting mpx bounds table test\n");
- run_timed_test(check_mpx_insns_and_tables);
- printf("done with mpx bounds table test\n");
-}
-
-int main(int argc, char **argv)
-{
- int unmaptest = 0;
- int vaddrexhaust = 0;
- int tabletest = 0;
- int i;
-
- check_mpx_support();
- mpx_prepare();
- srandom(11179);
-
- bd_incore();
- init();
- bd_incore();
-
- trace_me();
-
- xsave_state((void *)xsave_test_buf, 0x1f);
- if (!compare_context(xsave_test_buf))
- printf("Init failed\n");
-
- for (i = 1; i < argc; i++) {
- if (!strcmp(argv[i], "unmaptest"))
- unmaptest = 1;
- if (!strcmp(argv[i], "vaddrexhaust"))
- vaddrexhaust = 1;
- if (!strcmp(argv[i], "tabletest"))
- tabletest = 1;
- }
- if (!(unmaptest || vaddrexhaust || tabletest)) {
- unmaptest = 1;
- /* vaddrexhaust = 1; */
- tabletest = 1;
- }
- if (unmaptest)
- check_bounds_table_frees();
- if (tabletest)
- mpx_table_test();
- if (vaddrexhaust)
- exhaust_vaddr_space();
- printf("%s completed successfully\n", argv[0]);
- exit(0);
-}
-
-#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
deleted file mode 100644
index 6dbdd66b824240..00000000000000
--- a/tools/testing/selftests/x86/mpx-mm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MPX_MM_H
-#define _MPX_MM_H
-
-#define PAGE_SIZE 4096
-#define MB (1UL<<20)
-
-extern long nr_incore(void *ptr, unsigned long size_bytes);
-
-#endif /* _MPX_MM_H */